from collections import defaultdict
from typing import Optional

import numpy as np
import rasterio
import geopandas as gpd
import cv2
from shapely.geometry import shape, Polygon
from shapely.validation import make_valid


def onehot(components: np.ndarray) -> np.ndarray:
    """Expand an integer label array into a stack of binary uint8 masks.

    One channel is allocated per label value in ``0..components.max()``;
    channel ``k`` is 1 where ``components == k``.  If label 0 occurs in the
    input, its channel is dropped, so the remaining channel ``k`` then
    corresponds to label ``k + 1``.  If label 0 does not occur, the (all-zero)
    channel 0 is kept and channel ``k`` corresponds to label ``k``.

    Args:
        components: Array of non-negative integer labels.

    Returns:
        A ``uint8`` array of shape ``(n_channels, *components.shape)``.
        An empty input yields a stack with zero channels.
    """
    if components.size == 0:
        # ``max()`` of an empty array raises; there is nothing to encode.
        return np.zeros((0, *components.shape), dtype=np.uint8)

    # Convert to a Python int *before* adding 1: with small integer dtypes
    # (e.g. uint8 holding label 255) the addition would otherwise be carried
    # out in the array dtype and can wrap around to 0.
    n_labels = int(components.max()) + 1

    oh = np.zeros((n_labels, *components.shape), dtype=np.uint8)
    for label in range(n_labels):
        oh[label][components == label] = 1

    if (components == 0).any():
        # Label 0 is present: drop its channel.
        oh = oh[1:]
    return oh


def apply_transform(polygon: Polygon, transform: rasterio.Affine) -> Polygon:
    """Return a new polygon whose exterior ring is mapped through ``transform``.

    Each exterior coordinate ``c`` becomes ``transform * c``.  Only the
    exterior ring is used; interior rings of ``polygon`` are not carried over.
    """
    return Polygon([transform * c for c in polygon.exterior.coords])


def polygonize_raster(
    data_img: np.ndarray,
    raster: np.ndarray,
    tolerance: float,
    transform: rasterio.transform.Affine = None,
    crs=None,
    pixel_threshold: int = 25,
    buffer_distance: float = 9.0,
) -> Optional[gpd.GeoDataFrame]:
    """Convert a labelled component raster into simplified polygons.

    For every channel produced by ``onehot(raster)`` with at least
    ``pixel_threshold`` set pixels, the external contours are extracted,
    simplified with ``cv2.approxPolyDP`` and turned into polygons.  The
    resulting geometries are buffered by ``buffer_distance`` and passed
    through ``make_valid``.

    Args:
        data_img: Class image indexed with each component mask; the most
            frequent value under the mask becomes the ``class`` of every
            polygon of that component.  Assumed to be a non-negative integer
            array with the same shape as ``raster`` (required by the boolean
            indexing and ``np.bincount``) -- TODO confirm against callers.
        raster: Integer component labels, fed to ``onehot``.
        tolerance: Simplification tolerance as a fraction of each contour's
            perimeter (``epsilon = tolerance * perimeter``).
        transform: Optional affine applied to the polygon coordinates
            (contour x/y, i.e. column/row).  When ``None`` the polygons stay
            in pixel coordinates.
        crs: CRS handed to the ``GeoDataFrame`` constructor.
        pixel_threshold: Components with fewer set pixels are skipped.
        buffer_distance: Distance passed to ``GeoSeries.buffer``, expressed in
            the units of the final geometry coordinates.

    Returns:
        A ``GeoDataFrame`` with columns ``geometry``, ``component`` (index of
        the channel in ``onehot(raster)``, not necessarily the raw label),
        ``class`` and ``area`` (``cv2.contourArea`` of the unsimplified
        contour, in pixel units), or ``None`` when no polygon was produced.
    """
    data = defaultdict(list)

    onehot_raster = onehot(raster)
    if onehot_raster.shape[0] == 0:
        return None

    for i in range(onehot_raster.shape[0]):
        mask = onehot_raster[i]
        if mask.sum() < pixel_threshold:
            continue

        contours, _ = cv2.findContours(
            mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        # The majority class depends only on the component mask, so it is
        # computed at most once per component -- and only when a contour
        # actually survives the filters below.
        most_freq_class = None

        for contour in contours:
            perimeter = cv2.arcLength(contour, True)
            area = cv2.contourArea(contour)
            approx = cv2.approxPolyDP(contour, tolerance * perimeter, True)

            # approxPolyDP returns (n_points, 1, 2); flatten to (n_points, 2).
            ring = approx.reshape(-1, 2)
            if ring.shape[0] < 3:
                # Fewer than three vertices cannot form a polygon.
                continue

            poly = shape({"type": "Polygon", "coordinates": [ring]})
            if transform is not None:
                poly = apply_transform(poly, transform)

            if most_freq_class is None:
                most_freq_class = np.argmax(
                    np.bincount(data_img[mask.astype(bool)])
                )

            data["geometry"].append(poly)
            data["component"].append(i)
            data["class"].append(most_freq_class)
            data["area"].append(area)

    if not data["geometry"]:
        # Every component was filtered out.  Building a GeoDataFrame without
        # a geometry column and then accessing ``gdf["geometry"]`` would fail,
        # so report "nothing found" the same way as the empty-raster case.
        return None

    gdf = gpd.GeoDataFrame(data, crs=crs)
    gdf["geometry"] = gdf["geometry"].buffer(buffer_distance)
    gdf["geometry"] = gdf["geometry"].apply(make_valid)
    return gdf