260 minor fixes (#261)

* changes to support generating geofabric tiles that do not overlap the specified coarse DEM raster * Added code to ensure waterways are clipped prior to raster value selection to avoid out of bounds errors * Fixed issue when adding multiple rivers to the same DEM * Split out creation of an empty DEM from the creation of an populated DEM * update version * Allow level of compression for final netCDF output files to be set in the instruction files * Update rivers interpolation to dynamically support interpolation from fewer points if less supplied * updated tests to reflect minor changes resulting from fixes * fixup: Format Python code with Black --------- Co-authored-by: github-actions <github-actions@github.com>
rosepearson · Sep 12, 2024 · 71d92f1 · 71d92f1
1 parent e1c9277
commit 71d92f1
Show file tree

Hide file tree

Showing 10 changed files with 306 additions and 160 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "geofabrics"
-version = "1.1.19"
+version = "1.1.20"
 description = "A package for creating geofabrics for flood modelling."
 readme = "README.md"
 authors = [{ name = "Rose pearson", email = "rose.pearson@niwa.co.nz" }]

diff --git a/src/geofabrics/dem.py b/src/geofabrics/dem.py
@@ -146,7 +146,7 @@ def calculate_dem_bounds(self, dem):
     def _set_up(self):
         """Set DEM CRS and trim the DEM to size"""
 
-        self._dem.rio.set_crs(self._extents["total"].crs)
+        self._dem.rio.write_crs(self._extents["total"].crs, inplace=True)
 
         # Calculate DEM bounds and check for overlap before clip
         dem_bounds = self.calculate_dem_bounds(self._dem)
@@ -1257,11 +1257,18 @@ def interpolate_rivers(
             region_to_rasterise.dissolve().buffer(self.catchment_geometry.resolution),
             drop=True,
         )
+        self._write_netcdf_conventions_in_place(edge_dem, self.catchment_geometry.crs)
+        edge_dem["z"] = edge_dem.z.rio.interpolate_na(method="nearest")
+        edge_dem = self._dem.rio.clip(
+            region_to_rasterise.dissolve().buffer(self.catchment_geometry.resolution),
+            drop=True,
+        )
         edge_dem = edge_dem.rio.clip(
             region_to_rasterise.dissolve().geometry,
             invert=True,
             drop=True,
         )
+
         # Define the river and mouth edge points
         grid_x, grid_y = numpy.meshgrid(edge_dem.x, edge_dem.y)
         flat_x = grid_x.flatten()
@@ -1325,6 +1332,22 @@ def interpolate_rivers(
         )
         pdal_pipeline.execute()
 
+        if (
+            len(river_points) < k_nearest_neighbours
+            or len(edge_points) < k_nearest_neighbours
+        ):
+            logging.info(
+                f"Fewer river or edge points than the default expected {k_nearest_neighbours}. "
+                f"Updating k_nearest_neighbours to {min(len(river_points), len(edge_points))}."
+            )
+            k_nearest_neighbours = min(len(river_points), len(edge_points))
+        if k_nearest_neighbours < 3:
+            logging.warning(
+                f"Not enough river or edge points to meaningfully include {k_nearest_neighbours}. "
+                f"Exiting without including the river and edge points."
+            )
+            return
+
         if self.chunk_size is None:
             logging.warning("Chunksize of none. set to DEM shape.")
             self.chunk_size = max(len(self._dem.x), len(self._dem.y))
@@ -1768,20 +1791,10 @@ def add_lidar(
                 -resolution,
                 dtype=raster_options["raster_type"],
             )
-            metadata["instructions"]["dataset_mapping"]["lidar"] = {
-                "no LiDAR": self.SOURCE_CLASSIFICATION["no data"]
-            }
-            elevations = {
-                "no LiDAR": dask.array.full(
-                    fill_value=numpy.nan,
-                    shape=(len(y), len(x)),
-                    dtype=raster_options["raster_type"],
-                )
-            }
-            dem = self._create_data_set(
+            dem = self._create_empty_data_set(
                 x=x,
                 y=y,
-                elevations=elevations,
+                raster_type=raster_options["raster_type"],
                 metadata=metadata,
             )
         elif self.chunk_size is None:
@@ -2172,6 +2185,107 @@ def _create_data_set(
 
         return dem
 
+    def _create_empty_data_set(
+        self,
+        x: numpy.ndarray,
+        y: numpy.ndarray,
+        raster_type: str,
+        metadata: dict,
+    ) -> xarray.Dataset:
+        """A function to create a new but empty dataset from x, y arrays.
+
+        Parameters
+        ----------
+
+            x
+                X coordinates of the dataset.
+            y
+                Y coordinates of the dataset.
+            elevations
+                A dictionary of elevations over the x, and y coordiantes.Keyed
+                by the dataset name.
+            metadata
+                Used to pull out the dataset mapping for creating the
+                lidar_source layer
+        """
+
+        # Create the empty layers - z, data_source, lidar_source
+        # Set NaN where not z values so merging occurs correctly
+        z = dask.array.full(
+            fill_value=numpy.nan,
+            shape=(len(y), len(x)),
+            dtype=raster_type,
+            chunks={"x": self.chunk_size, "y": self.chunk_size},
+        )
+
+        # Create source variable - assume all values are defined from LiDAR
+        data_source = dask.array.full(
+            fill_value=self.SOURCE_CLASSIFICATION["no data"],
+            shape=(len(y), len(x)),
+            dtype=raster_type,
+            chunks={"x": self.chunk_size, "y": self.chunk_size},
+        )
+
+        # Create LiDAR id variable - name and value info in the metadata
+        lidar_source = dask.array.full(
+            fill_value=self.SOURCE_CLASSIFICATION["no data"],
+            shape=(len(y), len(x)),
+            dtype=raster_type,
+            chunks={"x": self.chunk_size, "y": self.chunk_size},
+        )
+
+        dem = xarray.Dataset(
+            data_vars=dict(
+                z=(
+                    ["y", "x"],
+                    z,
+                    {
+                        "units": "m",
+                        "long_name": "ground elevation",
+                        "vertical_datum": "EPSG:"
+                        f"{self.catchment_geometry.crs['vertical']}",
+                    },
+                ),
+                data_source=(
+                    ["y", "x"],
+                    data_source,
+                    {
+                        "units": "",
+                        "long_name": "source data classification",
+                        "mapping": f"{self.SOURCE_CLASSIFICATION}",
+                    },
+                ),
+                lidar_source=(
+                    ["y", "x"],
+                    lidar_source,
+                    {
+                        "units": "",
+                        "long_name": "source lidar ID",
+                        "mapping": f"{{'no LiDAR': self.SOURCE_CLASSIFICATION['no data']}}",
+                    },
+                ),
+            ),
+            coords=dict(x=(["x"], x), y=(["y"], y)),
+            attrs={
+                "title": "Geofabric representing elevation and roughness",
+                "source": f"{metadata['library_name']} version "
+                f"{metadata['library_version']}",
+                "description": f"{metadata['library_name']}:"
+                f"{metadata['class_name']} resolution "
+                f"{self.catchment_geometry.resolution}",
+                "history": f"{metadata['utc_time']}:"
+                f"{metadata['library_name']}:{metadata['class_name']} "
+                f"version {metadata['library_version']} "
+                f"resolution {self.catchment_geometry.resolution};",
+                "geofabrics_instructions": f"{metadata['instructions']}",
+            },
+        )
+        # ensure the expected CF conventions are followed
+        self._write_netcdf_conventions_in_place(dem, self.catchment_geometry.crs)
+        dem = dem.chunk(chunks={"x": self.chunk_size, "y": self.chunk_size})
+
+        return dem
+
 
 class PatchDem(LidarBase):
     """A class to manage the addition of a DEM to the foreground or background
@@ -2260,7 +2374,7 @@ def add_patch(self, patch_path: pathlib.Path, label: str, layer: str):
             patch_path,
             masked=True,
         ).squeeze("band", drop=True)
-        patch.rio.set_crs(self.catchment_geometry.crs["horizontal"])
+        patch.rio.write_crs(self.catchment_geometry.crs["horizontal"], inplace=True)
         patch_resolution = patch.rio.resolution()
         patch_resolution = max(abs(patch_resolution[0]), abs(patch_resolution[1]))
         # Define region to patch within

diff --git a/src/geofabrics/processor.py b/src/geofabrics/processor.py
@@ -170,7 +170,11 @@ def create_results_folder(self):
         results_folder.mkdir(parents=True, exist_ok=True)
 
     def save_dem(
-        self, filename: pathlib.Path, dataset: xarray.Dataset, generator: dem.DemBase
+        self,
+        filename: pathlib.Path,
+        dataset: xarray.Dataset,
+        generator: dem.DemBase,
+        compression: int,
     ):
         """Save out the dem/geofabrics labelled array.
 
@@ -190,7 +194,7 @@ def save_dem(
                 "In processor.DemGenerator - write out the raw DEM to "
                 f"netCDF: {filename}"
             )
-            compression = {"zlib": True, "complevel": 1}
+            compression = {"zlib": True, "complevel": compression}
         elif filename.suffix.lower() == ".tif":
             self.logger.info(
                 "In processor.DemGenerator - write out the raw DEM as a "
@@ -304,6 +308,7 @@ def get_instruction_general(self, key: str, subkey: str = None):
                 "is_depth": False,
             },
             "filter_waterways_by_osm_ids": [],
+            "compression": 1,
         }
 
         if key not in defaults and key not in self.instructions["general"]:
@@ -505,6 +510,7 @@ def get_vector_or_raster_paths(
                         api_key,
                         bounding_polygon=bounding_polygon,
                         verbose=True,
+                        crs=self.get_crs()["horizontal"],
                     )
 
                     api_instruction = self.instructions["datasets"][data_type][
@@ -1005,10 +1011,19 @@ def run(self):
                 cached_file = temp_file
 
             # Add a coarse DEM if significant area without LiDAR and a coarse DEM
-            if self.check_vector_or_raster(key="coarse_dems", api_type="raster"):
-                coarse_dem_paths = self.get_vector_or_raster_paths(
-                    key="coarse_dems", data_type="raster"
+            coarse_dem_paths = self.get_vector_or_raster_paths(
+                key="coarse_dems", data_type="raster", required=False
+            )
+            if (
+                self.check_vector_or_raster(key="coarse_dems", api_type="raster")
+                and len(coarse_dem_paths) == 0
+            ):
+                logging.warning(
+                    "The coarse dem keyword specified in the instructions file, "
+                    "but no paths recovered - empty list. Please check the "
+                    "instruction file contents."
                 )
+            elif len(coarse_dem_paths) > 0:
                 self.logger.info(f"Incorporating coarse DEMs: {coarse_dem_paths}")
                 del raw_dem
                 raw_dem = dem.PatchDem(
@@ -1057,6 +1072,7 @@ def run(self):
                 filename=pathlib.Path(self.get_instruction_path("raw_dem")),
                 dataset=raw_dem.dem,
                 generator=raw_dem,
+                compression=self.get_instruction_general("compression"),
             )
             self.logger.info(f"Remove folder {temp_folder} for temporary files")
             del raw_dem
@@ -1282,7 +1298,7 @@ def add_hydrological_features(
                     ),
                     cache_path=temp_folder,
                 )
-                temp_file = temp_folder / "dem_added_rivers.nc"
+                temp_file = temp_folder / f"dem_added_{index + 1}_rivers.nc"
                 self.logger.info(
                     f"Save temp DEM with rivers added to netCDF: {temp_file}"
                 )
@@ -1405,6 +1421,7 @@ def run(self):
                     filename=result_path,
                     dataset=hydrologic_dem.dem,
                     generator=hydrologic_dem,
+                    compression=self.get_instruction_general("compression"),
                 )
                 del hydrologic_dem
             except (Exception, KeyboardInterrupt) as caught_exception:
@@ -1570,6 +1587,7 @@ def run(self):
                     filename=self.get_instruction_path("result_dem"),
                     dataset=patch_dem.dem,
                     generator=patch_dem,
+                    compression=self.get_instruction_general("compression"),
                 )
             except (Exception, KeyboardInterrupt) as caught_exception:
                 pathlib.Path(self.get_instruction_path("result_dem")).unlink()
@@ -1764,6 +1782,7 @@ def run(self):
                 filename=self.get_instruction_path("result_geofabric"),
                 dataset=roughness_dem.dem,
                 generator=roughness_dem,
+                compression=self.get_instruction_general("compression"),
             )
             del roughness_dem
             self.logger.info(
@@ -3125,7 +3144,11 @@ def estimate_closed_elevations(
             self.logger.info("Closed waterways already recorded. ")
             return
         # If not - estimate elevations along close waterways
-        closed_waterways = waterways[waterways["tunnel"]]
+        dem_bounds = geopandas.GeoSeries(
+            [shapely.geometry.box(*dem.rio.bounds())], crs=dem.rio.crs
+        )
+        closed_waterways = waterways.clip(dem_bounds, keep_geom_type=True, sort=True)
+        closed_waterways = closed_waterways[closed_waterways["tunnel"]]
         closed_waterways["polygon"] = closed_waterways.buffer(
             closed_waterways["width"].to_numpy()
         )
@@ -3207,7 +3230,11 @@ def estimate_open_elevations(
             self.logger.info("Open waterways already recorded. ")
             return
         # If not - estimate the elevations along the open waterways - drop any invalid geometries
-        open_waterways = waterways[numpy.logical_not(waterways["tunnel"])]
+        dem_bounds = geopandas.GeoSeries(
+            [shapely.geometry.box(*dem.rio.bounds())], crs=dem.rio.crs
+        )
+        open_waterways = waterways.clip(dem_bounds, keep_geom_type=True, sort=True)
+        open_waterways = open_waterways[numpy.logical_not(open_waterways["tunnel"])]
         open_waterways = open_waterways[~open_waterways.geometry.isna()]
         # sample the ends of the waterway - sample over a polygon at each end
         polygons = open_waterways.interpolate(0).buffer(

diff --git a/src/geofabrics/version.py b/src/geofabrics/version.py
@@ -3,4 +3,4 @@
 Contains the package version information
 """
 
-__version__ = "1.1.19"
+__version__ = "1.1.20"
diff --git a/tests/test_many_stages_westport/data/benchmark.nc b/tests/test_many_stages_westport/data/benchmark.nc