From 15af8ce191adaca5c90bc4835f6e5fd238943173 Mon Sep 17 00:00:00 2001 From: Rose Pearson Date: Sun, 28 Jan 2024 11:21:41 +1300 Subject: [PATCH] try split lidar and clipping (#240) * separate LiDAR and clipping stages and cache file in-between * fixup: Format Python code with Black * Updated windows environment yml now that dependencies are all conda * update version --------- Co-authored-by: github-actions --- environment_windows.yml | 26 ++++------------ pyproject.toml | 2 +- src/geofabrics/dem.py | 41 ++++++++++++++++++-------- src/geofabrics/processor.py | 59 +++++++++++++++++++++++++++++++++---- src/geofabrics/version.py | 2 +- 5 files changed, 89 insertions(+), 41 deletions(-) diff --git a/environment_windows.yml b/environment_windows.yml index f778bbeb..bb4fd1ca 100644 --- a/environment_windows.yml +++ b/environment_windows.yml @@ -1,25 +1,11 @@ -# YML file for setting up a virtual environment capable of running GeoFabrics. It includes packages used for processing point clouds, GIS and raster to produce raster using spyder. - name: geofabrics channels: - conda-forge - defaults dependencies: - - pyproj - - pip - - gdal - - geopandas >=0.10.2 - - rioxarray - - pdal - - python-pdal - - python-dotenv - - matplotlib - - dask - - distributed - - spyder - - spyder-kernels - - pytest - - geoapis >=0.3.0 - - pip: - - netcdf4 - - osmpythontools >= 0.3.5 + - rioxarray >= 0.15.1 + - geoapis>= 0.3.2 + - osmpythontools >= 0.3.5 + - python-pdal >= 3.3.0 + - distributed >= 2023.6.0 + - pytest >= 7.4.0 diff --git a/pyproject.toml b/pyproject.toml index 91d8395f..d2c835e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta" [project] name = "geofabrics" -version = "1.1.9" +version = "1.1.10" description = "A package for creating geofabrics for flood modelling." 
readme = "README.md" authors = [{ name = "Rose pearson", email = "rose.pearson@niwa.co.nz" }] diff --git a/src/geofabrics/dem.py b/src/geofabrics/dem.py index 25335f41..b3a91dcf 100644 --- a/src/geofabrics/dem.py +++ b/src/geofabrics/dem.py @@ -1222,7 +1222,9 @@ def _load_dem(self, filename: pathlib.Path) -> xarray.Dataset: return dem - def save_dem(self, filename: pathlib.Path, dem: xarray.Dataset): + def save_dem( + self, filename: pathlib.Path, dem: xarray.Dataset, encoding: dict = None + ): """Save the DEM to a netCDF file and optionally reload it :param filename: .nc file where to save the DEM @@ -1234,7 +1236,13 @@ def save_dem(self, filename: pathlib.Path, dem: xarray.Dataset): ), "all DataArray variables of a xarray.Dataset must have a CRS" try: - dem.to_netcdf(filename, format="NETCDF4", engine="netcdf4") + self._write_netcdf_conventions_in_place(dem, self.catchment_geometry.crs) + if encoding is not None: + dem.to_netcdf( + filename, format="NETCDF4", engine="netcdf4", encoding=encoding + ) + else: + dem.to_netcdf(filename, format="NETCDF4", engine="netcdf4") dem.close() except (Exception, KeyboardInterrupt) as caught_exception: @@ -1456,18 +1464,27 @@ def add_lidar( metadata=metadata, ) + self._dem = dem + + def clip_lidar( + self, + ): + """Clip the 'raw' DEM. 
Should be called immediately after the add_lidar function.""" + # Clip DEM to Catchment and ensure NaN outside region to rasterise catchment = self.catchment_geometry.catchment - dem = dem.rio.clip_box(**catchment.bounds.iloc[0]) - dem = dem.where(clip_mask(dem.z, catchment.geometry, self.chunk_size)) + self._dem = self._dem.rio.clip_box(**catchment.bounds.iloc[0]) + self._dem = self._dem.where( + clip_mask(self._dem.z, catchment.geometry, self.chunk_size) + ) # Check if the ocean is clipped or not (must be in all datasets) drop_offshore_lidar = all(self.drop_offshore_lidar.values()) land_and_foreshore = self.catchment_geometry.land_and_foreshore if drop_offshore_lidar and land_and_foreshore.area.sum() > 0: # If area of 0 size, all will be NaN anyway - mask = clip_mask(dem.z, land_and_foreshore.geometry, self.chunk_size) - dem = dem.where(mask) + mask = clip_mask(self._dem.z, land_and_foreshore.geometry, self.chunk_size) + self._dem = self._dem.where(mask) # If drop offshore LiDAR ensure the foreshore values are 0 or negative foreshore = self.catchment_geometry.foreshore @@ -1485,22 +1502,20 @@ def add_lidar( # Mask to delineate DEM outside of buffered foreshore or below 0 mask = ~( - (dem.z > 0) - & clip_mask(dem.z, buffered_foreshore.geometry, self.chunk_size) + (self._dem.z > 0) + & clip_mask(self._dem.z, buffered_foreshore.geometry, self.chunk_size) ) # Set any positive LiDAR foreshore points to zero - dem["z"] = dem.z.where(mask, 0) + self._dem["z"] = self._dem.z.where(mask, 0) - dem["data_source"] = dem.data_source.where( + self._dem["data_source"] = self._dem.data_source.where( mask, self.SOURCE_CLASSIFICATION["ocean bathymetry"] ) - dem["lidar_source"] = dem.lidar_source.where( + self._dem["lidar_source"] = self._dem.lidar_source.where( mask, self.SOURCE_CLASSIFICATION["no data"] ) - self._dem = dem - def _add_tiled_lidar_chunked( self, lidar_datasets_info: dict, diff --git a/src/geofabrics/processor.py b/src/geofabrics/processor.py index 0b2649a4..2f45e411 
100644 --- a/src/geofabrics/processor.py +++ b/src/geofabrics/processor.py @@ -917,11 +917,19 @@ def run(self): "lidar_classifications_to_keep" ), metadata=self.create_metadata(), - ) # Note must be called after all others if it is to be complete + ) # Save a cached copy of DEM to temporary memory cache - self.logger.info("Save temp raw DEM to netCDF") cached_file = temp_folder / "raw_lidar.nc" + self.logger.info(f"Save temp raw DEM to netCDF: {cached_file}") + self.raw_dem.save_and_load_dem(cached_file) + + # Clip LiDAR - ensure within bounds/foreshore + self.raw_dem.clip_lidar() + + # Save a cached copy of DEM to temporary memory cache + cached_file = temp_folder / "raw_lidar_clipped.nc" + self.logger.info(f"Save temp raw DEM to netCDF: {cached_file}") self.raw_dem.save_and_load_dem(cached_file) # Add a coarse DEM if significant area without LiDAR and a coarse DEM @@ -943,8 +951,8 @@ def run(self): self.raw_dem.add_coarse_dem(coarse_dem_path, area_threshold) - self.logger.info("Save temp raw DEM to netCDF") temp_file = temp_folder / f"raw_dem_{coarse_dem_path.stem}.nc" + self.logger.info(f"Save temp raw DEM to netCDF: {temp_file}") self.raw_dem.save_and_load_dem(temp_file) # Remove previous cached file and replace with new one @@ -953,10 +961,28 @@ def run(self): # compute and save raw DEM self.logger.info( - "In processor.DemGenerator - write out the raw DEM to netCDF" + "In processor.DemGenerator - write out the raw DEM to netCDF: " + f"{self.get_instruction_path('raw_dem')}" ) + encoding = { + "data_source": { + "zlib": True, + "complevel": 1, + }, + "lidar_source": { + "zlib": True, + "complevel": 1, + }, + "z": { + "zlib": True, + "complevel": 1, + }, + } + encoding = None # Separately test compressing final netCDF outputs self.raw_dem.save_dem( - self.get_instruction_path("raw_dem"), dem=self.raw_dem.dem + self.get_instruction_path("raw_dem"), + dem=self.raw_dem.dem, + encoding=encoding, ) self.logger.info(f"Remove folder {temp_folder} for temporary 
files") shutil.rmtree(temp_folder) @@ -1342,11 +1368,32 @@ def run(self): # save results self.logger.info( "In processor.RoughnessLengthGenerator - write out " - "the raw DEM to netCDF" + "the raw DEM to netCDF: " + f"{self.get_instruction_path('result_geofabric')}" ) + encoding = { + "data_source": { + "zlib": True, + "complevel": 1, + }, + "lidar_source": { + "zlib": True, + "complevel": 1, + }, + "z": { + "zlib": True, + "complevel": 1, + }, + "zo": { + "zlib": True, + "complevel": 1, + }, + } + encoding = None # Separately test compressing final netCDF outputs self.roughness_dem.save_dem( filename=self.get_instruction_path("result_geofabric"), dem=self.roughness_dem.dem, + encoding=encoding, ) self.logger.info( "In processor.RoughnessLengthGenerator - clean folder for " diff --git a/src/geofabrics/version.py b/src/geofabrics/version.py index 4277da0f..89163c81 100644 --- a/src/geofabrics/version.py +++ b/src/geofabrics/version.py @@ -3,4 +3,4 @@ Contains the package version information """ -__version__ = "1.1.9" +__version__ = "1.1.10"