Skip to content

Commit

Permalink
Merge pull request #428 from GispoCoding/427-add-band-selection-to-de…
Browse files Browse the repository at this point in the history
…scriptive_statistics_raster

Add band parameter to descriptive statistics raster
  • Loading branch information
nmaarnio authored Sep 18, 2024
2 parents b95f3d0 + 9581b74 commit d36dad2
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 17 deletions.
6 changes: 3 additions & 3 deletions eis_toolkit/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,15 +786,15 @@ def compute_pca_vector_cli(

# DESCRIPTIVE STATISTICS (RASTER)
@app.command()
def descriptive_statistics_raster_cli(input_file: INPUT_FILE_OPTION):
def descriptive_statistics_raster_cli(input_raster: INPUT_FILE_OPTION, band: int = 1):
"""Generate descriptive statistics from raster data."""
from eis_toolkit.exploratory_analyses.descriptive_statistics import descriptive_statistics_raster

typer.echo("Progress: 10%")

with rasterio.open(input_file) as raster:
with rasterio.open(input_raster) as raster:
typer.echo("Progress: 25%")
results_dict = descriptive_statistics_raster(raster)
results_dict = descriptive_statistics_raster(raster, band)
typer.echo("Progress: 75%")

typer.echo("Progress: 100% \n")
Expand Down
51 changes: 38 additions & 13 deletions eis_toolkit/exploratory_analyses/descriptive_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
import pandas as pd
import rasterio
from beartype import beartype
from beartype.typing import Union
from beartype.typing import Dict, Union
from statsmodels.stats import stattools
from statsmodels.stats.weightstats import DescrStatsW

from eis_toolkit.exceptions import InvalidColumnException
from eis_toolkit.exceptions import InvalidColumnException, InvalidRasterBandException


def _descriptive_statistics(data: Union[rasterio.io.DatasetReader, pd.DataFrame, gpd.GeoDataFrame]) -> dict:
def _descriptive_statistics(data: Union[rasterio.io.DatasetReader, pd.DataFrame, gpd.GeoDataFrame]) -> Dict[str, float]:
statistics = DescrStatsW(data)
min = np.min(data)
max = np.max(data)
Expand Down Expand Up @@ -38,14 +38,25 @@ def _descriptive_statistics(data: Union[rasterio.io.DatasetReader, pd.DataFrame,


@beartype
def descriptive_statistics_dataframe(input_data: Union[pd.DataFrame, gpd.GeoDataFrame], column: str) -> dict:
"""Generate descriptive statistics from vector data.
def descriptive_statistics_dataframe(
input_data: Union[pd.DataFrame, gpd.GeoDataFrame], column: str
) -> Dict[str, float]:
"""Compute descriptive statistics from vector data.
Generates min, max, mean, quantiles(25%, 50% and 75%), standard deviation, relative standard deviation and skewness.
Computes the following statistics:
- min
- max
- mean
- quantile 25%
- quantile 50% (median)
- quantile 75%
- standard deviation
- relative standard deviation
- skewness
Args:
input_data: Data to generate descriptive statistics from.
column: Specify the column to generate descriptive statistics from.
input_data: Input vector data.
column: Column in vector data to compute descriptive statistics from.
Returns:
The descriptive statistics in previously described order.
Expand All @@ -58,19 +69,33 @@ def descriptive_statistics_dataframe(input_data: Union[pd.DataFrame, gpd.GeoData


@beartype
def descriptive_statistics_raster(input_data: rasterio.io.DatasetReader) -> dict:
"""Generate descriptive statistics from raster data.
def descriptive_statistics_raster(input_data: rasterio.io.DatasetReader, band: int = 1) -> Dict[str, float]:
"""Compute descriptive statistics from raster data.
Computes the following statistics:
- min
- max
- mean
- quantile 25%
- quantile 50% (median)
- quantile 75%
- standard deviation
- relative standard deviation
- skewness
Generates min, max, mean, quantiles(25%, 50% and 75%), standard deviation, relative standard deviation and skewness.
Nodata values are removed from the data before the statistics are computed.
Args:
input_data: Data to generate descriptive statistics from.
input_data: Input raster data.
band: Raster band to compute descriptive statistics from.
Returns:
The descriptive statistics in previously described order.
"""
data = input_data.read().flatten()
if band not in range(1, input_data.count + 1):
raise InvalidRasterBandException(f"Input raster does not contain the selected band: {band}.")

data = input_data.read(band)
nodata_value = input_data.nodata
data = data[data != nodata_value]
statistics = _descriptive_statistics(data)
Expand Down
2 changes: 1 addition & 1 deletion tests/exploratory_analyses/descriptive_statistics_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def test_descriptive_statistics_geodataframe():

def test_descriptive_statistics_raster():
"""Checks that returned statistics are correct when using raster data."""
test = descriptive_statistics_raster(src_raster)
test = descriptive_statistics_raster(src_raster, 1)
np.testing.assert_almost_equal(test["min"], 2.503)
np.testing.assert_almost_equal(test["max"], 9.67)
np.testing.assert_almost_equal(test["mean"], 5.1865644)
Expand Down

0 comments on commit d36dad2

Please sign in to comment.