Molecules selection

rayference · Jul 17, 2023 · fea9cab · fea9cab
1 parent 5baba26
commit fea9cab
Show file tree

Hide file tree

Showing 8 changed files with 141 additions and 9 deletions.
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -9,6 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+* `molecules` parameter to `joseki.make` to select the molecules to include
+  in the profile.
+* `select_molecules` function in `joseki.profiles.core` to restrict a profile
+  to the specified molecules.
+* `drop_molecules` accessor method to drop mole fraction data for specified 
+  molecules.
 * Test that surface pressure is used to rescale pressure profile.
 * Parameter pressure_data to `joseki.profiles.from_cams_reanalysis` to 
   indicate how to compute the pressure profile (either with or without

diff --git a/docs/how-to-guides.md b/docs/how-to-guides.md
@@ -112,11 +112,28 @@ cells-represented profile.
 Similarly to [when specifying the altitude grid](#altitude-grid), you can 
 ensure that column densities are conserved with the `conserve_column` parameter.
 
+## Molecules selection
+
+You might be interested only in the mole fraction data of specific molecules.
+To select the molecules you want to be included in your profile, specify them
+with the `molecules` parameter:
+
+```python
+ds = joseki.make(
+    identifier="afgl_1986-us_standard",
+    molecules=["H2O", "CO2", "O3"],
+)
+```
+
+In the above example, the mole fraction data covers the molecules H2O, CO2 and 
+O3 only.
+
+
 ## Advanced options
 
 The collection of atmospheric profiles defined by
-[Anderson et al (1986)](bibliography.md#Anderson+1986) includes volume mixing
-ratio data for 28 molecules, where molecules 8-28 are described as *additional*.
+[Anderson et al (1986)](bibliography.md#Anderson+1986) includes mole fraction
+data for 28 molecules, where molecules 8-28 are described as *additional*.
 By default, these additional molecules are included in the atmospheric profile.
 To discard these additional molecules, set the `additional_molecules`
 parameter to `False`:

diff --git a/src/joseki/accessor.py b/src/joseki/accessor.py
@@ -420,7 +420,7 @@ def rescale(
             raise ValueError("Cannot rescale") from e
 
         # update history attribute
-        now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
+        now = datetime.datetime.utcnow().replace(microsecond=0).isoformat()
         for m in factors.keys():
             ds.attrs["history"] += (
                 f"\n{now} - rescaled {m}'s volume mixing ratio using a scaling "
@@ -454,6 +454,30 @@ def rescale_to(
             check_x_sum=check_x_sum,
         )
 
+    def drop_molecules(
+        self,
+        molecules: t.List[str],
+    ) -> xr.Dataset:
+        """Drop molecules from dataset.
+
+        The accessed dataset is left untouched: the `x_<molecule>` data
+        variables are removed from, and the history entry is appended to,
+        the returned dataset only.
+
+        Args:
+            molecules: List of molecules to drop.
+
+        Returns:
+            Dataset with molecules dropped and an updated `history`
+            attribute.
+        """
+        ds = self._obj
+
+        # Drop first: if `drop_vars` raises, no history entry is recorded.
+        dropped = ds.drop_vars([f"x_{m}" for m in molecules])
+
+        # update history attribute on a fresh attrs mapping so that the
+        # attributes of the accessed dataset are not modified
+        now = datetime.datetime.utcnow().replace(microsecond=0).isoformat()
+        dropped.attrs = {
+            **ds.attrs,
+            "history": ds.attrs["history"] + (
+                f"\n{now} - dropped mole fraction data for molecules "
+                f"{', '.join(molecules)} - joseki, version {__version__}"
+            ),
+        }
+
+        return dropped
+
     def validate(
         self,
         check_x_sum: bool = False,

diff --git a/src/joseki/core.py b/src/joseki/core.py
@@ -1,4 +1,6 @@
 """Core module."""
+from __future__ import annotations
+
 import logging
 import os
 import typing as t
@@ -7,17 +9,22 @@
 import xarray as xr
 
 from .profiles.factory import factory
-from .profiles.core import represent_profile_in_cells, DEFAULT_METHOD
+from .profiles.core import (
+    DEFAULT_METHOD,
+    represent_profile_in_cells,
+    select_molecules,
+)
 
 logger = logging.getLogger(__name__)
 
 
 def make(
     identifier: str,
-    z: t.Optional[pint.Quantity] = None,
-    interp_method: t.Optional[t.Mapping[str, str]] = DEFAULT_METHOD,
+    z: pint.Quantity | None = None,
+    interp_method: t.Mapping[str, str] | None = DEFAULT_METHOD,
     represent_in_cells: bool = False,
     conserve_column: bool = False,
+    molecules: t.List[str] | None = None,
     **kwargs: t.Any,
 ) -> xr.Dataset:
     """
@@ -32,6 +39,7 @@ def make(
             interpolated profile.
         conserve_column: If `True`, ensure that column densities are conserved
             during interpolation.
+        molecules: List of molecules to include in the profile.
         kwargs: Additional keyword arguments passed to the profile constructor.
     
     Returns:
@@ -42,6 +50,7 @@ def make(
     logger.debug("interp_method: %s", interp_method)
     logger.debug("represent_in_cells: %s", represent_in_cells)
     logger.debug("conserve_column: %s", conserve_column)
+    logger.debug("molecules: %s", molecules)
     logger.debug("kwargs: %s", kwargs)
 
     profile = factory.create(identifier)
@@ -61,6 +70,9 @@ def make(
             conserve_column=conserve_column,
         )
 
+    if molecules is not None:
+        ds = select_molecules(ds, molecules)
+
     return ds
 
 
@@ -97,4 +109,4 @@ def identifiers() -> t.List[str]:
     Returns:
         List of all registered profile identifiers.
     """
-    return factory.registered_identifiers
+    return factory.registered_identifiers
diff --git a/src/joseki/profiles/core.py b/src/joseki/profiles/core.py
@@ -348,6 +348,32 @@ def regularize(
         **kwargs,
     )
 
+
+def select_molecules(
+    ds: xr.Dataset,
+    molecules: t.List[str],
+) -> xr.Dataset:
+    """
+    Select specified molecules in the profile.
+
+    Args:
+        ds: Initial atmospheric profile.
+        molecules: List of molecules to select.
+
+    Returns:
+        Atmospheric profile with exactly the specified molecules.
+
+    Raises:
+        ValueError: If at least one of the specified molecules is not
+            available in the initial profile.
+    """
+    available = ds.joseki.molecules
+
+    # Validate before dropping anything, so that an invalid selection fails
+    # without going through `drop_molecules` at all.
+    if any(m not in available for m in molecules):
+        raise ValueError(
+            f"Could not select molecules {molecules}, "
+            f"available molecules are {available}."
+        )
+
+    requested = set(molecules)
+    return ds.joseki.drop_molecules(
+        [m for m in available if m not in requested]
+    )
+
 @define
 class Profile(ABC):
     """

diff --git a/tests/profiles/test_core.py b/tests/profiles/test_core.py
@@ -6,7 +6,13 @@
 
 from joseki import unit_registry as ureg
 from joseki.core import make
-from joseki.profiles.core import rescale_to_column, interp, extrapolate, regularize
+from joseki.profiles.core import (
+    rescale_to_column,
+    interp,
+    extrapolate,
+    regularize,
+    select_molecules,
+)
 from joseki.core import represent_profile_in_cells
 from joseki.units import to_quantity
 
@@ -222,3 +228,25 @@ def test_regularize_num(test_data_set: xr.Dataset):
     )
 
     assert regularized.z.size == num
+
+
+def test_select_molecules(test_data_set: xr.Dataset):
+    """Selected molecules are kept, the others are removed."""
+    kept = ["H2O", "CO2"]
+    result = select_molecules(ds=test_data_set, molecules=kept)
+    remaining = result.joseki.molecules
+
+    for molecule in kept:
+        assert molecule in remaining
+    assert "O3" not in remaining
+
+def test_select_molecules_invalid(test_data_set: xr.Dataset):
+    """Raise when selected molecules are not available."""
+    molecules = ["SO2", "NO2"]
+    # `Dataset.drop` is deprecated; `drop_vars` is the explicit replacement
+    # for removing data variables.
+    ds = test_data_set.drop_vars([f"x_{m}" for m in molecules])
+    with pytest.raises(ValueError):
+        select_molecules(
+            ds=ds,
+            molecules=["H2O", "CO2", "SO2", "NO2"],
+        )
diff --git a/tests/test_accessor.py b/tests/test_accessor.py
@@ -195,3 +195,17 @@ def test_rescale_to_column_mass_density():
         target["H2O"].m,
         significant=3,
     )
+
+@pytest.mark.parametrize(
+    "molecules",
+    [
+        ["H2O"],
+        ["H2O", "CO2"],
+    ],
+)
+def test_drop_molecules(molecules):
+    """Dropped molecules are no longer listed by the accessor."""
+    ds = joseki.make("afgl_1986-midlatitude_summer")
+    before = ds.joseki.molecules
+    assert all(m in before for m in molecules)
+
+    after = ds.joseki.drop_molecules(molecules).joseki.molecules
+    assert not any(m in after for m in molecules)
diff --git a/tests/test_core.py b/tests/test_core.py
@@ -127,6 +127,11 @@ def test_make_conserve_column_3():
             significant=6
         )
 
+def test_select_molecules():
+    """Profile made with `molecules` lists exactly those molecules."""
+    requested = ["H2O", "CO2"]
+    ds = make(identifier="afgl_1986-tropical", molecules=requested)
+    assert ds.joseki.molecules == requested
+
 def test_open_dataset(tmpdir):
     """Returns xr.Dataset."""
     ds = make(identifier="afgl_1986-tropical")
@@ -145,4 +150,4 @@ def test_load_dataset(tmpdir):
 
 def test_identifiers():
     """Returns list of identifiers."""
-    assert isinstance(identifiers(), list)
+    assert isinstance(identifiers(), list)