rayference · nollety · Jul 17, 2023 · Jul 17, 2023
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -9,6 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+* `molecules` parameter to `joseki.make` to select the molecules to be 
+  included in the profile.
+* `select_molecules` function in `joseki.profiles.core` to select the
+  molecules to be included in the profile.
+* `drop_molecules` accessor method to drop mole fraction data for specified 
+  molecules.
 * Test that surface pressure is used to rescale pressure profile.
 * Parameter pressure_data to `joseki.profiles.from_cams_reanalysis` to 
   indicate how to compute the pressure profile (either with or without
@@ -36,6 +42,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Changed
 
+* Rename *volume (mixing) fraction* -> *mole fraction*.
 * Updated `joseki.units` tests.
 * `joseki.units.to_quantity` is dispatched against `pint.Quantity`, `dict`, 
   `int`, `float`, `list`, `numpy.ndarray` and `xarray.DataArray`.

diff --git a/docs/how-to-guides.md b/docs/how-to-guides.md
@@ -112,11 +112,28 @@ cells-represented profile.
 Similarly to [when specifying the altitude grid](#altitude-grid), you can 
 ensure that column densities are conserved with the `conserve_column` parameter.
 
+## Molecules selection
+
+You might be interested only in the mole fraction data of specific molecules.
+To select the molecules you want to be included in your profile, specify them
+with the `molecules` parameter:
+
+```python
+ds = joseki.make(
+    identifier="afgl_1986-us_standard",
+    molecules=["H2O", "CO2", "O3"],
+)
+```
+
+In the above example, the mole fraction data covers the molecules H2O, CO2 and 
+O3 only.
+
+
 ## Advanced options
 
 The collection of atmospheric profiles defined by
-[Anderson et al (1986)](bibliography.md#Anderson+1986) includes volume mixing
-ratio data for 28 molecules, where molecules 8-28 are described as *additional*.
+[Anderson et al (1986)](bibliography.md#Anderson+1986) includes mole fraction
+data for 28 molecules, where molecules 8-28 are described as *additional*.
 By default, these additional molecules are included in the atmospheric profile.
 To discard these additional molecules, set the `additional_molecules`
 parameter to `False`:
@@ -199,7 +216,7 @@ molecules.
 
 You can easily make a plot of any of the variables of a dataset, i.e.,
 air pressure (``p``), air temperature (``t``), air number density (``n``) or
-volume fraction (``x_*``):
+mole fraction (``x_*``):
 
 ??? example "Pressure plot"
 
@@ -267,7 +284,7 @@ volume fraction (``x_*``):
           xscale="log",
        )
 
-    plt.xlabel("volume fraction [dimensionless]")
+    plt.xlabel("mole fraction [dimensionless]")
     plt.legend(ds.joseki.molecules)
     plt.show()
     ```

diff --git a/src/joseki/accessor.py b/src/joseki/accessor.py
@@ -95,7 +95,7 @@ def column_number_density(
             where
 
             * $z$ is the altitude,
-            * $x_{\mathrm{M}}(z)$ is the volume mixing ratio of molecule M
+            * $x_{\mathrm{M}}(z)$ is the mole fraction of molecule M
             at altitude $z$,
             * $n(z)$ is the air number density at altitude $z$,
             * $n_{\mathrm{M}}(z)$ is the number density of molecule M at
@@ -205,13 +205,13 @@ def mass_density_at_sea_level(
         }
 
     @property
-    def volume_fraction_at_sea_level(
+    def mole_fraction_at_sea_level(
         self,
     ) -> t.Dict[str, pint.Quantity]:
-        """Compute volume fraction at sea level.
+        """Compute mole fraction at sea level.
 
         Returns:
-            A mapping of molecule and volume mixing fraction at sea level.
+            A mapping of molecule and mole fraction at sea level.
         """
         ds = self._obj
         return {
@@ -220,20 +220,20 @@ def volume_fraction_at_sea_level(
         }
 
     @property
-    def volume_fraction(self) -> xr.DataArray:
-        """Extract volume fraction and tabulate as a function of (m, z).
+    def mole_fraction(self) -> xr.DataArray:
+        """Extract mole fraction and tabulate as a function of (m, z).
 
         Returns:
-            Volume fraction.
+            Mole fraction.
         """
         ds = self._obj
         molecules = self.molecules
         concatenated = xr.concat([ds[f"x_{m}"] for m in molecules], dim="m")
         concatenated["m"] = ("m", molecules, {"long_name": "molecule"})
         concatenated.attrs.update(
             {
-                "standard_name": "volume_fraction",
-                "long_name": "volume fraction",
+                "standard_name": "mole_fraction",
+                "long_name": "mole fraction",
                 "units": "dimensionless",
             }
         )
@@ -247,7 +247,7 @@ def mass_fraction(self) -> xr.DataArray:
         Returns:
             Mass fraction.
         """
-        x = self.volume_fraction
+        x = self.mole_fraction
         m_air = self.air_molar_mass
         m = molar_mass(molecules=self.molecules)
         y = (x * m / m_air).rename("y")
@@ -281,13 +281,13 @@ def air_molar_mass(self) -> xr.DataArray:
             $$
 
             where
-            * $x_{\mathrm{M}}$ is the volume fraction of molecule M,
+            * $x_{\mathrm{M}}$ is the mole fraction of molecule M,
             * $m_{\mathrm{M}}$ is the molar mass of molecule M.
 
-            To compute the air molar mass accurately, the volume fraction of
+            To compute the air molar mass accurately, the mole fractions of
             molecular nitrogen (N2), molecular oxygen (O2), and argon (Ar) are
             required. If these are not present in the dataset, they are
-            computed using the assumption that the volume fraction of these
+            computed using the assumption that the mole fractions of these
             molecules are constant with altitude and set to the following
             values:
 
@@ -297,9 +297,9 @@ def air_molar_mass(self) -> xr.DataArray:
 
             are independent of altitude.
 
-            Since nothing garantees that the volume fraction sum is equal to
-            one, the air molar mass is computed as the sum of the volume
-            fraction weighted molar mass divided by the sum of the volume
+            Since nothing guarantees that the mole fraction sum is equal to
+            one, the air molar mass is computed as the sum of the mole
+            fraction weighted molar mass divided by the sum of the mole
             fraction.
         """
         ds = self._obj
@@ -314,7 +314,7 @@ def air_molar_mass(self) -> xr.DataArray:
                 ds_copy[f"x_{m}"].attrs.update({"units": "dimensionless"})
 
         # compute air molar mass
-        x = ds_copy.joseki.volume_fraction
+        x = ds_copy.joseki.mole_fraction
         molecules = x.m.values
         mm = xr.DataArray(
             data=np.array([MM[m] for m in molecules]),
@@ -357,7 +357,7 @@ def scaling_factors(
             * a column mass density [`mass * length^-2`],
             * a number densitx at sea level [`length^-3`],
             * a mass density at sea level [`mass * length^-3`],
-            * a volume mixing fraction at sea level [`dimensionless`]
+            * a mole fraction at sea level [`dimensionless`]
 
             The scaling factor is then evaluated as the ratio of the target
             amount with the original amount, for each molecule.
@@ -370,7 +370,7 @@ def scaling_factors(
             "[mass] * [length]^-2": self.column_mass_density,
             "[length]^-3": self.number_density_at_sea_level,
             "[mass] * [length]^-3": self.mass_density_at_sea_level,
-            "": self.volume_fraction_at_sea_level,
+            "": self.mole_fraction_at_sea_level,
         }
         factors = {}
         for m, target_amount in target.items():
@@ -394,7 +394,7 @@ def rescale(
 
         Args:
             factors: A mapping of molecule and scaling factor.
-            check_x_sum: if True, check that volume fraction sums
+            check_x_sum: if True, check that mole fraction sums
                 are never larger than one.
         Raises:
             ValueError: if `check_x_sum` is `True` and the 
@@ -405,7 +405,7 @@ def rescale(
         """
         ds = self._obj
 
-        # update volume fraction
+        # update mole fraction
         x_new = {}
         for m in factors:
             with xr.set_options(keep_attrs=True):
@@ -420,10 +420,10 @@ def rescale(
             raise ValueError("Cannot rescale") from e
 
         # update history attribute
-        now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
+        now = datetime.datetime.utcnow().replace(microsecond=0).isoformat()
         for m in factors.keys():
             ds.attrs["history"] += (
-                f"\n{now} - rescaled {m}'s volume mixing ratio using a scaling "
+                f"\n{now} - rescaled {m}'s mole fraction using a scaling "
                 f"factor of {factors[m]:.3f} - joseki, version {__version__}"
             )
 
@@ -435,15 +435,15 @@ def rescale_to(
         check_x_sum: bool = False,
     ) -> xr.Dataset:
         """
-        Rescale volume fractions to match target molecular total column 
+        Rescale mole fractions to match target molecular total column 
         densities.
 
         Args:
             target: Mapping of molecule and target total column density. 
                 Total column must be either a column number density 
                 [`length^-2`], a column mass density [`mass * length^-2`], a 
                 number densitx at sea level [`length^-3`], a mass density at 
-                sea level [`mass * length^-3`], a volume mixing fraction at 
+                sea level [`mass * length^-3`], a mole fraction at 
                 sea level [`dimensionless`].
 
         Returns:
@@ -454,6 +454,30 @@ def rescale_to(
             check_x_sum=check_x_sum,
         )
 
+    def drop_molecules(
+        self,
+        molecules: t.List[str],
+    ) -> xr.Dataset:
+        """Drop the mole fraction data of the specified molecules.
+
+        Args:
+            molecules: List of molecules to drop.
+
+        Returns:
+            New dataset without the `x_<molecule>` variables of the dropped
+            molecules, with the drop recorded in its `history` attribute.
+        """
+        # Drop first, then annotate the copy: `self._obj` keeps its data, so
+        # its history must not record a drop.
+        ds = self._obj.drop_vars([f"x_{m}" for m in molecules])
+
+        now = datetime.datetime.utcnow().replace(microsecond=0).isoformat()
+        ds.attrs["history"] += (
+            f"\n{now} - dropped mole fraction data for molecules "
+            f"{', '.join(molecules)} - joseki, version {__version__}"
+        )
+        return ds
+
     def validate(
         self,
         check_x_sum: bool = False,

diff --git a/src/joseki/core.py b/src/joseki/core.py
@@ -1,4 +1,6 @@
 """Core module."""
+from __future__ import annotations
+
 import logging
 import os
 import typing as t
@@ -7,17 +9,22 @@
 import xarray as xr
 
 from .profiles.factory import factory
-from .profiles.core import represent_profile_in_cells, DEFAULT_METHOD
+from .profiles.core import (
+    DEFAULT_METHOD,
+    represent_profile_in_cells,
+    select_molecules,
+)
 
 logger = logging.getLogger(__name__)
 
 
 def make(
     identifier: str,
-    z: t.Optional[pint.Quantity] = None,
-    interp_method: t.Optional[t.Mapping[str, str]] = DEFAULT_METHOD,
+    z: pint.Quantity | None = None,
+    interp_method: t.Mapping[str, str] | None = DEFAULT_METHOD,
     represent_in_cells: bool = False,
     conserve_column: bool = False,
+    molecules: t.List[str] | None = None,
     **kwargs: t.Any,
 ) -> xr.Dataset:
     """
@@ -32,6 +39,7 @@ def make(
             interpolated profile.
         conserve_column: If `True`, ensure that column densities are conserved
             during interpolation.
+        molecules: List of molecules to include in the profile.
         kwargs: Additional keyword arguments passed to the profile constructor.
 
     Returns:
@@ -42,6 +50,7 @@ def make(
     logger.debug("interp_method: %s", interp_method)
     logger.debug("represent_in_cells: %s", represent_in_cells)
     logger.debug("conserve_column: %s", conserve_column)
+    logger.debug("molecules: %s", molecules)
     logger.debug("kwargs: %s", kwargs)
 
     profile = factory.create(identifier)
@@ -61,6 +70,9 @@ def make(
             conserve_column=conserve_column,
         )
 
+    if molecules is not None:
+        ds = select_molecules(ds, molecules)
+
     return ds
 
 
@@ -97,4 +109,4 @@ def identifiers() -> t.List[str]:
     Returns:
         List of all registered profile identifiers.
     """
-    return factory.registered_identifiers
+    return factory.registered_identifiers
diff --git a/src/joseki/profiles/afgl_1986.py b/src/joseki/profiles/afgl_1986.py
@@ -153,7 +153,7 @@ def dataframe_to_dataset(
     for s in molecules:
         data_vars[f"x_{s}"] = (
             df[s].values * ureg.ppm
-        )  # raw data volume fraction are given in ppmv
+        )  # raw mole fraction data are given in ppmv
 
     # attributes
     pretty_identifier = f"AFGL (1986) {identifier.value.replace('_', '-')}"

diff --git a/src/joseki/profiles/cams.py b/src/joseki/profiles/cams.py
@@ -20,7 +20,7 @@
 import xarray as xr
 
 from ..units import ureg, to_quantity
-from .util import mass_fraction_to_volume_fraction3, to_m_suffixed_data
+from .util import mass_fraction_to_mole_fraction3, to_m_suffixed_data
 from .schema import schema
 from .core import DEFAULT_METHOD
 from .core import regularize as _regularize, extrapolate as _extrapolate
@@ -717,11 +717,11 @@ def mole_fractions(ds: xr.Dataset) -> xr.DataArray:
         }
     )
 
-    # Convert mass mixing ratio to volume mixing ratio
-    x = mass_fraction_to_volume_fraction3(y=y)
+    # Convert mass fraction to mole fraction
+    x = mass_fraction_to_mole_fraction3(y=y)
 
     # Re-organise the `xarray.DataArray` into a `xarray.Dataset` with one 
-    # data variable per molecule volume fraction
+    # data variable per molecule mole fraction
     return to_m_suffixed_data(x)