tardis-sn · wkerzendorf · Dec 11, 2023 · Nov 16, 2023 · Nov 16, 2023 · Nov 17, 2023
diff --git a/carsus/conftest.py b/carsus/conftest.py
@@ -7,7 +7,7 @@
 
 """
 
-import os
+from pathlib import Path
 
 from astropy.version import version as astropy_version
 
@@ -44,7 +44,7 @@ def pytest_configure(config):
 
         from . import __version__
 
-        packagename = os.path.basename(os.path.dirname(__file__))
+        packagename = Path(__file__).parent.name
         TESTED_VERSIONS[packagename] = __version__
 
 
@@ -103,7 +103,7 @@ def memory_session():
 
 @pytest.fixture(scope="session")
 def data_dir():
-    return os.path.join(os.path.dirname(__file__), "tests", "data")
+    return Path(__file__).parent / "tests" / "data"
 
 
 @pytest.fixture(scope="session")
@@ -112,7 +112,7 @@ def test_db_fname(request):
     if test_db_fname is None:
         pytest.skip("--testing database was not specified")
     else:
-        return os.path.expandvars(os.path.expanduser(test_db_fname))
+        return str(Path(test_db_fname).expanduser().resolve())
 
 
 @pytest.fixture(scope="session")
@@ -122,24 +122,29 @@ def test_db_url(test_db_fname):
 
 @pytest.fixture(scope="session")
 def gfall_fname(data_dir):
-    return os.path.join(data_dir, "gftest.all")  # Be III, B IV, N VI
+    return str(data_dir / "gftest.all")  # Be III, B IV, N VI
 
 
 @pytest.fixture(scope="session")
-def gfall_http(data_dir):
+def gfall_http():
     url = "https://github.com/tardis-sn/carsus/"
     url += "master/carsus/tests/data/gftest.all"
     return url
 
 
 @pytest.fixture(scope="session")
 def vald_fname(data_dir):
-    return os.path.join(data_dir, "valdtest.dat")
+    return str(data_dir / "valdtest.dat")
+
+
+@pytest.fixture(scope="session")
+def vald_short_form_stellar_fname(data_dir):
+    return str(data_dir / "vald_shortlist_test.dat")
 
 
 @pytest.fixture(scope="session")
 def nndc_dirname(data_dir):
-    return os.path.join(data_dir, "nndc")  # Mn-52, Ni-56
+    return str(data_dir / "nndc")  # Mn-52, Ni-56
 
 
 @pytest.fixture(scope="session")
@@ -179,4 +184,4 @@ def refdata_path(request):
     if refdata_path is None:
         pytest.skip("--refdata folder path was not specified")
     else:
-        return os.path.expandvars(os.path.expanduser(refdata_path))
+        return str(Path(refdata_path).expanduser().resolve())
diff --git a/carsus/io/tests/test_vald.py b/carsus/io/tests/test_vald.py
@@ -39,6 +39,18 @@ def vald_linelist(vald_rdr):
     return vald_rdr.linelist
 
 
+@pytest.fixture()
+def vald_rdr_short_form_stellar(vald_short_form_stellar_fname):
+    return VALDReader(
+        fname=vald_short_form_stellar_fname, strip_molecules=False, shortlist=True
+    )
+
+
+@pytest.fixture()
+def vald_linelist_short_form_stellar(vald_rdr_short_form_stellar):
+    return vald_rdr_short_form_stellar.linelist
+
+
 @pytest.mark.parametrize(
     "index, wl_air, log_gf, e_low, e_up",
     [
@@ -48,7 +60,7 @@ def vald_linelist(vald_rdr):
 )
 def test_vald_reader_vald_raw(vald_raw, index, wl_air, log_gf, e_low, e_up):
     row = vald_raw.loc[index]
-    assert_almost_equal(row["wl_air"], wl_air)
+    assert_almost_equal(row["WL_air(A)"], wl_air)
     assert_allclose([row["log_gf"], row["e_low"], row["e_up"]], [log_gf, e_low, e_up])
 
 
@@ -61,7 +73,7 @@ def test_vald_reader_vald_raw(vald_raw, index, wl_air, log_gf, e_low, e_up):
 )
 def test_vald_reader_vald(vald, index, wl_air, log_gf, e_low, e_up, ion_charge):
     row = vald.loc[index]
-    assert_almost_equal(row["wl_air"], wl_air)
+    assert_almost_equal(row["WL_air(A)"], wl_air)
     assert_allclose(
         [row["log_gf"], row["e_low"], row["e_up"], row["ion_charge"]],
         [log_gf, e_low, e_up, ion_charge],
@@ -115,3 +127,33 @@ def test_vald_linelist(vald_linelist):
     )
     # Test to see if any values have become nan in new columns
     assert ~vald_linelist.isna().values.any()
+
+
+@pytest.mark.parametrize(
+    "index, wavelength, log_gf, e_low, v_mic, ion_charge",
+    [
+        (73, 5001.40537184386, -1.563, 0.5786, 1, 0),
+        (17, 5001.397869850396, -6.421, 7.1801, 1, 0),
+    ],
+)
+def test_vald_short_stellar_linelist(
+    vald_linelist_short_form_stellar,
+    index,
+    wavelength,
+    log_gf,
+    e_low,
+    v_mic,
+    ion_charge,
+):
+    assert len(vald_linelist_short_form_stellar) == 95
+    row = vald_linelist_short_form_stellar.iloc[index]
+    assert_almost_equal(row["wavelength"], wavelength)
+    assert_allclose(
+        [
+            row["log_gf"],
+            row["e_low"],
+            row["v_mic"],
+            row["ion_charge"],
+        ],
+        [log_gf, e_low, v_mic, ion_charge],
+    )
diff --git a/carsus/io/vald/vald.py b/carsus/io/vald/vald.py
@@ -9,10 +9,9 @@
     ATOMIC_SYMBOLS_DATA,
     convert_symbol2atomic_number,
 )
+from astropy import units as u
 
 
-VALD_URL = "https://media.githubusercontent.com/media/tardis-sn/carsus-db/master/vald/vald_sample.dat"
-
 logger = logging.getLogger(__name__)
 
 
@@ -25,19 +24,23 @@ class VALDReader(object):
     fname: str
         path to vald data file
     strip_molecules: bool
-        Whether to remove molecules from the data.
+        Whether to remove molecules from the data. Defaults to True.
+    shortlist: bool
+        Whether the parsed file is a shortlist or not.
 
 
     Methods
     --------
     vald_raw:
         Return pandas DataFrame representation of vald
+    linelist:
+        Return pandas DataFrame representation of linelist properties necessary to compute line opacities
 
     """
 
     vald_columns = [
         "elm_ion",
-        "wl_air",
+        "wave_unprepared",  # Placeholder name; overwritten with the wavelength column header read from the file
         "log_gf",
         "e_low",
         "j_lo",
@@ -51,24 +54,53 @@ class VALDReader(object):
         "waals",
     ]
 
-    def __init__(self, fname=None, strip_molecules=True):
+    vald_shortlist_columns = [
+        "elm_ion",
+        "wave_unprepared",  # Placeholder name; overwritten with the wavelength column header read from the file
+        "e_low",
+        "log_gf",
+        "rad",
+        "stark",
+        "waals",
+        "lande_factor",
+        "central_depth",
+        "reference",
+    ]
+
+    def __init__(self, fname=None, strip_molecules=True, shortlist=False):
         """
         Parameters
         ----------
         fname: str
             Path to the vald file (http or local file).
         strip_molecules: bool
             Whether to remove molecules from the data.
-
+        shortlist: bool
+            Whether the parsed file is a shortlist or not.
+
+        Raises
+        ------
+        ValueError
+            If ``fname`` is None.
         """
 
-        self.fname = VALD_URL if fname is None else fname
+        if fname is None:
+            raise ValueError("fname must be specified")
+        self.fname = fname
 
         self._vald_raw = None
         self._vald = None
         self._linelist = None
+        self._stellar_linelist = False
+
+        self._vald_columns = (
+            self.vald_shortlist_columns.copy()
+            if shortlist
+            else self.vald_columns.copy()
+        )
 
         self.strip_molecules = strip_molecules
+        self.shortlist = shortlist
 
     @property
     def vald_raw(self):
@@ -126,15 +152,34 @@ def read_vald_raw(self, fname=None):
         # Elm Ion       WL_air(A)  log gf* E_low(eV) J lo  E_up(eV) J up   lower   upper    mean   Rad.  Stark    Waals
         # 'TiO 1',     4100.00020, -11.472,  0.2011, 31.0,  3.2242, 32.0, 99.000, 99.000, 99.000, 6.962, 0.000, 0.000,
 
-        data_match = re.compile("'[a-zA-Z]+ \d+',[\s*-?\d+[\.\d+]+,]*")
+        data_pattern = re.compile(r"'[a-zA-Z]+ \d+',[\s*-?\d+[\.\d+]+,]*")
 
         buffer, checksum = read_from_buffer(self.fname)
+        content = buffer.read().decode()
+
+        # Scan the first 10 lines for the wavelength column header; it replaces the placeholder column name
+        # A "Vmicro" header marks a stellar linelist, which needs an extra v_mic column
+        for line in content.split("\n")[:10]:
+            if "WL" in line:
+                for column_header in line.split():
+                    if "WL" in column_header:
+                        self._vald_columns[1] = column_header
+                        logger.info(f"Found wavelength column header: {column_header}")
+            if "Vmicro" in line and not self._stellar_linelist:  # insert v_mic only once
+                logger.info("Found Vmic column - This is a stellar vald linelist")
+                self._vald_columns.insert(3, "v_mic")
+
+                self._stellar_linelist = True
+
         vald = pd.read_csv(
-            StringIO("\n".join(data_match.findall(buffer.read().decode()))),
-            names=self.vald_columns,
+            StringIO("\n".join(data_pattern.findall(content))),
+            names=self._vald_columns,
             index_col=False,
         )
 
+        if self.shortlist:
+            del vald["reference"]
+
         return vald, checksum
 
     def parse_vald(self, vald_raw=None, strip_molecules=True):
@@ -158,7 +203,19 @@ def parse_vald(self, vald_raw=None, strip_molecules=True):
         vald["elm_ion"] = vald["elm_ion"].str.replace("'", "")
         vald[["chemical", "ion_charge"]] = vald["elm_ion"].str.split(" ", expand=True)
         vald["ion_charge"] = vald["ion_charge"].astype(int) - 1
-        vald["wavelength"] = convert_wavelength_air2vacuum(vald["wl_air"])
+
+        wave = vald.columns[1]  # column name is checked for "nm" and "air" below
+        if "nm" in wave:
+            if "air" in wave:
+                vald["wavelength"] = convert_wavelength_air2vacuum(
+                    (vald[wave].values * u.nm).to(u.AA).value
+                )
+            else:
+                vald["wavelength"] = (vald[wave].values * u.nm).to(u.AA).value
+        elif "air" in wave:
+            vald["wavelength"] = convert_wavelength_air2vacuum(vald[wave])
+        else:
+            vald["wavelength"] = vald[wave]
 
         del vald["elm_ion"]
 
@@ -191,40 +248,42 @@ def extract_linelist(self, vald):
         -------
             pandas.DataFrame
                 vald linelist containing only the following columns:
-                atomic_number or chemical, ion_charge, wavelength, log_gf, rad, stark, waals
+                atomic_number or chemical, ion_charge, wavelength, e_low, log_gf, rad, stark, waals
+                optionally: v_mic (if stellar linelist) and e_up, j_lo, j_up (if not shortlist)
         """
-        if self.strip_molecules:
-            return vald[
-                [
-                    "atomic_number",
-                    "ion_charge",
-                    "wavelength",
-                    "log_gf",
-                    "e_low",
-                    "e_up",
-                    "j_lo",
-                    "j_up",
-                    "rad",
-                    "stark",
-                    "waals",
-                ]
-            ].copy()
+        if self.shortlist:
+            linelist_mask = [
+                "chemical",
+                "ion_charge",
+                "wavelength",
+                "log_gf",
+                "e_low",
+                "rad",
+                "stark",
+                "waals",
+            ]
+            if self._stellar_linelist:
+                linelist_mask.insert(5, "v_mic")
+
         else:
-            return vald[
-                [
-                    "chemical",
-                    "ion_charge",
-                    "wavelength",
-                    "log_gf",
-                    "e_low",
-                    "e_up",
-                    "j_lo",
-                    "j_up",
-                    "rad",
-                    "stark",
-                    "waals",
-                ]
-            ].copy()
+            linelist_mask = [
+                "chemical",
+                "ion_charge",
+                "wavelength",
+                "log_gf",
+                "e_low",
+                "e_up",
+                "j_lo",
+                "j_up",
+                "rad",
+                "stark",
+                "waals",
+            ]
+
+        if self.strip_molecules:
+            linelist_mask[0] = "atomic_number"
+
+        return vald[linelist_mask].copy()
 
     def to_hdf(self, fname):
         """
@@ -236,3 +295,4 @@ def to_hdf(self, fname):
         with pd.HDFStore(fname, "w") as f:
             f.put("/vald_raw", self.vald_raw)
             f.put("/vald", self.vald)
+            f.put("/linelist", self.linelist)