-
-
Notifications
You must be signed in to change notification settings - Fork 44
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add vald shortlist parsing as well as handling for wavelength in vacuum/air and nm/aa #386
Merged
wkerzendorf
merged 17 commits into
tardis-sn:master
from
jvshields:add_vald_short_linelist
Dec 11, 2023
Merged
Changes from 14 commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
163d7e2
add shortlist parsing and handling for wavelength in vacuum/air and n…
jvshields 180186d
fix broken tests
jvshields 288b664
add support for stellar linelists
jvshields a031124
add tests
jvshields a8e035f
cleanup
jvshields e06e68c
add example notebook with documentation
jvshields 6f143f2
improve documentation
jvshields 77d281e
see if pathlib breaks tests
jvshields 2c326cd
change os to pathlib
jvshields 376b776
change paths to strings like os output
jvshields 7388cc8
fix vald test bug, change regex pattern to constant naming convention
jvshields f2bb37a
fix tests
jvshields 5fee3ed
remove VALD_URL
jvshields 9b8a457
remove os import
jvshields 3d181fa
put vald wavelength column handling in external function
jvshields 0b1efcd
make DATA_DIR_PATH constant in conftest.py
jvshields 5293e2e
continue propagating data_dir
jvshields File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,10 +9,9 @@ | |
ATOMIC_SYMBOLS_DATA, | ||
convert_symbol2atomic_number, | ||
) | ||
from astropy import units as u | ||
|
||
|
||
VALD_URL = "https://media.githubusercontent.com/media/tardis-sn/carsus-db/master/vald/vald_sample.dat" | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
|
@@ -25,19 +24,23 @@ class VALDReader(object): | |
fname: str | ||
path to vald data file | ||
strip_molecules: bool | ||
Whether to remove molecules from the data. | ||
Whether to remove molecules from the data. Defaults to True. | ||
shortlist: bool | ||
Whether the parsed file is a shortlist or not. | ||
|
||
|
||
Methods | ||
-------- | ||
vald_raw: | ||
Return pandas DataFrame representation of vald | ||
linelist: | ||
Return pandas DataFrame representation of linelist properties necessary to compute line opacities | ||
|
||
""" | ||
|
||
vald_columns = [ | ||
"elm_ion", | ||
"wl_air", | ||
"wave_unprepared", # This is the wavelength column header before it is ingested overwritten with appropriate units | ||
"log_gf", | ||
"e_low", | ||
"j_lo", | ||
|
@@ -51,24 +54,47 @@ class VALDReader(object): | |
"waals", | ||
] | ||
|
||
def __init__(self, fname=None, strip_molecules=True): | ||
vald_shortlist_columns = [ | ||
"elm_ion", | ||
"wave_unprepared", # This is the wavelength column header before it is ingested overwritten with appropriate units | ||
"e_low", | ||
"log_gf", | ||
"rad", | ||
"stark", | ||
"waals", | ||
"lande_factor", | ||
"central_depth", | ||
"reference", | ||
] | ||
|
||
def __init__(self, fname=None, strip_molecules=True, shortlist=False): | ||
""" | ||
Parameters | ||
---------- | ||
fname: str | ||
Path to the vald file (http or local file). | ||
strip_molecules: bool | ||
Whether to remove molecules from the data. | ||
|
||
shortlist: bool | ||
Whether the parsed file is a shortlist or not. | ||
""" | ||
|
||
self.fname = VALD_URL if fname is None else fname | ||
assert fname is not None, "fname must be specified" | ||
self.fname = fname | ||
|
||
self._vald_raw = None | ||
self._vald = None | ||
self._linelist = None | ||
self._stellar_linelist = False | ||
|
||
self._vald_columns = ( | ||
self.vald_shortlist_columns.copy() | ||
if shortlist | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not a big fan of ternary operators, but I also won't argue strongly if you want to keep this one. |
||
else self.vald_columns.copy() | ||
) | ||
|
||
self.strip_molecules = strip_molecules | ||
self.shortlist = shortlist | ||
|
||
@property | ||
def vald_raw(self): | ||
|
@@ -126,15 +152,34 @@ def read_vald_raw(self, fname=None): | |
# Elm Ion WL_air(A) log gf* E_low(eV) J lo E_up(eV) J up lower upper mean Rad. Stark Waals | ||
# 'TiO 1', 4100.00020, -11.472, 0.2011, 31.0, 3.2242, 32.0, 99.000, 99.000, 99.000, 6.962, 0.000, 0.000, | ||
|
||
data_match = re.compile("'[a-zA-Z]+ \d+',[\s*-?\d+[\.\d+]+,]*") | ||
DATA_RE_PATTERN = re.compile("'[a-zA-Z]+ \d+',[\s*-?\d+[\.\d+]+,]*") | ||
|
||
buffer, checksum = read_from_buffer(self.fname) | ||
content = buffer.read().decode() | ||
|
||
# Identify the wavelength column header and overwrite the placeholder column name with it, so the units and the air/vacuum convention are known | ||
# Also need to identify if Vmic is in the columns for correct column construction | ||
for line in content.split("\n")[:10]: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Make an issue about pattern matching. |
||
if "WL" in line: | ||
for column_header in line.split(): | ||
if "WL" in column_header: | ||
self._vald_columns[1] = column_header | ||
logger.info(f"Found wavelength column header: {column_header}") | ||
if "Vmicro" in line and self._stellar_linelist == False: | ||
logger.info("Found Vmic column - This is a stellar vald linelist") | ||
self._vald_columns.insert(3, "v_mic") | ||
|
||
self._stellar_linelist = True | ||
|
||
vald = pd.read_csv( | ||
StringIO("\n".join(data_match.findall(buffer.read().decode()))), | ||
names=self.vald_columns, | ||
StringIO("\n".join(DATA_RE_PATTERN.findall(content))), | ||
names=self._vald_columns, | ||
index_col=False, | ||
) | ||
|
||
if self.shortlist: | ||
del vald["reference"] | ||
|
||
return vald, checksum | ||
|
||
def parse_vald(self, vald_raw=None, strip_molecules=True): | ||
|
@@ -158,7 +203,19 @@ def parse_vald(self, vald_raw=None, strip_molecules=True): | |
vald["elm_ion"] = vald["elm_ion"].str.replace("'", "") | ||
vald[["chemical", "ion_charge"]] = vald["elm_ion"].str.split(" ", expand=True) | ||
vald["ion_charge"] = vald["ion_charge"].astype(int) - 1 | ||
vald["wavelength"] = convert_wavelength_air2vacuum(vald["wl_air"]) | ||
|
||
wave = vald.columns[1] | ||
if "nm" in wave: | ||
jvshields marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if "air" in wave: | ||
vald["wavelength"] = convert_wavelength_air2vacuum( | ||
(vald[wave].values * u.nm).to(u.AA) | ||
) | ||
else: | ||
vald["wavelength"] = (vald[wave].values * u.nm).to(u.AA) | ||
elif "air" in wave: | ||
vald["wavelength"] = convert_wavelength_air2vacuum(vald[wave]) | ||
else: | ||
vald["wavelength"] = vald[wave] | ||
|
||
del vald["elm_ion"] | ||
|
||
|
@@ -191,40 +248,42 @@ def extract_linelist(self, vald): | |
------- | ||
pandas.DataFrame | ||
vald linelist containing only the following columns: | ||
atomic_number or chemical, ion_charge, wavelength, log_gf, rad, stark, waals | ||
atomic_number or chemical, ion_charge, wavelength, e_low, log_gf, rad, stark, waals | ||
optionally: v_mic (if stellar linelist) and e_up, j_lo, j_up (if not shortlist) | ||
""" | ||
if self.strip_molecules: | ||
return vald[ | ||
[ | ||
"atomic_number", | ||
"ion_charge", | ||
"wavelength", | ||
"log_gf", | ||
"e_low", | ||
"e_up", | ||
"j_lo", | ||
"j_up", | ||
"rad", | ||
"stark", | ||
"waals", | ||
] | ||
].copy() | ||
if self.shortlist: | ||
linelist_mask = [ | ||
"chemical", | ||
"ion_charge", | ||
"wavelength", | ||
"log_gf", | ||
"e_low", | ||
"rad", | ||
"stark", | ||
"waals", | ||
] | ||
if self._stellar_linelist: | ||
jvshields marked this conversation as resolved.
Show resolved
Hide resolved
|
||
linelist_mask.insert(5, "v_mic") | ||
|
||
else: | ||
return vald[ | ||
[ | ||
"chemical", | ||
"ion_charge", | ||
"wavelength", | ||
"log_gf", | ||
"e_low", | ||
"e_up", | ||
"j_lo", | ||
"j_up", | ||
"rad", | ||
"stark", | ||
"waals", | ||
] | ||
].copy() | ||
linelist_mask = [ | ||
"chemical", | ||
"ion_charge", | ||
"wavelength", | ||
"log_gf", | ||
"e_low", | ||
"e_up", | ||
"j_lo", | ||
"j_up", | ||
"rad", | ||
"stark", | ||
"waals", | ||
] | ||
|
||
if self.strip_molecules: | ||
linelist_mask[0] = "atomic_number" | ||
|
||
return vald[linelist_mask].copy() | ||
|
||
def to_hdf(self, fname): | ||
""" | ||
|
@@ -236,3 +295,4 @@ def to_hdf(self, fname): | |
with pd.HDFStore(fname, "w") as f: | ||
f.put("/vald_raw", self.vald_raw) | ||
f.put("/vald", self.vald) | ||
f.put("/linelist", self.linelist) |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would not make this a fixture but a single constant
DATA_DIR_PATH