Skip to content

Support CDMS all species option; fix format for CDMS linelist reading; fix CDMS quantum numbers parsing #3302

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
5 changes: 3 additions & 2 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ linelists.cdms
^^^^^^^^^^^^^^

- Add a keyword to control writing of new species cache files. This is needed to prevent tests from overwriting those files. [#3297]
- Add more complete support for CDMS quantum number and other value parsing. [#3302]

heasarc
^^^^^^^
Expand Down Expand Up @@ -76,10 +77,10 @@ mast

- Fix bug in ``utils.remove_duplicate_products`` where the order of the products in an input table was not retained. [#3314]

- Added ``return_uri_map`` parameter to ``Observations.get_cloud_uris`` to return a mapping of the input data product URIs
- Added ``return_uri_map`` parameter to ``Observations.get_cloud_uris`` to return a mapping of the input data product URIs
to the returned cloud URIs. [#3314]

- Added ``verbose`` parameter to ``Observations.get_cloud_uris`` to control whether warnings are logged when a product cannot
- Added ``verbose`` parameter to ``Observations.get_cloud_uris`` to control whether warnings are logged when a product cannot
be found in the cloud. [#3314]


Expand Down
115 changes: 73 additions & 42 deletions astroquery/linelists/cdms/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# import configurable items declared in __init__.py
from astroquery.linelists.cdms import conf
from astroquery.exceptions import InvalidQueryError, EmptyResponseError
from astroquery import log

import re
import string
Expand Down Expand Up @@ -54,7 +55,8 @@
min_strength : int, optional
Minimum strength in catalog units, the default is -500

molecule : list, string of regex if parse_name_locally=True, optional
molecule : list or string if parse_name_locally=False,
string of regex if parse_name_locally=True, optional
Identifiers of the molecules to search for. If this parameter
is not provided the search will match any species. Default is 'All'.
As a first pass, the molecule will be searched for with a direct
Expand Down Expand Up @@ -134,18 +136,21 @@
# changes interpretation of query
self._last_query_temperature = temperature_for_intensity

if molecule is not None:
if parse_name_locally:
self.lookup_ids = build_lookup()
luts = self.lookup_ids.find(molecule, flags)
if len(luts) == 0:
raise InvalidQueryError('No matching species found. Please '
'refine your search or read the Docs '
'for pointers on how to search.')
payload['Molecules'] = tuple(f"{val:06d} {key}"
for key, val in luts.items())[0]
else:
payload['Molecules'] = molecule
if molecule == 'All':
payload['Moleculesgrp'] = 'all species'

Check warning on line 140 in astroquery/linelists/cdms/core.py

View check run for this annotation

Codecov / codecov/patch

astroquery/linelists/cdms/core.py#L140

Added line #L140 was not covered by tests
else:
if molecule is not None:
if parse_name_locally:
self.lookup_ids = build_lookup()
luts = self.lookup_ids.find(molecule, flags)
if len(luts) == 0:
raise InvalidQueryError('No matching species found. Please '

Check warning on line 147 in astroquery/linelists/cdms/core.py

View check run for this annotation

Codecov / codecov/patch

astroquery/linelists/cdms/core.py#L147

Added line #L147 was not covered by tests
'refine your search or read the Docs '
'for pointers on how to search.')
payload['Molecules'] = tuple(f"{val:06d} {key}"
for key, val in luts.items())[0]
else:
payload['Molecules'] = molecule

if get_query_payload:
return payload
Expand Down Expand Up @@ -180,7 +185,7 @@
# accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S'
badlist = (self.MALFORMATTED_MOLECULE_LIST + # noqa
[y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()])
if payload['Molecules'] in badlist:
if 'Moleculesgrp' not in payload.keys() and payload['Molecules'] in badlist:
raise ValueError(f"Molecule {payload['Molecules']} is known not to comply with standard CDMS format. "
f"Try get_molecule({payload['Molecules']}) instead.")

Expand Down Expand Up @@ -233,15 +238,32 @@
soup = BeautifulSoup(response.text, 'html.parser')
text = soup.find('pre').text

need_to_filter_bad_molecules = False
for bad_molecule in self.MALFORMATTED_MOLECULE_LIST:
if text.find(bad_molecule.split()[1]) > -1:
need_to_filter_bad_molecules = True
break

Check warning on line 245 in astroquery/linelists/cdms/core.py

View check run for this annotation

Codecov / codecov/patch

astroquery/linelists/cdms/core.py#L244-L245

Added lines #L244 - L245 were not covered by tests
if need_to_filter_bad_molecules:
text_new = ''
text = text.split('\n')
for line in text:
need_to_include_line = True
for bad_molecule in self.MALFORMATTED_MOLECULE_LIST:
if line.find(bad_molecule.split()[1]) > -1:
need_to_include_line = False
break
if need_to_include_line:
text_new = text_new + '\n' + line
text = text_new

Check warning on line 257 in astroquery/linelists/cdms/core.py

View check run for this annotation

Codecov / codecov/patch

astroquery/linelists/cdms/core.py#L247-L257

Added lines #L247 - L257 were not covered by tests

starts = {'FREQ': 0,
'ERR': 14,
'LGINT': 25,
'DR': 36,
'ELO': 38,
'GUP': 47,
'MOLWT': 51,
'TAG': 54,
'QNFMT': 58,
'TAG': 50,
'QNFMT': 57,
'Ju': 61,
'Ku': 63,
'vu': 65,
Expand All @@ -265,6 +287,7 @@
result['FREQ'].unit = u.MHz
result['ERR'].unit = u.MHz

result['MOLWT'] = [int(x/1e3) for x in result['TAG']]
result['Lab'] = result['MOLWT'] < 0
result['MOLWT'] = np.abs(result['MOLWT'])
result['MOLWT'].unit = u.Da
Expand Down Expand Up @@ -387,7 +410,7 @@

return result

def get_molecule(self, molecule_id, *, cache=True):
def get_molecule(self, molecule_id, *, cache=True, return_response=False):
"""
Retrieve the whole molecule table for a given molecule id
"""
Expand All @@ -396,6 +419,8 @@
url = f'{self.CLASSIC_URL}/entries/c{molecule_id}.cat'
response = self._request(method='GET', url=url,
timeout=self.TIMEOUT, cache=cache)
if return_response:
return response

Check warning on line 423 in astroquery/linelists/cdms/core.py

View check run for this annotation

Codecov / codecov/patch

astroquery/linelists/cdms/core.py#L422-L423

Added lines #L422 - L423 were not covered by tests
result = self._parse_cat(response)

species_table = self.get_species_table()
Expand Down Expand Up @@ -426,21 +451,21 @@
'ELO': 32,
'GUP': 42,
'TAG': 44,
'QNFMT': 52,
'Q1': 56,
'Q2': 58,
'Q3': 60,
'Q4': 62,
'Q5': 64,
'Q6': 66,
'Q7': 68,
'Q8': 70,
'Q9': 72,
'Q10': 74,
'Q11': 76,
'Q12': 78,
'Q13': 80,
'Q14': 82,
'QNFMT': 51,
'Q1': 55,
'Q2': 57,
'Q3': 59,
'Q4': 61,
'Q5': 63,
'Q6': 65,
'Q7': 67,
'Q8': 69,
'Q9': 71,
'Q10': 73,
'Q11': 75,
'Q12': 77,
'Q13': 79,
'Q14': 81,
}

result = ascii.read(text, header_start=None, data_start=0,
Expand All @@ -450,7 +475,7 @@
format='fixed_width', fast_reader=False)

# int truncates - which is what we want
result['MOLWT'] = [int(x/1e4) for x in result['TAG']]
result['MOLWT'] = [int(x/1e3) for x in result['TAG']]

Check warning on line 478 in astroquery/linelists/cdms/core.py

View check run for this annotation

Codecov / codecov/patch

astroquery/linelists/cdms/core.py#L478

Added line #L478 was not covered by tests

result['FREQ'].unit = u.MHz
result['ERR'].unit = u.MHz
Expand All @@ -460,15 +485,18 @@
result['MOLWT'].unit = u.Da

fix_keys = ['GUP']
for suf in '':
for qn in (f'Q{ii}' for ii in range(1, 15)):
qnind = qn+suf
fix_keys.append(qnind)
for qn in (f'Q{ii}' for ii in range(1, 15)):
fix_keys.append(qn)
log.debug(f"fix_keys: {fix_keys} should include Q1, Q2, ..., Q14 and GUP")

Check warning on line 490 in astroquery/linelists/cdms/core.py

View check run for this annotation

Codecov / codecov/patch

astroquery/linelists/cdms/core.py#L488-L490

Added lines #L488 - L490 were not covered by tests
for key in fix_keys:
if not np.issubdtype(result[key].dtype, np.integer):
intcol = np.array(list(map(parse_letternumber, result[key])),
dtype=int)
if any(intcol == -999999):
intcol = np.ma.masked_where(intcol == -999999, intcol)

Check warning on line 496 in astroquery/linelists/cdms/core.py

View check run for this annotation

Codecov / codecov/patch

astroquery/linelists/cdms/core.py#L495-L496

Added lines #L495 - L496 were not covered by tests
result[key] = intcol
if not np.issubdtype(result[key].dtype, np.integer):
raise ValueError(f"Failed to parse {key} as integer")

Check warning on line 499 in astroquery/linelists/cdms/core.py

View check run for this annotation

Codecov / codecov/patch

astroquery/linelists/cdms/core.py#L498-L499

Added lines #L498 - L499 were not covered by tests

result['LGINT'].unit = u.nm**2 * u.MHz
result['ELO'].unit = u.cm**(-1)
Expand All @@ -486,13 +514,16 @@
From the CDMS docs:
"Exactly two characters are available for each quantum number. Therefore, half
integer quanta are rounded up ! In addition, capital letters are used to
indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Small
types are used to signal corresponding negative quantum numbers."
indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Lower case characters
are used similarly to signal negative quantum numbers smaller than –9. e. g., a0 is –10, b0 is –20, etc."
"""
if np.ma.is_masked(st):
return -999999

asc = string.ascii_lowercase
ASC = string.ascii_uppercase
newst = ''.join(['-' + str(asc.index(x)+10) if x in asc else
str(ASC.index(x)+10) if x in ASC else
newst = ''.join(['-' + str((asc.index(x)+1)) if x in asc else
str((ASC.index(x)+10)) if x in ASC else
x for x in st])
return int(newst)

Expand Down
6 changes: 5 additions & 1 deletion astroquery/linelists/cdms/tests/test_cdms.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def test_query(patch_post):
assert tbl['LGINT'][0] == -7.1425
assert tbl['GUP'][0] == 3
assert tbl['GUP'][7] == 17
assert tbl['MOLWT'][0] == 28


def test_parseletternumber():
Expand All @@ -99,9 +100,12 @@ def test_parseletternumber():
assert parse_letternumber("Z9") == 359

# inferred?
assert parse_letternumber("z9") == -359
assert parse_letternumber("a0") == -10
assert parse_letternumber("b0") == -20
assert parse_letternumber("ZZ") == 3535

assert parse_letternumber(np.ma.masked) == -999999


def test_hc7s(patch_post):
"""
Expand Down
71 changes: 67 additions & 4 deletions astroquery/linelists/cdms/tests/test_cdms_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,55 @@ def test_remote_300K():
assert tbl['FREQ'][0] == 505366.7875
assert tbl['ERR'][0] == 49.13
assert tbl['LGINT'][0] == -4.2182
assert tbl['MOLWT'][0] == 18
assert tbl['TAG'][0] == 18505


@pytest.mark.remote_data
def test_co_basics():
    """Spot-check parsed columns of the catalog entry '028503'.

    Fetches the full table with ``CDMS.get_molecule`` and compares a few
    quantum-number, molecular-weight and tag cells against fixed values.
    """
    co_table = CDMS.get_molecule('028503')

    # (column name, row index, expected value), checked in this order
    expected_cells = (
        ('Q1', 0, 1),
        ('Q7', 0, 0),
        ('Q1', 10, 11),
        ('Q7', 10, 10),
        ('MOLWT', 0, 28),
        ('TAG', 0, -28503),
    )
    for column, row, expected in expected_cells:
        assert co_table[column][row] == expected


@pytest.mark.remote_data
def test_ch3cn_negqn():
    """Check that negative quantum numbers are parsed for entry '041505'.

    Rows are selected by their (Q1, Q2) pair; each requested pair must be
    present at least once in the table returned by ``CDMS.get_molecule``.
    """
    # 041505 = CH3CN on 2025-05-21
    ch3cn = CDMS.get_molecule('041505')
    assert ch3cn.meta['molecule'] == 'CH3CN, v=0'

    def rows_with(q1, q2):
        # sub-table of rows whose Q1 and Q2 columns equal the given pair
        return ch3cn[(ch3cn['Q1'] == q1) & (ch3cn['Q2'] == q2)]

    assert len(rows_with(4, -3)) >= 1

    # check specifically for -21, which is encoded as `b1`
    assert len(rows_with(22, -21)) >= 1

    assert ch3cn['TAG'][0] == 41505

    q23_m21 = rows_with(23, -21)
    assert len(q23_m21) >= 1
    assert q23_m21['TAG'][0] == -41505


@pytest.mark.remote_data
def test_propanediol():
    """Exercise both retrieval paths for entry '076513'.

    First the full catalog table (``get_molecule``), then a narrow
    frequency query (``query_lines``); in both cases the quantum-number
    columns must come back with an integer dtype.
    """
    species_id = '076513'

    full_table = CDMS.get_molecule(species_id)
    assert 'int' in full_table['Q2'].dtype.name

    freq_window = dict(min_frequency=100.3 * u.GHz,
                       max_frequency=100.5 * u.GHz)
    lines = CDMS.query_lines(molecule=species_id, **freq_window)

    assert isinstance(lines, Table)
    assert len(lines) >= 1
    assert 'aG\'g-1,2-Propanediol' in lines['name']
    # check that the parser worked - this will be string or obj otherwise
    assert 'int' in lines['Ku'].dtype.name
    assert lines['MOLWT'][0] == 76
    assert lines['TAG'][0] == 76513


@pytest.mark.remote_data
Expand Down Expand Up @@ -66,16 +115,16 @@ def test_molecule_with_parens():

MC = np.ma.core.MaskedConstant()

for col, val in zip(tbl[0].colnames, (232588.7246, 0.2828, -4.1005, 3, 293.8540, 445, 66,
506, 303, 44, 14, 30, MC, MC, MC, 45, 13, 33, MC, MC, MC, 'H2C(CN)2', False)):
for col, val in zip(tbl[0].colnames, (232588.7246, 0.2828, -4.1005, 3, 293.8540, 445, 66506,
303, 44, 14, 30, MC, MC, MC, 45, 13, 33, MC, MC, MC, 'H2C(CN)2', 66, False)):
if val is MC:
assert tbl[0][col].mask
else:
assert tbl[0][col] == val

# this test row includes degeneracy = 1125, which covers one of the weird letter-is-number parser cases
for col, val in zip(tbl[16].colnames, (233373.369, 10.26, -4.8704, 3, 1229.0674, 1125, 66,
506, 303, 112, 10, 102, MC, MC, MC, 112, 9, 103, MC, MC, MC, 'H2C(CN)2', False),):
for col, val in zip(tbl[16].colnames, (233373.369, 10.26, -4.8704, 3, 1229.0674, 1125, 66506,
303, 112, 10, 102, MC, MC, MC, 112, 9, 103, MC, MC, MC, 'H2C(CN)2', 66, False),):
if val is MC:
assert tbl[16][col].mask
else:
Expand Down Expand Up @@ -121,6 +170,20 @@ def test_retrieve_species_table():
assert 'float' in species_table['lg(Q(1000))'].dtype.name


@pytest.mark.remote_data
def test_remote_all_species():
    """Query a frequency window without naming a molecule.

    The result must be a ``Table`` whose ``name`` column contains each of
    the species listed below at least once.
    """
    result = CDMS.query_lines(min_frequency=100.3 * u.GHz,
                              max_frequency=100.5 * u.GHz,
                              min_strength=-5)
    assert isinstance(result, Table)

    names = result['name']
    for species in ('AlS', "aG'g-1,2-Propanediol"):
        # count the rows carrying this species name; at least one is required
        n_matches = (names == species).sum()
        assert n_matches > 0


@pytest.mark.bigdata
@pytest.mark.remote_data
class TestRegressionAllCats:
Expand Down
Loading