astropy · ParfenovS · Apr 27, 2025 · Apr 27, 2025 · Apr 27, 2025 · Apr 27, 2025
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -27,6 +27,7 @@ linelists.cdms
 ^^^^^^^^^^^^^^
 
 - Add a keyword to control writing of new species cache files.  This is needed to prevent tests from overwriting those files. [#3297]
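+- Add more complete support for parsing CDMS quantum numbers (including negative values encoded with lower-case letters) and other catalog values (corrected column offsets, ``MOLWT`` derived from ``TAG``), and allow all-species queries. [#3302]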
 
 heasarc
 ^^^^^^^
@@ -76,10 +77,10 @@ mast
 
 - Fix bug in ``utils.remove_duplicate_products`` that does not retain the order of the products in an input table. [#3314]
 
-- Added ``return_uri_map`` parameter to ``Observations.get_cloud_uris`` to return a mapping of the input data product URIs 
+- Added ``return_uri_map`` parameter to ``Observations.get_cloud_uris`` to return a mapping of the input data product URIs
   to the returned cloud URIs. [#3314]
 
-- Added ``verbose`` parameter to ``Observations.get_cloud_uris`` to control whether warnings are logged when a product cannot 
+- Added ``verbose`` parameter to ``Observations.get_cloud_uris`` to control whether warnings are logged when a product cannot
   be found in the cloud. [#3314]
 
 

diff --git a/astroquery/linelists/cdms/core.py b/astroquery/linelists/cdms/core.py
@@ -12,6 +12,7 @@
 # import configurable items declared in __init__.py
 from astroquery.linelists.cdms import conf
 from astroquery.exceptions import InvalidQueryError, EmptyResponseError
+from astroquery import log
 
 import re
 import string
@@ -54,7 +55,8 @@
         min_strength : int, optional
             Minimum strength in catalog units, the default is -500
 
-        molecule : list, string of regex if parse_name_locally=True, optional
+        molecule : list or string if parse_name_locally=False,
+            string of regex if parse_name_locally=True, optional
             Identifiers of the molecules to search for. If this parameter
             is not provided the search will match any species. Default is 'All'.
             As a first pass, the molecule will be searched for with a direct
@@ -134,18 +136,21 @@
         # changes interpretation of query
         self._last_query_temperature = temperature_for_intensity
 
-        if molecule is not None:
-            if parse_name_locally:
-                self.lookup_ids = build_lookup()
-                luts = self.lookup_ids.find(molecule, flags)
-                if len(luts) == 0:
-                    raise InvalidQueryError('No matching species found. Please '
-                                            'refine your search or read the Docs '
-                                            'for pointers on how to search.')
-                payload['Molecules'] = tuple(f"{val:06d} {key}"
-                                             for key, val in luts.items())[0]
-            else:
-                payload['Molecules'] = molecule
+        if molecule == 'All':
+            payload['Moleculesgrp'] = 'all species'
+        else:
+            if molecule is not None:
+                if parse_name_locally:
+                    self.lookup_ids = build_lookup()
+                    luts = self.lookup_ids.find(molecule, flags)
+                    if len(luts) == 0:
+                        raise InvalidQueryError('No matching species found. Please '
+                                                'refine your search or read the Docs '
+                                                'for pointers on how to search.')
+                    payload['Molecules'] = tuple(f"{val:06d} {key}"
+                                                 for key, val in luts.items())[0]
+                else:
+                    payload['Molecules'] = molecule
 
         if get_query_payload:
             return payload
@@ -180,7 +185,7 @@
         # accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S'
         badlist = (self.MALFORMATTED_MOLECULE_LIST +  # noqa
                    [y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()])
-        if payload['Molecules'] in badlist:
+        if 'Moleculesgrp' not in payload.keys() and payload['Molecules'] in badlist:
             raise ValueError(f"Molecule {payload['Molecules']} is known not to comply with standard CDMS format.  "
                              f"Try get_molecule({payload['Molecules']}) instead.")
 
@@ -233,15 +238,32 @@
         soup = BeautifulSoup(response.text, 'html.parser')
         text = soup.find('pre').text
 
+        need_to_filter_bad_molecules = False
+        for bad_molecule in self.MALFORMATTED_MOLECULE_LIST:
+            if text.find(bad_molecule.split()[1]) > -1:
+                need_to_filter_bad_molecules = True
+                break
+        if need_to_filter_bad_molecules:
+            text_new = ''
+            text = text.split('\n')
+            for line in text:
+                need_to_include_line = True
+                for bad_molecule in self.MALFORMATTED_MOLECULE_LIST:
+                    if line.find(bad_molecule.split()[1]) > -1:
+                        need_to_include_line = False
+                        break
+                if need_to_include_line:
+                    text_new = text_new + '\n' + line
+            text = text_new
+
         starts = {'FREQ': 0,
                   'ERR': 14,
                   'LGINT': 25,
                   'DR': 36,
                   'ELO': 38,
                   'GUP': 47,
-                  'MOLWT': 51,
-                  'TAG': 54,
-                  'QNFMT': 58,
+                  'TAG': 50,
+                  'QNFMT': 57,
                   'Ju': 61,
                   'Ku': 63,
                   'vu': 65,
@@ -265,6 +287,7 @@
         result['FREQ'].unit = u.MHz
         result['ERR'].unit = u.MHz
 
+        result['MOLWT'] = [int(x/1e3) for x in result['TAG']]
         result['Lab'] = result['MOLWT'] < 0
         result['MOLWT'] = np.abs(result['MOLWT'])
         result['MOLWT'].unit = u.Da
@@ -387,7 +410,7 @@
 
         return result
 
-    def get_molecule(self, molecule_id, *, cache=True):
+    def get_molecule(self, molecule_id, *, cache=True, return_response=False):
         """
         Retrieve the whole molecule table for a given molecule id
         """
@@ -396,6 +419,8 @@
         url = f'{self.CLASSIC_URL}/entries/c{molecule_id}.cat'
         response = self._request(method='GET', url=url,
                                  timeout=self.TIMEOUT, cache=cache)
+        if return_response:
+            return response
         result = self._parse_cat(response)
 
         species_table = self.get_species_table()
@@ -426,21 +451,21 @@
                   'ELO': 32,
                   'GUP': 42,
                   'TAG': 44,
-                  'QNFMT': 52,
-                  'Q1': 56,
-                  'Q2': 58,
-                  'Q3': 60,
-                  'Q4': 62,
-                  'Q5': 64,
-                  'Q6': 66,
-                  'Q7': 68,
-                  'Q8': 70,
-                  'Q9': 72,
-                  'Q10': 74,
-                  'Q11': 76,
-                  'Q12': 78,
-                  'Q13': 80,
-                  'Q14': 82,
+                  'QNFMT': 51,
+                  'Q1': 55,
+                  'Q2': 57,
+                  'Q3': 59,
+                  'Q4': 61,
+                  'Q5': 63,
+                  'Q6': 65,
+                  'Q7': 67,
+                  'Q8': 69,
+                  'Q9': 71,
+                  'Q10': 73,
+                  'Q11': 75,
+                  'Q12': 77,
+                  'Q13': 79,
+                  'Q14': 81,
                   }
 
         result = ascii.read(text, header_start=None, data_start=0,
@@ -450,7 +475,7 @@
                             format='fixed_width', fast_reader=False)
 
         # int truncates - which is what we want
-        result['MOLWT'] = [int(x/1e4) for x in result['TAG']]
+        result['MOLWT'] = [int(x/1e3) for x in result['TAG']]
 
         result['FREQ'].unit = u.MHz
         result['ERR'].unit = u.MHz
@@ -460,15 +485,18 @@
         result['MOLWT'].unit = u.Da
 
         fix_keys = ['GUP']
-        for suf in '':
-            for qn in (f'Q{ii}' for ii in range(1, 15)):
-                qnind = qn+suf
-                fix_keys.append(qnind)
+        for qn in (f'Q{ii}' for ii in range(1, 15)):
+            fix_keys.append(qn)
+        log.debug(f"fix_keys: {fix_keys} should include Q1, Q2, ..., Q14 and GUP")
         for key in fix_keys:
             if not np.issubdtype(result[key].dtype, np.integer):
                 intcol = np.array(list(map(parse_letternumber, result[key])),
                                   dtype=int)
+                if any(intcol == -999999):
+                    intcol = np.ma.masked_where(intcol == -999999, intcol)
                 result[key] = intcol
+                if not np.issubdtype(result[key].dtype, np.integer):
+                    raise ValueError(f"Failed to parse {key} as integer")
 
         result['LGINT'].unit = u.nm**2 * u.MHz
         result['ELO'].unit = u.cm**(-1)
@@ -486,13 +514,16 @@
     From the CDMS docs:
     "Exactly two characters are available for each quantum number. Therefore, half
     integer quanta are rounded up ! In addition, capital letters are used to
-    indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Small
-    types are used to signal corresponding negative quantum numbers."
+    indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Lower case characters
+    are used similarly to signal negative quantum numbers smaller than –9. e. g., a0 is –10, b0 is –20, etc."
     """
+    if np.ma.is_masked(st):
+        return -999999
+
     asc = string.ascii_lowercase
     ASC = string.ascii_uppercase
-    newst = ''.join(['-' + str(asc.index(x)+10) if x in asc else
-                     str(ASC.index(x)+10) if x in ASC else
+    newst = ''.join(['-' + str((asc.index(x)+1)) if x in asc else
+                     str((ASC.index(x)+10)) if x in ASC else
                      x for x in st])
     return int(newst)
 

diff --git a/astroquery/linelists/cdms/tests/test_cdms.py b/astroquery/linelists/cdms/tests/test_cdms.py
@@ -83,6 +83,7 @@ def test_query(patch_post):
     assert tbl['LGINT'][0] == -7.1425
     assert tbl['GUP'][0] == 3
     assert tbl['GUP'][7] == 17
+    assert tbl['MOLWT'][0] == 28
 
 
 def test_parseletternumber():
@@ -99,9 +100,12 @@ def test_parseletternumber():
     assert parse_letternumber("Z9") == 359
 
     # inferred?
-    assert parse_letternumber("z9") == -359
+    assert parse_letternumber("a0") == -10
+    assert parse_letternumber("b0") == -20
     assert parse_letternumber("ZZ") == 3535
 
+    assert parse_letternumber(np.ma.masked) == -999999
+
 
 def test_hc7s(patch_post):
     """

diff --git a/astroquery/linelists/cdms/tests/test_cdms_remote.py b/astroquery/linelists/cdms/tests/test_cdms_remote.py
@@ -38,6 +38,55 @@ def test_remote_300K():
     assert tbl['FREQ'][0] == 505366.7875
     assert tbl['ERR'][0] == 49.13
     assert tbl['LGINT'][0] == -4.2182
+    assert tbl['MOLWT'][0] == 18
+    assert tbl['TAG'][0] == 18505
+
+
+@pytest.mark.remote_data
+def test_co_basics():
+    tbl = CDMS.get_molecule('028503')
+    assert tbl['Q1'][0] == 1
+    assert tbl['Q7'][0] == 0
+    assert tbl['Q1'][10] == 11
+    assert tbl['Q7'][10] == 10
+    assert tbl['MOLWT'][0] == 28
+    assert tbl['TAG'][0] == -28503
+
+
+@pytest.mark.remote_data
+def test_ch3cn_negqn():
+    # 041505 = CH3CN on 2025-05-21
+    tbl = CDMS.get_molecule('041505')
+    assert tbl.meta['molecule'] == 'CH3CN, v=0'
+    fourtominusthree = tbl[(tbl['Q1'] == 4) & (tbl['Q2'] == -3)]
+    assert len(fourtominusthree) >= 1
+
+    # check specifically for -21, which is encoded as `b1`
+    twentytwominustwentyone = tbl[(tbl['Q1'] == 22) & (tbl['Q2'] == -21)]
+    assert len(twentytwominustwentyone) >= 1
+
+    assert tbl['TAG'][0] == 41505
+
+    twentythreeminustwentyone = tbl[(tbl['Q1'] == 23) & (tbl['Q2'] == -21)]
+    assert len(twentythreeminustwentyone) >= 1
+    assert twentythreeminustwentyone['TAG'][0] == -41505
+
+
+@pytest.mark.remote_data
+def test_propanediol():
+    tbl1 = CDMS.get_molecule('076513')
+    assert 'int' in tbl1['Q2'].dtype.name
+
+    tbl = CDMS.query_lines(min_frequency=100.3 * u.GHz,
+                           max_frequency=100.5 * u.GHz,
+                           molecule='076513')
+    assert isinstance(tbl, Table)
+    assert len(tbl) >= 1
+    assert 'aG\'g-1,2-Propanediol' in tbl['name']
+    # check that the parser worked - this will be string or obj otherwise
+    assert 'int' in tbl['Ku'].dtype.name
+    assert tbl['MOLWT'][0] == 76
+    assert tbl['TAG'][0] == 76513
 
 
 @pytest.mark.remote_data
@@ -66,16 +115,16 @@ def test_molecule_with_parens():
 
     MC = np.ma.core.MaskedConstant()
 
-    for col, val in zip(tbl[0].colnames, (232588.7246, 0.2828, -4.1005, 3, 293.8540, 445, 66,
-                        506, 303, 44, 14, 30, MC, MC, MC, 45, 13, 33, MC, MC, MC, 'H2C(CN)2', False)):
+    for col, val in zip(tbl[0].colnames, (232588.7246, 0.2828, -4.1005, 3, 293.8540, 445, 66506,
+                        303, 44, 14, 30, MC, MC, MC, 45, 13, 33, MC, MC, MC, 'H2C(CN)2', 66, False)):
         if val is MC:
             assert tbl[0][col].mask
         else:
             assert tbl[0][col] == val
 
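-    # this test row includes degeneracy = 1225, which covers one of the weird letter-is-number parser cases
+    # this test row includes degeneracy = 1125, which covers one of the weird letter-is-number parser cases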
-    for col, val in zip(tbl[16].colnames, (233373.369, 10.26, -4.8704, 3, 1229.0674, 1125, 66,
-                        506, 303, 112, 10, 102, MC, MC, MC, 112, 9, 103, MC, MC, MC, 'H2C(CN)2', False),):
+    for col, val in zip(tbl[16].colnames, (233373.369, 10.26, -4.8704, 3, 1229.0674, 1125, 66506,
+                        303, 112, 10, 102, MC, MC, MC, 112, 9, 103, MC, MC, MC, 'H2C(CN)2', 66, False),):
         if val is MC:
             assert tbl[16][col].mask
         else:
@@ -121,6 +170,20 @@ def test_retrieve_species_table():
     assert 'float' in species_table['lg(Q(1000))'].dtype.name
 
 
+@pytest.mark.remote_data
+def test_remote_all_species():
+    tbl = CDMS.query_lines(min_frequency=100.3 * u.GHz,
+                           max_frequency=100.5 * u.GHz,
+                           min_strength=-5)
+    assert isinstance(tbl, Table)
+
+    AlS_is_in_table = (tbl['name'] == 'AlS').sum() > 0
+    Propanediol_is_in_table = (tbl['name'] == "aG'g-1,2-Propanediol").sum() > 0
+
+    assert AlS_is_in_table
+    assert Propanediol_is_in_table
+
+
 @pytest.mark.bigdata
 @pytest.mark.remote_data
 class TestRegressionAllCats: