Merge pull request #44 from xcube-dev/tejas-xxx-fix_list_data_ids_method

TejasMorbagal · web-flow · commit 8c7947c2154a · 2025-05-28T16:15:27.000+02:00
fix and update cmems store
diff --git a/.github/workflows/unitest-workflow.yml b/.github/workflows/unitest-workflow.yml
@@ -15,19 +15,34 @@ jobs:
       NUMBA_DISABLE_JIT: 1
     steps:
       - name: checkout xcube-cmems
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
-      - name: Set up MicroMamba
-        uses: mamba-org/provision-with-micromamba@main
+      - name: Set up Micromamba
+        uses: mamba-org/setup-micromamba@v2
         with:
-          cache-env: true
-          extra-specs: |
-            python=3.10
-
-      - name: Run unit tests
-        shell: bash -l {0}
-        run: |
-          cd /home/runner/work/xcube-cmems/xcube-cmems
-          ls
-          pytest 
-          
+            micromamba-version: '1.4.8-0'
+            environment-file: environment.yml
+            init-shell: >-
+                bash
+            # Don't cache the environment, since this would prevent us from
+            # catching test failures caused by updated versions of dependencies.
+            cache-environment: false
+            post-cleanup: 'all'
+
+      -   name: setup-xcube-cmems
+          shell: bash -l {0}
+          run: |
+              conda info
+              conda list
+              pip install -e .
+
+      -   name: unittest-xcube
+          shell: bash -l {0}
+          run: |
+              pytest --cov=xcube_cmems --cov-report=xml
+
+      -  uses: codecov/codecov-action@v4
+         with:
+             verbose: true # optional (default = false)
+
+
diff --git a/CHANGES.md b/CHANGES.md
@@ -4,6 +4,17 @@
   to `xcube_cmems` and entry point removed, since xcube plugins 
   auto-recognition is updated. (#39 and xcube-dev/xcube#963)
 
+- Support boolean-valued `include_attrs` in `get_data_ids()` in accordance with the
+  API update in xcube 1.8.0.
+
+- Refactored `get_datasets_with_titles()` to align with the updated return type of
+  `cm.describe()` (now returning a `CopernicusMarineCatalogue` object). The function
+  now accesses products and datasets via object attributes instead of dictionary keys.
+
+- Updated dependency versions to ensure compatibility with `copernicusmarine` >= 2.1.1.
+
+- Updated GitHub Actions workflow to use the latest Micromamba setup and use codecov.
+
 ## Changes in 0.1.5
 
 - Disabled metadata cache to make it more suitable for cloud based environments. (#36)
diff --git a/environment.yml b/environment.yml
@@ -2,19 +2,20 @@ name: xcube-cmems
 channels:
   - conda-forge
 dependencies:
+  # Python
+  - python >=3.10
   # Required
-  - copernicusmarine >=1.0.10
-  - numpy
+  - copernicusmarine >=2.1.1
+  - xcube >=1.9.1
+  - numpy <2.0.0 # to avoid inconsistent results with copernicusmarine package
+  - xarray >=2024.7.0 # to avoid inconsistent results with copernicusmarine package
   - pandas
-  - xarray
-  - xcube>=1.5.1
   # for testing
   - black
   - flake8
   - mock
-  - pathlib
   - pytest
-
+  - pytest-cov
 
 
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,10 +21,10 @@ license = {text = "MIT"}
 requires-python = ">=3.10"
 dependencies = [
     # Todo: add xcube-core when issue with matplotlib-base is solved
-    "copernicusmarine",
-    "numpy",
+    "copernicusmarine>=2.1.1",
+    "numpy<2.0.0",
+    "xarray>=2024.7.0",
     "pandas",
-    "xarray",
     "zarr"
 ]
 
@@ -41,22 +41,15 @@ exclude = [
 
 [project.optional-dependencies]
 dev = [
-  "pytest",
-  "pytest-cov",
-  "black",
-  "flake8",
-  "flake8-bugbear",
-    "pathlib"
-]
-doc = [
-  "mkdocs",
-  "mkdocs-material",
-  "mkdocstrings",
-  "mkdocstrings-python"
+      "pytest",
+      "pytest-cov",
+      "black",
+      "flake8",
+      "flake8-bugbear",
 ]
 
 [project.urls]
 Documentation = "https://dcs4cop.github.io/xcube-cmems/"
-Issues = "hhttps://github.com/dcs4cop/xcube-cmems/issues"
+Issues = "https://github.com/dcs4cop/xcube-cmems/issues"
 Changelog = "https://github.com/dcs4cop/xcube-cmems/blob/main/CHANGES.md"
 Repository = "https://github.com/dcs4cop/xcube-cmems"
diff --git a/test/test_cmems.py b/test/test_cmems.py
@@ -18,12 +18,13 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
-import pathlib
+
 import os
 import unittest
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
 
 from xcube_cmems.cmems import Cmems
-from unittest.mock import patch, MagicMock
 
 
 class CmemsTest(unittest.TestCase):
@@ -35,30 +36,28 @@ def setUp(self):
 
     @patch("xcube_cmems.cmems.cm.describe")
     def test_get_datasets_with_titles(self, mock_describe):
-        # Mock the response from cm.describe
-        mock_describe.return_value = {
-            "products": [
-                {
-                    "title": "Product A",
-                    "datasets": [
-                        {"dataset_id": "dataset1"},
-                        {"dataset_id": "dataset2"},
-                    ],
-                },
-                {"title": "Product B", "datasets": [{"dataset_id": "dataset3"}]},
-            ]
-        }
+        # Fake datasets
+        dataset1 = SimpleNamespace(dataset_id="dataset1", dataset_name="Dataset 1")
+        dataset2 = SimpleNamespace(dataset_id="dataset2", dataset_name="Dataset 2")
+        dataset3 = SimpleNamespace(dataset_id="dataset3", dataset_name="Dataset 3")
+
+        # Fake products
+        product_a = SimpleNamespace(title="Product A", datasets=[dataset1, dataset2])
+        product_b = SimpleNamespace(title="Product B", datasets=[dataset3])
+
+        # Fake catalogue
+        mock_catalogue = SimpleNamespace(products=[product_a, product_b])
+        mock_describe.return_value = mock_catalogue
+
         cmems = Cmems()
         datasets_info = cmems.get_datasets_with_titles()
 
-        # Expected result based on the mocked describe response
-        expected_result = [
-            {"title": "Product A", "dataset_id": "dataset1"},
-            {"title": "Product A", "dataset_id": "dataset2"},
-            {"title": "Product B", "dataset_id": "dataset3"},
+        expected = [
+            {"dataset_id": "dataset1", "title": "Product A - Dataset 1"},
+            {"dataset_id": "dataset2", "title": "Product A - Dataset 2"},
+            {"dataset_id": "dataset3", "title": "Product B - Dataset 3"},
         ]
-
-        self.assertEqual(datasets_info, expected_result)
+        self.assertEqual(datasets_info, expected)
 
     @patch("xcube_cmems.cmems.cm.open_dataset")
     def test_open_dataset(self, mock_open_dataset):
diff --git a/test/test_store.py b/test/test_store.py
@@ -19,16 +19,16 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 import unittest
-
-import xarray as xr
 from datetime import datetime, timedelta
-from unittest.mock import patch, MagicMock
+from unittest.mock import MagicMock, patch
 
+import xarray as xr
 import xcube.core.store.descriptor as xcube_des
 from xcube.util.jsonschema import JsonObjectSchema
+
 from xcube_cmems.constants import DATASET_OPENER_ID
-from xcube_cmems.store import CmemsDatasetOpener
-from xcube_cmems.store import CmemsDataStore
+from xcube_cmems.store import CmemsDatasetOpener, CmemsDataStore
+
 from .sample_data import create_cmems_dataset
 
 
diff --git a/xcube_cmems/cmems.py b/xcube_cmems/cmems.py
@@ -56,13 +56,17 @@ def __init__(
 
     @classmethod
     def get_datasets_with_titles(cls) -> List[dict]:
-        catalogue: dict = cm.describe(include_datasets=True, no_metadata_cache=True)
+        catalogue: CopernicusMarineCatalogue = cm.describe()
         datasets_info: List[dict] = []
-        for product in catalogue["products"]:
-            product_title = product["title"]
-            for dataset in product["datasets"]:
-                dataset_id: str = dataset["dataset_id"]
-                datasets_info.append({"title": product_title, "dataset_id": dataset_id})
+        for product in catalogue.products:
+            product_title = product.title
+            for dataset in product.datasets:
+                datasets_info.append(
+                    {
+                        "dataset_id": dataset.dataset_id,
+                        "title": f"{product_title} - {dataset.dataset_name}",
+                    }
+                )
         return datasets_info
 
     def open_dataset(self, dataset_id, **open_params) -> xr.Dataset:
@@ -72,7 +76,6 @@ def open_dataset(self, dataset_id, **open_params) -> xr.Dataset:
                 dataset_id=dataset_id,
                 username=self.cmems_username,
                 password=self.cmems_password,
-                no_metadata_cache=True,
                 **open_params,
             )
             return ds
diff --git a/xcube_cmems/plugin.py b/xcube_cmems/plugin.py
@@ -19,11 +19,10 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+from xcube.constants import EXTENSION_POINT_DATA_OPENERS, EXTENSION_POINT_DATA_STORES
 from xcube.util import extension
-from xcube.constants import EXTENSION_POINT_DATA_OPENERS
-from xcube.constants import EXTENSION_POINT_DATA_STORES
-from xcube_cmems.constants import DATASET_OPENER_ID
-from xcube_cmems.constants import DATA_STORE_ID
+
+from xcube_cmems.constants import DATA_STORE_ID, DATASET_OPENER_ID
 
 
 def init_plugin(ext_registry: extension.ExtensionRegistry):
diff --git a/xcube_cmems/store.py b/xcube_cmems/store.py
@@ -19,24 +19,23 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-from typing import Any, List, Tuple, Container, Union, Iterator, Dict
-
 import logging
-import xarray as xr
+from typing import Any, Container, Dict, Iterator, List, Optional, Tuple
+
 import numpy as np
 import pandas as pd
-
+import xarray as xr
 from xarray.core.dataset import DataVariables
 from xcube.core.gridmapping import GridMapping
 from xcube.core.store import (
     DATASET_TYPE,
     DataDescriptor,
     DataOpener,
+    DatasetDescriptor,
     DataStore,
     DataStoreError,
     DataType,
     DataTypeLike,
-    DatasetDescriptor,
     VariableDescriptor,
 )
 from xcube.util.assertions import assert_not_none
@@ -87,19 +86,18 @@ def _get_var_descriptors(
         return var_descriptors
 
     @staticmethod
-    def _determine_time_period(data: xr.Dataset):
+    def _determine_time_period(data: xr.Dataset) -> Optional[str]:
         if "time" in data and len(data["time"].values) > 1:
-            time_diff = (
-                data["time"].diff(dim=data["time"].dims[0]).values.astype(np.float64)
-            )
+            time_diff = data["time"].diff(dim=data["time"].dims[0]).values
             time_res = time_diff[0]
-            time_regular = np.allclose(time_res, time_diff, 1e-8)
+            time_regular = np.allclose(time_diff, time_res, rtol=1e-8, atol=0)
             if time_regular:
                 time_period = pd.to_timedelta(time_res).isoformat()
                 # remove leading P
                 time_period = time_period[1:]
                 # removing sub-day precision
                 return time_period.split("T")[0]
+        return None
 
     def describe_data(self, data_id: str) -> DatasetDescriptor:
         xr_ds = self.cmems.open_dataset(data_id)
@@ -228,11 +226,21 @@ def _get_opener(
         return self._dataset_opener
 
     def get_data_ids(
-        self, data_type: DataTypeLike = None, include_attrs: Container[str] = None
-    ) -> Union[Iterator[str], Iterator[Tuple[str, Dict[str, Any]]]]:
+        self,
+        data_type: DataTypeLike = None,
+        include_attrs: Container[str] | bool = False,
+    ) -> Iterator[str] | Iterator[Tuple[str, Dict[str, Any]]]:
+
         dataset_ids_with_titles = self._dataset_opener.cmems.get_datasets_with_titles()
-        return_tuples = include_attrs is not None
-        include_titles = return_tuples and "title" in include_attrs
+
+        if isinstance(include_attrs, bool):
+            return_tuples = include_attrs
+            include_titles = include_attrs
+        elif isinstance(include_attrs, Container):
+            return_tuples = True
+            include_titles = "title" in include_attrs
+        else:
+            raise ValueError(f"Invalid type {type(include_attrs)} for include_attrs")
 
         for dataset in dataset_ids_with_titles:
             data_id = dataset["dataset_id"]