Skip to content

Commit 06f38e6

Browse files
Merge pull request #49 from xcube-dev/tejas-48-fix-issues
Fix xcube-cmems issues
2 parents 0b0b997 + 30bb7d5 commit 06f38e6

File tree

10 files changed

+1085
-1036
lines changed

10 files changed

+1085
-1036
lines changed

CHANGES.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
1-
## Changes in 0.1.7
1+
## Changes in 0.1.8
2+
3+
- Made metadata attributes of datasets returned by the data store JSON-serializable to
4+
avoid problems in xcube Server. (#48)
25

6+
- Disabled the progress bar in `list_data_ids()` to avoid bloating stdout in Jupyter notebooks. (#47)
7+
8+
## Changes in 0.1.7
39

410
- Ensuring compatibility with latest `copernicusmarine` requirements
511

README.md

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,9 @@ $ pytest
7474
```
7575

7676
in the `xcube-cmems` repository. Note that, in order to successfully run the
77-
tests using the current repository version of `xcube-cmems`, you may also need to
78-
install the repository source version of `xcube` rather than its latest conda-forge
79-
release and have the cmems credentials set as environment variables `CMEMS_USERNAME`
80-
and `CMEMS_PASSWORD`.
77+
tests using the current repository version of `xcube-cmems`, you may also need to have
78+
the cmems credentials set as environment variables
79+
`COPERNICUSMARINE_SERVICE_USERNAME` and `COPERNICUSMARINE_SERVICE_PASSWORD`.
8180

8281
To create a test coverage report, you can use
8382

@@ -95,4 +94,4 @@ in the `examples/notebooks/` subdirectory of the repository.
9594
## Releasing
9695

9796
To release `xcube-cmems`, please follow the steps outlined in the
98-
[xcube Developer Guide](https://github.com/dcs4cop/xcube/blob/master/docs/source/devguide.md#release-process).
97+
[xcube Developer Guide](https://github.com/dcs4cop/xcube/blob/master/docs/source/devguide.md#release-process).

environment.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ dependencies:
66
- python >=3.10
77
# Required
88
- copernicusmarine >=2.1.1
9-
- xcube >=1.9.1
10-
- numpy >=2.0.0 # to avoid inconsistent results with copernicusmarine package
9+
- xcube >=1.11.1
10+
- numpy
1111
- pandas
1212
# for testing
1313
- black

examples/notebooks/cmems_example_JNB.ipynb

Lines changed: 975 additions & 989 deletions
Large diffs are not rendered by default.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ requires-python = ">=3.10"
2222
dependencies = [
2323
# TODO: add xcube-core once the issue with matplotlib-base is resolved
2424
"copernicusmarine>=2.1.1",
25-
"numpy>=2.0.0",
25+
"numpy",
2626
"pandas",
2727
"zarr"
2828
]

test/test_cmems.py

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,33 +24,35 @@
2424
from types import SimpleNamespace
2525
from unittest.mock import MagicMock, patch
2626

27+
import numpy as np
28+
import xarray as xr
29+
2730
from xcube_cmems.cmems import Cmems
2831

2932

3033
class CmemsTest(unittest.TestCase):
31-
3234
def setUp(self):
3335
# Setup environment variables for testing
34-
os.environ["COPERNICUS_MARINE_SERVICE_USERNAME"] = "testuser"
35-
os.environ["COPERNICUS_MARINE_SERVICE_PASSWORD"] = "testpass"
36+
os.environ["COPERNICUSMARINE_SERVICE_USERNAME"] = "testuser"
37+
os.environ["COPERNICUSMARINE_SERVICE_PASSWORD"] = "testpass"
38+
self.cmems = Cmems()
3639

3740
@patch("xcube_cmems.cmems.cm.describe")
3841
def test_get_datasets_with_titles(self, mock_describe):
39-
# Fake datasets
42+
# Mock datasets
4043
dataset1 = SimpleNamespace(dataset_id="dataset1", dataset_name="Dataset 1")
4144
dataset2 = SimpleNamespace(dataset_id="dataset2", dataset_name="Dataset 2")
4245
dataset3 = SimpleNamespace(dataset_id="dataset3", dataset_name="Dataset 3")
4346

44-
# Fake products
47+
# Mock products
4548
product_a = SimpleNamespace(title="Product A", datasets=[dataset1, dataset2])
4649
product_b = SimpleNamespace(title="Product B", datasets=[dataset3])
4750

48-
# Fake catalogue
51+
# Mock catalogue
4952
mock_catalogue = SimpleNamespace(products=[product_a, product_b])
5053
mock_describe.return_value = mock_catalogue
5154

52-
cmems = Cmems()
53-
datasets_info = cmems.get_datasets_with_titles()
55+
datasets_info = self.cmems.get_datasets_with_titles()
5456

5557
expected = [
5658
{"dataset_id": "dataset1", "title": "Product A - Dataset 1"},
@@ -64,19 +66,58 @@ def test_open_dataset(self, mock_open_dataset):
6466
# Mock the response from cm.open_dataset
6567
mock_dataset = MagicMock()
6668
mock_open_dataset.return_value = mock_dataset
67-
cmems_instance = Cmems()
68-
result = cmems_instance.open_dataset("dataset1")
69+
result = self.cmems.open_dataset("dataset1")
6970
self.assertEqual(result, mock_dataset)
7071

7172
# Testing with a non-existing dataset
7273
mock_open_dataset.side_effect = KeyError("Dataset not found")
73-
result = cmems_instance.open_dataset("non_existing_dataset")
74+
result = self.cmems.open_dataset("non_existing_dataset")
7475
self.assertIsNone(result)
7576

7677
@patch("click.confirm", return_value=True)
7778
def test_open_data_for_not_exsiting_dataset(self, mock_confirm):
78-
cmems = Cmems()
7979
self.assertIsNone(
80-
cmems.open_dataset("dataset-bal-analysis-forecast" "-wav-hourly"),
80+
self.cmems.open_dataset("dataset-bal-analysis-forecast" "-wav-hourly"),
8181
"Expected the method to return None for a " "non-existing dataset",
8282
)
83+
84+
def test_to_json_serializable_scalar_types(self):
85+
assert self.cmems.to_json_serializable(np.int16(5)) == 5
86+
assert self.cmems.to_json_serializable(np.bool_(True)) is True
87+
88+
def test_to_json_serializable_ndarray(self):
89+
arr = np.array([1, 2, 3], dtype=np.int32)
90+
result = self.cmems.to_json_serializable(arr)
91+
assert result == [1, 2, 3]
92+
assert isinstance(result, list)
93+
94+
def test_to_json_serializable_nested_structures(self):
95+
obj = {
96+
"a": np.int32(1),
97+
"b": [np.float64(2.2), {"c": np.bool_(False)}],
98+
"d": (np.int8(4),),
99+
}
100+
expected = {"a": 1, "b": [2.2, {"c": False}], "d": [4]}
101+
assert self.cmems.to_json_serializable(obj) == expected
102+
103+
def test_sanitize_attrs_dataset(self):
104+
data = xr.Dataset(
105+
{
106+
"var1": xr.DataArray(
107+
np.random.rand(2, 2),
108+
dims=["x", "y"],
109+
attrs={"int_attr": np.int16(10), "float_attr": np.float64(2.5)},
110+
)
111+
},
112+
attrs={"global_attr": np.bool_(True)},
113+
)
114+
115+
sanitized = self.cmems.sanitize_attrs(data)
116+
117+
# Ensure all attributes are native Python types
118+
for k, v in sanitized.attrs.items():
119+
assert isinstance(v, (int, float, bool, str, list, dict))
120+
121+
for var in sanitized.data_vars:
122+
for k, v in sanitized[var].attrs.items():
123+
assert isinstance(v, (int, float, bool, str, list, dict))

test/test_store.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333

3434

3535
class CmemsDataOpenerTest(unittest.TestCase):
36-
3736
def setUp(self) -> None:
3837
self.dataset_id = "cmems_mod_arc_bgc_anfc_ecosmo_P1D-m"
3938
self.opener = CmemsDatasetOpener()
@@ -100,7 +99,6 @@ def test_describe_data(self, mock_open_dataset):
10099

101100

102101
class CmemsDataStoreTest(unittest.TestCase):
103-
104102
def setUp(self) -> None:
105103
self.dataset_id = "cmems_mod_arc_bgc_anfc_ecosmo_P1D-m"
106104
self.mock_datasets = [
@@ -191,17 +189,16 @@ def test_get_data_opener_ids_with_valid_data_id(self):
191189

192190

193191
class CmemsDataStoreParamsTest(unittest.TestCase):
194-
195192
def test_store_for_cmems_credentials(self):
196193
params = {"cmems_username": "", "cmems_password": ""}
197194
with self.assertRaises(Exception) as e:
198195
CmemsDataStore(**params)
199196
self.assertEqual(
200-
"CmemsDataStore needs cmems credentials to "
201-
"be provided either as "
202-
"environment variables CMEMS_USERNAME and "
203-
"CMEMS_PASSWORD, or to be "
204-
"provided as store params cmems_username and "
205-
"cmems_password",
197+
"CmemsDataStore needs cmems credentials to "
198+
"be provided either as "
199+
"environment variables COPERNICUSMARINE_SERVICE_USERNAME and "
200+
"COPERNICUSMARINE_SERVICE_PASSWORD, or to be "
201+
"provided as store params cmems_username and "
202+
"cmems_password",
206203
f"{e.exception}",
207204
)

xcube_cmems/cmems.py

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,40 +23,39 @@
2323
from typing import List, Optional
2424

2525
import copernicusmarine as cm
26+
import numpy as np
2627
import xarray as xr
28+
from copernicusmarine import CopernicusMarineCatalogue
2729

2830

2931
class Cmems:
30-
3132
def __init__(
32-
self,
33-
cmems_username: Optional[str] = None,
34-
cmems_password: Optional[str] = None,
33+
self, cmems_username: Optional[str] = None, cmems_password: Optional[str] = None
3534
):
3635
self.cmems_username = (
3736
cmems_username
3837
if cmems_username is not None
39-
else os.getenv("CMEMS_USERNAME")
38+
else os.getenv("COPERNICUSMARINE_SERVICE_USERNAME")
4039
)
4140
self.cmems_password = (
4241
cmems_password
4342
if cmems_password is not None
44-
else os.getenv("CMEMS_PASSWORD")
43+
else os.getenv("COPERNICUSMARINE_SERVICE_PASSWORD")
4544
)
4645

4746
if not self.cmems_username or not self.cmems_password:
4847
raise ValueError(
4948
"CmemsDataStore needs cmems credentials to "
5049
"be provided either as "
51-
"environment variables CMEMS_USERNAME and "
52-
"CMEMS_PASSWORD, or to be "
50+
"environment variables COPERNICUSMARINE_SERVICE_USERNAME and "
51+
"COPERNICUSMARINE_SERVICE_PASSWORD, or to be "
5352
"provided as store params cmems_username and "
5453
"cmems_password"
5554
)
5655

5756
@classmethod
5857
def get_datasets_with_titles(cls) -> List[dict]:
59-
catalogue: CopernicusMarineCatalogue = cm.describe()
58+
catalogue: CopernicusMarineCatalogue = cm.describe(disable_progress_bar=True)
6059
datasets_info: List[dict] = []
6160
for product in catalogue.products:
6261
product_title = product.title
@@ -69,6 +68,31 @@ def get_datasets_with_titles(cls) -> List[dict]:
6968
)
7069
return datasets_info
7170

71+
def to_json_serializable(self, obj):
72+
"""Convert NumPy types and nested structures to JSON-serializable types."""
73+
if isinstance(obj, np.integer):
74+
return int(obj)
75+
elif isinstance(obj, np.floating):
76+
return float(obj)
77+
elif isinstance(obj, np.bool_):
78+
return bool(obj)
79+
elif isinstance(obj, np.ndarray):
80+
return obj.tolist()
81+
elif isinstance(obj, dict):
82+
return {str(k): self.to_json_serializable(v) for k, v in obj.items()}
83+
elif isinstance(obj, (list, tuple)):
84+
return [self.to_json_serializable(i) for i in obj]
85+
return obj
86+
87+
def sanitize_attrs(self, ds: xr.Dataset) -> xr.Dataset:
88+
"""Sanitize dataset and variable attributes for JSON serialization."""
89+
for var in ds.data_vars:
90+
ds[var].attrs = {
91+
str(k): self.to_json_serializable(v) for k, v in ds[var].attrs.items()
92+
}
93+
ds.attrs = {str(k): self.to_json_serializable(v) for k, v in ds.attrs.items()}
94+
return ds
95+
7296
def open_dataset(self, dataset_id, **open_params) -> xr.Dataset:
7397
try:
7498

@@ -78,6 +102,7 @@ def open_dataset(self, dataset_id, **open_params) -> xr.Dataset:
78102
password=self.cmems_password,
79103
**open_params,
80104
)
105+
ds = self.sanitize_attrs(ds)
81106
return ds
82107
except KeyError as e:
83108
print(f"Error: {e}.")

xcube_cmems/store.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -176,13 +176,8 @@ def get_open_data_params_schema(self, data_id: str = None) -> JsonObjectSchema:
176176

177177

178178
class CmemsDatasetOpener(CmemsDataOpener):
179-
180179
def __init__(self, **cmems_params):
181-
super().__init__(
182-
Cmems(**cmems_params),
183-
DATASET_OPENER_ID,
184-
DATASET_TYPE,
185-
)
180+
super().__init__(Cmems(**cmems_params), DATASET_OPENER_ID, DATASET_TYPE)
186181

187182

188183
class CmemsDataStore(DataStore):

xcube_cmems/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@
1919
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2020
# DEALINGS IN THE SOFTWARE.
2121

22-
version = "0.1.7"
22+
version = "0.1.8.dev0"

0 commit comments

Comments
 (0)