Skip to content

Commit bf38ad5

Browse files
committed
fix xcube-cmems issues
1 parent 0b0b997 commit bf38ad5

File tree

8 files changed

+96
-33
lines changed

8 files changed

+96
-33
lines changed

CHANGES.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
1-
## Changes in 0.1.7
1+
## Changes in 0.1.8
2+
3+
- Made dataset attributes JSON-serializable in cmems open_dataset() to avoid xcube
4+
serve errors. (#48)
25

6+
- Disabled the progress bar in list_data_ids() to avoid bloating stdout in Jupyter notebooks. (#47)
7+
8+
## Changes in 0.1.7
39

410
- Ensured compatibility with the latest `copernicusmarine` requirements
511

environment.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ dependencies:
66
- python >=3.10
77
# Required
88
- copernicusmarine >=2.1.1
9-
- xcube >=1.9.1
10-
- numpy >=2.0.0 # to avoid inconsistent results with copernicusmarine package
9+
- xcube >=1.11.1
1110
- pandas
1211
# for testing
1312
- black

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ requires-python = ">=3.10"
2222
dependencies = [
2323
# Todo: add xcube-core when issue with matplotlib-base is solved
2424
"copernicusmarine>=2.1.1",
25-
"numpy>=2.0.0",
2625
"pandas",
2726
"zarr"
2827
]

test/test_cmems.py

Lines changed: 52 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,18 @@
2424
from types import SimpleNamespace
2525
from unittest.mock import MagicMock, patch
2626

27+
import numpy as np
28+
import xarray as xr
29+
2730
from xcube_cmems.cmems import Cmems
2831

2932

3033
class CmemsTest(unittest.TestCase):
31-
3234
def setUp(self):
3335
# Setup environment variables for testing
34-
os.environ["COPERNICUS_MARINE_SERVICE_USERNAME"] = "testuser"
35-
os.environ["COPERNICUS_MARINE_SERVICE_PASSWORD"] = "testpass"
36+
os.environ["COPERNICUSMARINE_SERVICE_USERNAME"] = "testuser"
37+
os.environ["COPERNICUSMARINE_SERVICE_PASSWORD"] = "testpass"
38+
self.cmems = Cmems()
3639

3740
@patch("xcube_cmems.cmems.cm.describe")
3841
def test_get_datasets_with_titles(self, mock_describe):
@@ -49,8 +52,8 @@ def test_get_datasets_with_titles(self, mock_describe):
4952
mock_catalogue = SimpleNamespace(products=[product_a, product_b])
5053
mock_describe.return_value = mock_catalogue
5154

52-
cmems = Cmems()
53-
datasets_info = cmems.get_datasets_with_titles()
55+
# cmems = Cmems()
56+
datasets_info = self.cmems.get_datasets_with_titles()
5457

5558
expected = [
5659
{"dataset_id": "dataset1", "title": "Product A - Dataset 1"},
@@ -64,19 +67,58 @@ def test_open_dataset(self, mock_open_dataset):
6467
# Mock the response from cm.open_dataset
6568
mock_dataset = MagicMock()
6669
mock_open_dataset.return_value = mock_dataset
67-
cmems_instance = Cmems()
68-
result = cmems_instance.open_dataset("dataset1")
70+
result = self.cmems.open_dataset("dataset1")
6971
self.assertEqual(result, mock_dataset)
7072

7173
# Testing with a non-existing dataset
7274
mock_open_dataset.side_effect = KeyError("Dataset not found")
73-
result = cmems_instance.open_dataset("non_existing_dataset")
75+
result = self.cmems.open_dataset("non_existing_dataset")
7476
self.assertIsNone(result)
7577

7678
@patch("click.confirm", return_value=True)
7779
def test_open_data_for_not_exsiting_dataset(self, mock_confirm):
78-
cmems = Cmems()
7980
self.assertIsNone(
80-
cmems.open_dataset("dataset-bal-analysis-forecast" "-wav-hourly"),
81+
self.cmems.open_dataset("dataset-bal-analysis-forecast" "-wav-hourly"),
8182
"Expected the method to return None for a " "non-existing dataset",
8283
)
84+
85+
def test_to_json_serializable_scalar_types(self):
86+
assert self.cmems.to_json_serializable(np.int16(5)) == 5
87+
assert self.cmems.to_json_serializable(np.bool_(True)) is True
88+
89+
def test_to_json_serializable_ndarray(self):
90+
arr = np.array([1, 2, 3], dtype=np.int32)
91+
result = self.cmems.to_json_serializable(arr)
92+
assert result == [1, 2, 3]
93+
assert isinstance(result, list)
94+
95+
def test_to_json_serializable_nested_structures(self):
96+
obj = {
97+
"a": np.int32(1),
98+
"b": [np.float64(2.2), {"c": np.bool_(False)}],
99+
"d": (np.int8(4),),
100+
}
101+
expected = {"a": 1, "b": [2.2, {"c": False}], "d": [4]}
102+
assert self.cmems.to_json_serializable(obj) == expected
103+
104+
def test_sanitize_attrs_dataset(self):
105+
data = xr.Dataset(
106+
{
107+
"var1": xr.DataArray(
108+
np.random.rand(2, 2),
109+
dims=["x", "y"],
110+
attrs={"int_attr": np.int16(10), "float_attr": np.float64(2.5)},
111+
)
112+
},
113+
attrs={"global_attr": np.bool_(True)},
114+
)
115+
116+
sanitized = self.cmems.sanitize_attrs(data)
117+
118+
# Ensure all attributes are native Python types
119+
for k, v in sanitized.attrs.items():
120+
assert isinstance(v, (int, float, bool, str, list, dict))
121+
122+
for var in sanitized.data_vars:
123+
for k, v in sanitized[var].attrs.items():
124+
assert isinstance(v, (int, float, bool, str, list, dict))

test/test_store.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333

3434

3535
class CmemsDataOpenerTest(unittest.TestCase):
36-
3736
def setUp(self) -> None:
3837
self.dataset_id = "cmems_mod_arc_bgc_anfc_ecosmo_P1D-m"
3938
self.opener = CmemsDatasetOpener()
@@ -100,7 +99,6 @@ def test_describe_data(self, mock_open_dataset):
10099

101100

102101
class CmemsDataStoreTest(unittest.TestCase):
103-
104102
def setUp(self) -> None:
105103
self.dataset_id = "cmems_mod_arc_bgc_anfc_ecosmo_P1D-m"
106104
self.mock_datasets = [
@@ -191,7 +189,6 @@ def test_get_data_opener_ids_with_valid_data_id(self):
191189

192190

193191
class CmemsDataStoreParamsTest(unittest.TestCase):
194-
195192
def test_store_for_cmems_credentials(self):
196193
params = {"cmems_username": "", "cmems_password": ""}
197194
with self.assertRaises(Exception) as e:

xcube_cmems/cmems.py

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,40 +23,39 @@
2323
from typing import List, Optional
2424

2525
import copernicusmarine as cm
26+
import numpy as np
2627
import xarray as xr
28+
from copernicusmarine import CopernicusMarineCatalogue
2729

2830

2931
class Cmems:
30-
3132
def __init__(
32-
self,
33-
cmems_username: Optional[str] = None,
34-
cmems_password: Optional[str] = None,
33+
self, cmems_username: Optional[str] = None, cmems_password: Optional[str] = None
3534
):
3635
self.cmems_username = (
3736
cmems_username
3837
if cmems_username is not None
39-
else os.getenv("CMEMS_USERNAME")
38+
else os.getenv("COPERNICUSMARINE_SERVICE_USERNAME")
4039
)
4140
self.cmems_password = (
4241
cmems_password
4342
if cmems_password is not None
44-
else os.getenv("CMEMS_PASSWORD")
43+
else os.getenv("COPERNICUSMARINE_SERVICE_PASSWORD")
4544
)
4645

4746
if not self.cmems_username or not self.cmems_password:
4847
raise ValueError(
4948
"CmemsDataStore needs cmems credentials to "
5049
"be provided either as "
51-
"environment variables CMEMS_USERNAME and "
52-
"CMEMS_PASSWORD, or to be "
50+
"environment variables COPERNICUSMARINE_SERVICE_USERNAME and "
51+
"COPERNICUSMARINE_SERVICE_PASSWORD, or to be "
5352
"provided as store params cmems_username and "
5453
"cmems_password"
5554
)
5655

5756
@classmethod
5857
def get_datasets_with_titles(cls) -> List[dict]:
59-
catalogue: CopernicusMarineCatalogue = cm.describe()
58+
catalogue: CopernicusMarineCatalogue = cm.describe(disable_progress_bar=True)
6059
datasets_info: List[dict] = []
6160
for product in catalogue.products:
6261
product_title = product.title
@@ -69,6 +68,31 @@ def get_datasets_with_titles(cls) -> List[dict]:
6968
)
7069
return datasets_info
7170

71+
def to_json_serializable(self, obj):
72+
"""Convert NumPy types and nested structures to JSON-serializable types."""
73+
if isinstance(obj, np.integer):
74+
return int(obj)
75+
elif isinstance(obj, np.floating):
76+
return float(obj)
77+
elif isinstance(obj, np.bool_):
78+
return bool(obj)
79+
elif isinstance(obj, np.ndarray):
80+
return obj.tolist()
81+
elif isinstance(obj, dict):
82+
return {str(k): self.to_json_serializable(v) for k, v in obj.items()}
83+
elif isinstance(obj, (list, tuple)):
84+
return [self.to_json_serializable(i) for i in obj]
85+
return obj
86+
87+
def sanitize_attrs(self, ds: xr.Dataset) -> xr.Dataset:
88+
"""Sanitize dataset and variable attributes for JSON serialization."""
89+
for var in ds.data_vars:
90+
ds[var].attrs = {
91+
str(k): self.to_json_serializable(v) for k, v in ds[var].attrs.items()
92+
}
93+
ds.attrs = {str(k): self.to_json_serializable(v) for k, v in ds.attrs.items()}
94+
return ds
95+
7296
def open_dataset(self, dataset_id, **open_params) -> xr.Dataset:
7397
try:
7498

@@ -78,6 +102,7 @@ def open_dataset(self, dataset_id, **open_params) -> xr.Dataset:
78102
password=self.cmems_password,
79103
**open_params,
80104
)
105+
ds = self.sanitize_attrs(ds)
81106
return ds
82107
except KeyError as e:
83108
print(f"Error: {e}.")

xcube_cmems/store.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -176,13 +176,8 @@ def get_open_data_params_schema(self, data_id: str = None) -> JsonObjectSchema:
176176

177177

178178
class CmemsDatasetOpener(CmemsDataOpener):
179-
180179
def __init__(self, **cmems_params):
181-
super().__init__(
182-
Cmems(**cmems_params),
183-
DATASET_OPENER_ID,
184-
DATASET_TYPE,
185-
)
180+
super().__init__(Cmems(**cmems_params), DATASET_OPENER_ID, DATASET_TYPE)
186181

187182

188183
class CmemsDataStore(DataStore):

xcube_cmems/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@
1919
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2020
# DEALINGS IN THE SOFTWARE.
2121

22-
version = "0.1.7"
22+
version = "0.1.8.dev0"

0 commit comments

Comments
 (0)