Skip to content

Commit c6c01b1

Browse files
authored
Support pandas copy-on-write behaviour (#8846)
* Support pandas copy-on-write behaviour
  Closes #8843
* Update xarray/tests/__init__.py
* One more fix
* Fix interp
* Avoid copy
* Try again
1 parent fbcac76 commit c6c01b1

File tree

6 files changed

+67
-43
lines changed

6 files changed

+67
-43
lines changed

xarray/core/variable.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,14 @@ def _possibly_convert_objects(values):
209209
as_series = pd.Series(values.ravel(), copy=False)
210210
if as_series.dtype.kind in "mM":
211211
as_series = _as_nanosecond_precision(as_series)
212-
return np.asarray(as_series).reshape(values.shape)
212+
result = np.asarray(as_series).reshape(values.shape)
213+
if not result.flags.writeable:
214+
# GH8843, pandas copy-on-write mode creates read-only arrays by default
215+
try:
216+
result.flags.writeable = True
217+
except ValueError:
218+
result = result.copy()
219+
return result
213220

214221

215222
def _possibly_convert_datetime_or_timedelta_index(data):

xarray/tests/__init__.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from xarray.core.duck_array_ops import allclose_or_equiv # noqa: F401
2121
from xarray.core.indexing import ExplicitlyIndexed
2222
from xarray.core.options import set_options
23+
from xarray.core.variable import IndexVariable
2324
from xarray.testing import ( # noqa: F401
2425
assert_chunks_equal,
2526
assert_duckarray_allclose,
@@ -47,6 +48,15 @@
4748
)
4849

4950

51+
def assert_writeable(ds):
    """Assert that no non-index variable of ``ds`` holds a read-only array.

    Every entry of ``ds.variables`` that is not an ``IndexVariable`` is
    checked through ``var.data.flags.writeable``; the names of all offending
    variables are collected so that a failure reports them all at once.

    Parameters
    ----------
    ds
        Object exposing a ``variables`` mapping of name -> variable
        (the callers visible here pass xarray ``Dataset`` objects).

    Raises
    ------
    AssertionError
        If at least one checked variable is backed by a read-only array.
    """
    # NOTE(review): index variables are skipped -- presumably because they
    # wrap immutable pandas indexes and are never writeable; confirm.
    readonly = [
        name
        for name, var in ds.variables.items()
        if not isinstance(var, IndexVariable) and not var.data.flags.writeable
    ]
    assert not readonly, f"read-only variables: {readonly}"
58+
59+
5060
def _importorskip(
5161
modname: str, minversion: str | None = None
5262
) -> tuple[bool, pytest.MarkDecorator]:
@@ -326,7 +336,7 @@ def create_test_data(
326336
numbers_values = np.random.randint(0, 3, _dims["dim3"], dtype="int64")
327337
obj.coords["numbers"] = ("dim3", numbers_values)
328338
obj.encoding = {"foo": "bar"}
329-
assert all(obj.data.flags.writeable for obj in obj.variables.values())
339+
assert_writeable(obj)
330340
return obj
331341

332342

xarray/tests/test_backends.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2605,7 +2605,9 @@ def test_append_with_append_dim_no_overwrite(self) -> None:
26052605
# overwrite a coordinate;
26062606
# for mode='a-', this will not get written to the store
26072607
# because it does not have the append_dim as a dim
2608-
ds_to_append.lon.data[:] = -999
2608+
lon = ds_to_append.lon.to_numpy().copy()
2609+
lon[:] = -999
2610+
ds_to_append["lon"] = lon
26092611
ds_to_append.to_zarr(
26102612
store_target, mode="a-", append_dim="time", **self.version_kwargs
26112613
)
@@ -2615,7 +2617,9 @@ def test_append_with_append_dim_no_overwrite(self) -> None:
26152617
# by default, mode="a" will overwrite all coordinates.
26162618
ds_to_append.to_zarr(store_target, append_dim="time", **self.version_kwargs)
26172619
actual = xr.open_dataset(store_target, engine="zarr", **self.version_kwargs)
2618-
original2.lon.data[:] = -999
2620+
lon = original2.lon.to_numpy().copy()
2621+
lon[:] = -999
2622+
original2["lon"] = lon
26192623
assert_identical(original2, actual)
26202624

26212625
@requires_dask

xarray/tests/test_dataset.py

Lines changed: 21 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
assert_equal,
5252
assert_identical,
5353
assert_no_warnings,
54+
assert_writeable,
5455
create_test_data,
5556
has_cftime,
5657
has_dask,
@@ -96,11 +97,11 @@ def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]:
9697
nt2 = 2
9798
time1 = pd.date_range("2000-01-01", periods=nt1)
9899
time2 = pd.date_range("2000-02-01", periods=nt2)
99-
string_var = np.array(["ae", "bc", "df"], dtype=object)
100+
string_var = np.array(["a", "bc", "def"], dtype=object)
100101
string_var_to_append = np.array(["asdf", "asdfg"], dtype=object)
101102
string_var_fixed_length = np.array(["aa", "bb", "cc"], dtype="|S2")
102103
string_var_fixed_length_to_append = np.array(["dd", "ee"], dtype="|S2")
103-
unicode_var = ["áó", "áó", "áó"]
104+
unicode_var = np.array(["áó", "áó", "áó"])
104105
datetime_var = np.array(
105106
["2019-01-01", "2019-01-02", "2019-01-03"], dtype="datetime64[s]"
106107
)
@@ -119,17 +120,11 @@ def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]:
119120
coords=[lat, lon, time1],
120121
dims=["lat", "lon", "time"],
121122
),
122-
"string_var": xr.DataArray(string_var, coords=[time1], dims=["time"]),
123-
"string_var_fixed_length": xr.DataArray(
124-
string_var_fixed_length, coords=[time1], dims=["time"]
125-
),
126-
"unicode_var": xr.DataArray(
127-
unicode_var, coords=[time1], dims=["time"]
128-
).astype(np.str_),
129-
"datetime_var": xr.DataArray(
130-
datetime_var, coords=[time1], dims=["time"]
131-
),
132-
"bool_var": xr.DataArray(bool_var, coords=[time1], dims=["time"]),
123+
"string_var": ("time", string_var),
124+
"string_var_fixed_length": ("time", string_var_fixed_length),
125+
"unicode_var": ("time", unicode_var),
126+
"datetime_var": ("time", datetime_var),
127+
"bool_var": ("time", bool_var),
133128
}
134129
)
135130

@@ -140,21 +135,11 @@ def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]:
140135
coords=[lat, lon, time2],
141136
dims=["lat", "lon", "time"],
142137
),
143-
"string_var": xr.DataArray(
144-
string_var_to_append, coords=[time2], dims=["time"]
145-
),
146-
"string_var_fixed_length": xr.DataArray(
147-
string_var_fixed_length_to_append, coords=[time2], dims=["time"]
148-
),
149-
"unicode_var": xr.DataArray(
150-
unicode_var[:nt2], coords=[time2], dims=["time"]
151-
).astype(np.str_),
152-
"datetime_var": xr.DataArray(
153-
datetime_var_to_append, coords=[time2], dims=["time"]
154-
),
155-
"bool_var": xr.DataArray(
156-
bool_var_to_append, coords=[time2], dims=["time"]
157-
),
138+
"string_var": ("time", string_var_to_append),
139+
"string_var_fixed_length": ("time", string_var_fixed_length_to_append),
140+
"unicode_var": ("time", unicode_var[:nt2]),
141+
"datetime_var": ("time", datetime_var_to_append),
142+
"bool_var": ("time", bool_var_to_append),
158143
}
159144
)
160145

@@ -168,8 +153,9 @@ def create_append_test_data(seed=None) -> tuple[Dataset, Dataset, Dataset]:
168153
}
169154
)
170155

171-
assert all(objp.data.flags.writeable for objp in ds.variables.values())
172-
assert all(objp.data.flags.writeable for objp in ds_to_append.variables.values())
156+
assert_writeable(ds)
157+
assert_writeable(ds_to_append)
158+
assert_writeable(ds_with_new_var)
173159
return ds, ds_to_append, ds_with_new_var
174160

175161

@@ -182,10 +168,8 @@ def make_datasets(data, data_to_append) -> tuple[Dataset, Dataset]:
182168
ds_to_append = xr.Dataset(
183169
{"temperature": (["time"], data_to_append)}, coords={"time": [0, 1, 2]}
184170
)
185-
assert all(objp.data.flags.writeable for objp in ds.variables.values())
186-
assert all(
187-
objp.data.flags.writeable for objp in ds_to_append.variables.values()
188-
)
171+
assert_writeable(ds)
172+
assert_writeable(ds_to_append)
189173
return ds, ds_to_append
190174

191175
u2_strings = ["ab", "cd", "ef"]
@@ -2964,10 +2948,11 @@ def test_copy_coords(self, deep, expected_orig) -> None:
29642948
name="value",
29652949
).to_dataset()
29662950
ds_cp = ds.copy(deep=deep)
2967-
ds_cp.coords["a"].data[0] = 999
2951+
new_a = np.array([999, 2])
2952+
ds_cp.coords["a"] = ds_cp.a.copy(data=new_a)
29682953

29692954
expected_cp = xr.DataArray(
2970-
xr.IndexVariable("a", np.array([999, 2])),
2955+
xr.IndexVariable("a", new_a),
29712956
coords={"a": [999, 2]},
29722957
dims=["a"],
29732958
)

xarray/tests/test_missing.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,13 @@ def test_interpolate_pd_compat(method, fill_value) -> None:
122122
# for the numpy linear methods.
123123
# see https://github.com/pandas-dev/pandas/issues/55144
124124
# This aligns the pandas output with the xarray output
125-
expected.values[pd.isnull(actual.values)] = np.nan
126-
expected.values[actual.values == fill_value] = fill_value
125+
fixed = expected.values.copy()
126+
fixed[pd.isnull(actual.values)] = np.nan
127+
fixed[actual.values == fill_value] = fill_value
128+
else:
129+
fixed = expected.values
127130

128-
np.testing.assert_allclose(actual.values, expected.values)
131+
np.testing.assert_allclose(actual.values, fixed)
129132

130133

131134
@requires_scipy

xarray/tests/test_variable.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,21 @@ def var():
6464
return Variable(dims=list("xyz"), data=np.random.rand(3, 4, 5))
6565

6666

67+
@pytest.mark.parametrize(
    "data",
    [
        np.array(["a", "bc", "def"], dtype=object),
        np.array(["2019-01-01", "2019-01-02", "2019-01-03"], dtype="datetime64[ns]"),
    ],
)
def test_as_compatible_data_writeable(data):
    """Check that ``as_compatible_data`` returns a writeable array.

    Runs with the pandas ``mode.copy_on_write`` option enabled, for an
    object-dtype string array and a ``datetime64[ns]`` array.
    """
    # GH8843, ensure writeable arrays for data_vars even with
    # pandas copy-on-write mode
    #
    # option_context restores the previous option value on every exit path,
    # so a failing assertion cannot leak copy-on-write mode into later tests.
    with pd.option_context("mode.copy_on_write", True):
        assert as_compatible_data(data).flags.writeable
80+
81+
6782
class VariableSubclassobjects(NamedArraySubclassobjects, ABC):
6883
@pytest.fixture
6984
def target(self, data):

0 commit comments

Comments
 (0)