36
36
from xarray .backends .locks import _get_scheduler
37
37
from xarray .coders import CFDatetimeCoder , CFTimedeltaCoder
38
38
from xarray .core import indexing
39
+ from xarray .core .coordinates import Coordinates
39
40
from xarray .core .dataarray import DataArray
40
41
from xarray .core .dataset import Dataset
41
42
from xarray .core .datatree import DataTree
@@ -379,6 +380,15 @@ def _chunk_ds(
379
380
return backend_ds ._replace (variables )
380
381
381
382
383
def _maybe_create_default_indexes(ds):
    """Return ``ds`` with its not-yet-indexed dimension coordinates re-assigned.

    A coordinate is selected when its dimensions are exactly ``(name,)``
    (i.e. it is a dimension coordinate) and ``ds.xindexes`` has no entry
    for it. The selected variables are wrapped in a ``Coordinates`` object
    and handed to ``ds.assign_coords``; the result of that call is returned.

    Parameters
    ----------
    ds
        Object exposing ``coords``, ``xindexes`` and ``assign_coords``.

    Returns
    -------
    The value returned by ``ds.assign_coords``.
    """
    # NOTE(review): building ``Coordinates`` from plain variables is presumably
    # what creates the default (pandas) index for each of them -- confirm
    # against the ``Coordinates`` constructor.
    pending = {}
    for coord_name, coord in ds.coords.items():
        # Only 1-D coordinates named after their own dimension qualify.
        if coord.dims != (coord_name,):
            continue
        # Leave alone anything that already has an index.
        if coord_name in ds.xindexes:
            continue
        pending[coord_name] = coord.variable

    new_coords = Coordinates(pending)
    return ds.assign_coords(new_coords)
390
+
391
+
382
392
def _dataset_from_backend_dataset (
383
393
backend_ds ,
384
394
filename_or_obj ,
@@ -389,6 +399,7 @@ def _dataset_from_backend_dataset(
389
399
inline_array ,
390
400
chunked_array_type ,
391
401
from_array_kwargs ,
402
+ create_default_indexes ,
392
403
** extra_tokens ,
393
404
):
394
405
if not isinstance (chunks , int | dict ) and chunks not in {None , "auto" }:
@@ -397,11 +408,15 @@ def _dataset_from_backend_dataset(
397
408
)
398
409
399
410
_protect_dataset_variables_inplace (backend_ds , cache )
400
- if chunks is None :
401
- ds = backend_ds
411
+
412
+ if create_default_indexes :
413
+ ds = _maybe_create_default_indexes (backend_ds )
402
414
else :
415
+ ds = backend_ds
416
+
417
+ if chunks is not None :
403
418
ds = _chunk_ds (
404
- backend_ds ,
419
+ ds ,
405
420
filename_or_obj ,
406
421
engine ,
407
422
chunks ,
@@ -434,6 +449,7 @@ def _datatree_from_backend_datatree(
434
449
inline_array ,
435
450
chunked_array_type ,
436
451
from_array_kwargs ,
452
+ create_default_indexes ,
437
453
** extra_tokens ,
438
454
):
439
455
if not isinstance (chunks , int | dict ) and chunks not in {None , "auto" }:
@@ -442,9 +458,11 @@ def _datatree_from_backend_datatree(
442
458
)
443
459
444
460
_protect_datatree_variables_inplace (backend_tree , cache )
445
- if chunks is None :
446
- tree = backend_tree
461
+ if create_default_indexes :
462
+ tree = backend_tree . map_over_datasets ( _maybe_create_default_indexes )
447
463
else :
464
+ tree = backend_tree
465
+ if chunks is not None :
448
466
tree = DataTree .from_dict (
449
467
{
450
468
path : _chunk_ds (
@@ -459,11 +477,12 @@ def _datatree_from_backend_datatree(
459
477
node = path ,
460
478
** extra_tokens ,
461
479
)
462
- for path , [node ] in group_subtrees (backend_tree )
480
+ for path , [node ] in group_subtrees (tree )
463
481
},
464
- name = backend_tree .name ,
482
+ name = tree .name ,
465
483
)
466
484
485
+ if create_default_indexes or chunks is not None :
467
486
for path , [node ] in group_subtrees (backend_tree ):
468
487
tree [path ].set_close (node ._close )
469
488
@@ -497,6 +516,7 @@ def open_dataset(
497
516
concat_characters : bool | Mapping [str , bool ] | None = None ,
498
517
decode_coords : Literal ["coordinates" , "all" ] | bool | None = None ,
499
518
drop_variables : str | Iterable [str ] | None = None ,
519
+ create_default_indexes : bool = True ,
500
520
inline_array : bool = False ,
501
521
chunked_array_type : str | None = None ,
502
522
from_array_kwargs : dict [str , Any ] | None = None ,
@@ -610,6 +630,13 @@ def open_dataset(
610
630
A variable or list of variables to exclude from being parsed from the
611
631
dataset. This may be useful to drop variables with problems or
612
632
inconsistent values.
633
+ create_default_indexes : bool, default: True
634
+ If True, create pandas indexes for :term:`dimension coordinates <dimension coordinate>`,
635
+ which loads the coordinate data into memory. Set it to False if you want to avoid loading
636
+ data into memory.
637
+
638
+ Note that backends can still choose to create other indexes. If you want to control that,
639
+ please refer to the backend's documentation.
613
640
inline_array: bool, default: False
614
641
How to include the array in the dask task graph.
615
642
By default (``inline_array=False``) the array is included in a task by
@@ -702,6 +729,7 @@ def open_dataset(
702
729
chunked_array_type ,
703
730
from_array_kwargs ,
704
731
drop_variables = drop_variables ,
732
+ create_default_indexes = create_default_indexes ,
705
733
** decoders ,
706
734
** kwargs ,
707
735
)
@@ -725,6 +753,7 @@ def open_dataarray(
725
753
concat_characters : bool | None = None ,
726
754
decode_coords : Literal ["coordinates" , "all" ] | bool | None = None ,
727
755
drop_variables : str | Iterable [str ] | None = None ,
756
+ create_default_indexes : bool = True ,
728
757
inline_array : bool = False ,
729
758
chunked_array_type : str | None = None ,
730
759
from_array_kwargs : dict [str , Any ] | None = None ,
@@ -833,6 +862,13 @@ def open_dataarray(
833
862
A variable or list of variables to exclude from being parsed from the
834
863
dataset. This may be useful to drop variables with problems or
835
864
inconsistent values.
865
+ create_default_indexes : bool, default: True
866
+ If True, create pandas indexes for :term:`dimension coordinates <dimension coordinate>`,
867
+ which loads the coordinate data into memory. Set it to False if you want to avoid loading
868
+ data into memory.
869
+
870
+ Note that backends can still choose to create other indexes. If you want to control that,
871
+ please refer to the backend's documentation.
836
872
inline_array: bool, default: False
837
873
How to include the array in the dask task graph.
838
874
By default (``inline_array=False``) the array is included in a task by
@@ -890,6 +926,7 @@ def open_dataarray(
890
926
chunks = chunks ,
891
927
cache = cache ,
892
928
drop_variables = drop_variables ,
929
+ create_default_indexes = create_default_indexes ,
893
930
inline_array = inline_array ,
894
931
chunked_array_type = chunked_array_type ,
895
932
from_array_kwargs = from_array_kwargs ,
@@ -946,6 +983,7 @@ def open_datatree(
946
983
concat_characters : bool | Mapping [str , bool ] | None = None ,
947
984
decode_coords : Literal ["coordinates" , "all" ] | bool | None = None ,
948
985
drop_variables : str | Iterable [str ] | None = None ,
986
+ create_default_indexes : bool = True ,
949
987
inline_array : bool = False ,
950
988
chunked_array_type : str | None = None ,
951
989
from_array_kwargs : dict [str , Any ] | None = None ,
@@ -1055,6 +1093,13 @@ def open_datatree(
1055
1093
A variable or list of variables to exclude from being parsed from the
1056
1094
dataset. This may be useful to drop variables with problems or
1057
1095
inconsistent values.
1096
+ create_default_indexes : bool, default: True
1097
+ If True, create pandas indexes for :term:`dimension coordinates <dimension coordinate>`,
1098
+ which loads the coordinate data into memory. Set it to False if you want to avoid loading
1099
+ data into memory.
1100
+
1101
+ Note that backends can still choose to create other indexes. If you want to control that,
1102
+ please refer to the backend's documentation.
1058
1103
inline_array: bool, default: False
1059
1104
How to include the array in the dask task graph.
1060
1105
By default (``inline_array=False``) the array is included in a task by
@@ -1148,6 +1193,7 @@ def open_datatree(
1148
1193
chunked_array_type ,
1149
1194
from_array_kwargs ,
1150
1195
drop_variables = drop_variables ,
1196
+ create_default_indexes = create_default_indexes ,
1151
1197
** decoders ,
1152
1198
** kwargs ,
1153
1199
)
@@ -1175,6 +1221,7 @@ def open_groups(
1175
1221
concat_characters : bool | Mapping [str , bool ] | None = None ,
1176
1222
decode_coords : Literal ["coordinates" , "all" ] | bool | None = None ,
1177
1223
drop_variables : str | Iterable [str ] | None = None ,
1224
+ create_default_indexes : bool = True ,
1178
1225
inline_array : bool = False ,
1179
1226
chunked_array_type : str | None = None ,
1180
1227
from_array_kwargs : dict [str , Any ] | None = None ,
@@ -1286,6 +1333,13 @@ def open_groups(
1286
1333
A variable or list of variables to exclude from being parsed from the
1287
1334
dataset. This may be useful to drop variables with problems or
1288
1335
inconsistent values.
1336
+ create_default_indexes : bool, default: True
1337
+ If True, create pandas indexes for :term:`dimension coordinates <dimension coordinate>`,
1338
+ which loads the coordinate data into memory. Set it to False if you want to avoid loading
1339
+ data into memory.
1340
+
1341
+ Note that backends can still choose to create other indexes. If you want to control that,
1342
+ please refer to the backend's documentation.
1289
1343
inline_array: bool, default: False
1290
1344
How to include the array in the dask task graph.
1291
1345
By default (``inline_array=False``) the array is included in a task by
@@ -1381,6 +1435,7 @@ def open_groups(
1381
1435
chunked_array_type ,
1382
1436
from_array_kwargs ,
1383
1437
drop_variables = drop_variables ,
1438
+ create_default_indexes = create_default_indexes ,
1384
1439
** decoders ,
1385
1440
** kwargs ,
1386
1441
)
0 commit comments