Reduce dask tokenization time (#8339)

martindurant · dcherian · Illviljan · web-flow · commit 86b4167da6da · 2023-10-20T17:13:43.000-06:00
* Reduce dask tokenization time

* Add comment

---------

Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -309,7 +309,8 @@ def _maybe_chunk(
             # when rechunking by different amounts, make sure dask names change
             # by providing chunks as an input to tokenize.
             # subtle bugs result otherwise. see GH3350
-            token2 = tokenize(name, token if token else var._data, chunks)
+            # tokenize str(chunks) for speed; name is omitted here since it is added to name2 on the next line
+            token2 = tokenize(token if token else var._data, str(chunks))
             name2 = f"{name_prefix}{name}-{token2}"
 
             from_array_kwargs = utils.consolidate_dask_from_array_kwargs(