From b7a33137c2ae1a85755b993ebc8c7100948d3189 Mon Sep 17 00:00:00 2001
From: thodson
Date: Thu, 25 Jul 2024 09:53:53 -0500
Subject: [PATCH 1/7] Add virtual-rechunk example

---
 .../virtual-rechunk/Dockerfile_virtualizarr | 59 +++++++++++++
 examples/virtual-rechunk/README.md          | 44 ++++++++++
 examples/virtual-rechunk/cubed.yaml         |  7 ++
 examples/virtual-rechunk/lithops.yaml       | 14 ++++
 examples/virtual-rechunk/requirements.txt   | 10 +++
 examples/virtual-rechunk/virtual-rechunk.py | 84 +++++++++++++++++++
 6 files changed, 218 insertions(+)
 create mode 100644 examples/virtual-rechunk/Dockerfile_virtualizarr
 create mode 100644 examples/virtual-rechunk/README.md
 create mode 100644 examples/virtual-rechunk/cubed.yaml
 create mode 100644 examples/virtual-rechunk/lithops.yaml
 create mode 100644 examples/virtual-rechunk/requirements.txt
 create mode 100644 examples/virtual-rechunk/virtual-rechunk.py

diff --git a/examples/virtual-rechunk/Dockerfile_virtualizarr b/examples/virtual-rechunk/Dockerfile_virtualizarr
new file mode 100644
index 000000000..d1793c6a7
--- /dev/null
+++ b/examples/virtual-rechunk/Dockerfile_virtualizarr
@@ -0,0 +1,59 @@
+# Python 3.11
+FROM python:3.11-slim-buster
+
+
+RUN apt-get update \
+    # Install aws-lambda-cpp build dependencies
+    && apt-get install -y \
+      g++ \
+      make \
+      cmake \
+      unzip \
+    # cleanup package lists, they are not used anymore in this image
+    && rm -rf /var/lib/apt/lists/* \
+    && apt-cache search linux-headers-generic
+
+ARG FUNCTION_DIR="/function"
+
+# Copy function code
+RUN mkdir -p ${FUNCTION_DIR}
+
+# Update pip
+# NB botocore/boto3 are pinned due to https://github.com/boto/boto3/issues/3648
+# using versions from https://github.com/aio-libs/aiobotocore/blob/72b8dd5d7d4ef2f1a49a0ae0c37b47e5280e2070/setup.py
+# due to s3fs dependency
+RUN pip install --upgrade --ignore-installed pip wheel six setuptools \
+    && pip install --upgrade --no-cache-dir --ignore-installed \
+        awslambdaric \
+        botocore==1.29.76 \
+        boto3==1.26.76 \
+        redis \
+        httplib2 \
+        requests \
+        numpy \
+        scipy \
+        pandas \
+        pika \
+        kafka-python \
+        cloudpickle \
+        ps-mem \
+        tblib
+
+# Set working directory to function root directory
+WORKDIR ${FUNCTION_DIR}
+
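+# NB: lithops_lambda.zip is generated and copied into the build context by
+# `lithops runtime build` (see the README), so it need not exist beforehand.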
+# Add Lithops
+COPY lithops_lambda.zip ${FUNCTION_DIR}
+RUN unzip lithops_lambda.zip \
+    && rm lithops_lambda.zip \
+    && mkdir handler \
+    && touch handler/__init__.py \
+    && mv entry_point.py handler/
+
+# Put your dependencies here, using RUN pip install... or RUN apt install...
+
+COPY requirements.txt requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+ENTRYPOINT [ "/usr/local/bin/python", "-m", "awslambdaric" ]
+CMD [ "handler.entry_point.lambda_handler" ]
diff --git a/examples/virtual-rechunk/README.md b/examples/virtual-rechunk/README.md
new file mode 100644
index 000000000..9dfd902c6
--- /dev/null
+++ b/examples/virtual-rechunk/README.md
@@ -0,0 +1,44 @@
+# Rechunk a virtual dataset
+
+This example demonstrates how to rechunk a collection of netcdf files on s3
+into a single zarr store.
+
+First, lithops and Virtualizarr construct a virtual dataset composed of the
+netcdf files on s3. Then, xarray-cubed rechunks the virtual dataset into a
+zarr.
+
+## Credits
+Inspired by Pythia's cookbook: https://projectpythia.org/kerchunk-cookbook
+by norlandrhagen.
+
+Please contribute improvements.
+
+
+
+1. Set up a Python environment
+```bash
+conda create --name virtualizarr-rechunk -y python=3.11
+conda activate virtualizarr-rechunk
+pip install -r requirements.txt
+```
+
+1. Set up cubed executor for [lithops-aws](https://github.com/cubed-dev/cubed/blob/main/examples/lithops/aws/README.md) by editing `./lithops.yaml` with your `bucket` and `execution_role`.
+
+1. Build a runtime image for Cubed
+```bash
+export LITHOPS_CONFIG_FILE=$(pwd)/lithops.yaml
+export CUBED_CONFIG=$(pwd)
+lithops runtime build -b aws_lambda -f Dockerfile_virtualizarr virtualizarr-runtime
+```
+
+1. Run the script
+```bash
+python virtual-rechunk.py
+```
+
+## Cleaning up
+To rebuild the Litops image, delete the existing one by running
+```bash
+lithops runtime delete -b aws_lambda -d virtualizarr-runtime
+```
diff --git a/examples/virtual-rechunk/cubed.yaml b/examples/virtual-rechunk/cubed.yaml
new file mode 100644
index 000000000..b4d2173c0
--- /dev/null
+++ b/examples/virtual-rechunk/cubed.yaml
@@ -0,0 +1,7 @@
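+# Notes: work_dir is scratch space for cubed's intermediate data, and
+# allowed_mem should fit within the Lambda memory (runtime_memory, in MB)
+# configured in lithops.yaml.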
+spec:
+  work_dir: "s3://cubed-$USER-temp"
+  allowed_mem: "2GB"
+  executor_name: "lithops"
+  executor_options:
+    runtime: "virtualizarr-runtime"
+    runtime_memory: 2000
diff --git a/examples/virtual-rechunk/lithops.yaml b/examples/virtual-rechunk/lithops.yaml
new file mode 100644
index 000000000..b142b4805
--- /dev/null
+++ b/examples/virtual-rechunk/lithops.yaml
@@ -0,0 +1,14 @@
+lithops:
+  backend: aws_lambda
+  storage: aws_s3
+
+aws:
+  region: us-west-2
+
+aws_lambda:
+  execution_role: arn:aws:iam::807615458658:role/lambdaLithopsExecutionRole
+  runtime: virtualizarr-runtime
+  runtime_memory: 2000
+
+aws_s3:
+  bucket: arn:aws:s3:::cubed-thodson-temp
diff --git a/examples/virtual-rechunk/requirements.txt b/examples/virtual-rechunk/requirements.txt
new file mode 100644
index 000000000..dfd39b4b4
--- /dev/null
+++ b/examples/virtual-rechunk/requirements.txt
@@ -0,0 +1,10 @@
+boto
+cftime
+cubed
+cubed-xarray
+h5py
+kerchunk
+lithops
+s3fs
+virtualizarr
+xarray
diff --git a/examples/virtual-rechunk/virtual-rechunk.py b/examples/virtual-rechunk/virtual-rechunk.py
new file mode 100644
index 000000000..67f3a78f0
--- /dev/null
+++ b/examples/virtual-rechunk/virtual-rechunk.py
@@ -0,0 +1,84 @@
+# Rechunk a collection of netcdf files on s3 into a single zarr store.
+#
+# First, lithops and Virtualizarr construct a virtual dataset composed of the
+# netcdf files on s3. Then, xarray-cubed rechunks the virtual dataset into a
+# zarr.
+#
+# Inspired by Pythia's cookbook: https://projectpythia.org/kerchunk-cookbook
+# by norlandrhagen.
+#
+# Please contribute improvements.
+
+import fsspec
+import lithops
+import xarray as xr
+
+from virtualizarr import open_virtual_dataset
+
+fs_read = fsspec.filesystem("s3", anon=True, skip_instance_cache=True)
+files_paths = fs_read.glob("s3://wrf-se-ak-ar5/ccsm/rcp85/daily/2060/*")
+file_pattern = sorted(["s3://" + f for f in files_paths])
+
+# truncate file_pattern while debugging
+file_pattern = file_pattern[:4]
+
+print(f"{len(file_pattern)} file paths were retrieved.")
+
+
+def map_references(fil):
+    """ Map function to open virtual datasets.
+    """
+    vds = open_virtual_dataset(fil,
+                               indexes={},
+                               loadable_variables=['Time'],
+                               cftime_variables=['Time'],
+                               )
+    return vds
+
+
+def reduce_references(results):
+    """ Reduce to concat virtual datasets.
+
+    """
+    combined_vds = xr.combine_nested(
+        results,
+        concat_dim=['Time'],
+        coords='minimal',
+        compat='override',
+    )
+    # possibly write parquet to s3 here
+    return combined_vds
+
+
+fexec = lithops.FunctionExecutor(config_file="lithops.yaml")
+
+futures = fexec.map_reduce(
+    map_references,
+    file_pattern,
+    reduce_references,
+    spawn_reducer=100,
+)
+
+ds = futures.get_result()
+ds.virtualize.to_kerchunk('combined.json', format='json')
+
+# NOTE: In jupyter, open_dataset seems to cache the json, such that changes
+# aren't propagated until the kernel is restarted.
+combined_ds = xr.open_dataset('combined.json',
+                              engine="kerchunk",
+                              chunks={},
+                              chunked_array_type='cubed',
+                              )
+
+combined_ds['Time'].attrs = {}  # to_zarr complains about attrs
+
+rechunked_ds = combined_ds.chunk(
+    chunks={'Time': 5, 'south_north': 25, 'west_east': 32}
+)
+
+rechunked_ds.to_zarr('rechunked.zarr',
+                     mode='w',
+                     encoding={},  # TODO
+                     consolidated=True,
+                     safe_chunks=False,
+                     )

From 56c4bf80b973b36d74b73e9172a11add91fe90ef Mon Sep 17 00:00:00 2001
From: thodson
Date: Fri, 26 Jul 2024 22:54:36 -0500
Subject: [PATCH 2/7] Set chunked_array_type in .chunk call

---
 examples/virtual-rechunk/virtual-rechunk.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/examples/virtual-rechunk/virtual-rechunk.py b/examples/virtual-rechunk/virtual-rechunk.py
index 67f3a78f0..bf423df63 100644
--- a/examples/virtual-rechunk/virtual-rechunk.py
+++ b/examples/virtual-rechunk/virtual-rechunk.py
@@ -42,9 +42,9 @@ def reduce_references(results):
     """
     combined_vds = xr.combine_nested(
         results,
-        concat_dim=['Time'],
-        coords='minimal',
-        compat='override',
+        concat_dim=["Time"],
+        coords="minimal",
+        compat="override",
     )
     # possibly write parquet to s3 here
     return combined_vds
@@ -60,24 +60,25 @@
 )
 
 ds = futures.get_result()
-ds.virtualize.to_kerchunk('combined.json', format='json')
+ds.virtualize.to_kerchunk("combined.json", format="json")
 
 # NOTE: In jupyter, open_dataset seems to cache the json, such that changes
 # aren't propagated until the kernel is restarted.
-combined_ds = xr.open_dataset('combined.json',
+combined_ds = xr.open_dataset("combined.json",
                               engine="kerchunk",
                               chunks={},
-                              chunked_array_type='cubed',
+                              chunked_array_type="cubed",
                               )
 
 combined_ds['Time'].attrs = {}  # to_zarr complains about attrs
 
 rechunked_ds = combined_ds.chunk(
-    chunks={'Time': 5, 'south_north': 25, 'west_east': 32}
+    chunks={'Time': 5, 'south_north': 25, 'west_east': 32},
+    chunked_array_type="cubed",
 )
 
-rechunked_ds.to_zarr('rechunked.zarr',
-                     mode='w',
+rechunked_ds.to_zarr("rechunked.zarr",
+                     mode="w",
                      encoding={},  # TODO
                      consolidated=True,
                      safe_chunks=False,

From 351761b3f64621bc106e7c0a9baad2a89871add5 Mon Sep 17 00:00:00 2001
From: thodson-usgs
Date: Wed, 23 Oct 2024 22:46:26 -0500
Subject: [PATCH 3/7] Move manifest to s3 and split workflow

---
 examples/virtual-rechunk/README.md            | 35 ++++---
 .../virtual-rechunk/create-virtualzarr.py     | 59 ++++++++++++
 examples/virtual-rechunk/lithops.yaml         |  2 +-
 examples/virtual-rechunk/requirements.txt     |  2 +-
 examples/virtual-rechunk/virtual-rechunk.py   | 91 ++++---------------
 5 files changed, 104 insertions(+), 85 deletions(-)
 create mode 100755 examples/virtual-rechunk/create-virtualzarr.py

diff --git a/examples/virtual-rechunk/README.md b/examples/virtual-rechunk/README.md
index 9dfd902c6..5e7620ca8 100644
--- a/examples/virtual-rechunk/README.md
+++ b/examples/virtual-rechunk/README.md
@@ -1,11 +1,17 @@
 # Rechunk a virtual dataset
 
-This example demonstrates how to rechunk a collection of netcdf files on s3
-into a single zarr store.
+This example demonstrates how to rechunk a collection of netcdf files on s3 into a single zarr store.
+
+Most rechunking workflows can be conceptualized in two steps,
+and keeping them separate typically provides greater flexibility than combining them.
+The first (staging) step is mostly embarrassingly parallel and prepares the input data.
+In this example, we construct a virtual zarr dataset using `lithops`,
+but we could incorporate data transfer and reprocessing as part of staging.
+
+The second (rechunking) step rechunks the staged data.
+Here, we rechunk the virtual zarr using `cubed`,
+but in theory, `dask` or other map-reduce frameworks may be used.
 
-First, lithops and Virtualizarr construct a virtual dataset composed of the
-netcdf files on s3. Then, xarray-cubed rechunks the virtual dataset into a
-zarr.
 
 ## Credits
 Inspired by Pythia's cookbook: https://projectpythia.org/kerchunk-cookbook
@@ -14,7 +20,6 @@ by norlandrhagen.
 Please contribute improvements.
 
 
-
 1. Set up a Python environment
 ```bash
 conda create --name virtualizarr-rechunk -y python=3.11
 conda activate virtualizarr-rechunk
 pip install -r requirements.txt
 ```
 
-1. Set up cubed executor for [lithops-aws](https://github.com/cubed-dev/cubed/blob/main/examples/lithops/aws/README.md) by editing `./lithops.yaml` with your `bucket` and `execution_role`.
+2. Set up cubed executor for [lithops-aws](https://github.com/cubed-dev/cubed/blob/main/examples/lithops/aws/README.md) by editing `./lithops.yaml` with your `storage_bucket` and `execution_role`.
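+
+For example (illustrative values; substitute your own role ARN and bucket name):
+```yaml
+aws_lambda:
+    execution_role: arn:aws:iam::123456789012:role/lambdaLithopsExecutionRole
+aws_s3:
+    storage_bucket: my-lithops-bucket
+```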
 
-1. Build a runtime image for Cubed
+3. Build a runtime image for `cubed`
 ```bash
 export LITHOPS_CONFIG_FILE=$(pwd)/lithops.yaml
-export CUBED_CONFIG=$(pwd)
+export CUBED_CONFIG=$(pwd)/cubed.yaml
+# create a bucket for storing results
+export BUCKET_URL=s3://wma-uncertainty/scratch
 lithops runtime build -b aws_lambda -f Dockerfile_virtualizarr virtualizarr-runtime
 ```
 
-1. Run the script
+4. Stage the virtual zarr using `lithops`
 ```bash
-python virtual-rechunk.py
+python create-virtualzarr.py
 ```
 
+5. Rechunk the virtual zarr with `cubed` (using `lithops`)
+```bash
+python virtual-rechunk.py
+```
+
 ## Cleaning up
-To rebuild the Litops image, delete the existing one by running
+To rebuild the `lithops` image, delete the existing one by running
 ```bash
 lithops runtime delete -b aws_lambda -d virtualizarr-runtime
 ```
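+
+## Verify the output
+As a quick sanity check (this assumes the rechunk step succeeded and
+`BUCKET_URL` is still set), open the rechunked store and inspect it:
+```python
+import os
+import xarray as xr
+
+ds = xr.open_zarr(f"{os.environ['BUCKET_URL']}/rechunked.zarr", chunks=None)
+print(ds)
+```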
diff --git a/examples/virtual-rechunk/create-virtualzarr.py b/examples/virtual-rechunk/create-virtualzarr.py
new file mode 100755
index 000000000..6ebe6810a
--- /dev/null
+++ b/examples/virtual-rechunk/create-virtualzarr.py
@@ -0,0 +1,59 @@
+# Use lithops to construct a virtual zarr from netcdf files on s3.
+
+import fsspec
+import lithops
+import os
+import xarray as xr
+
+from virtualizarr import open_virtual_dataset
+
+bucket_url = os.getenv("BUCKET_URL")
+
+fs_read = fsspec.filesystem("s3", anon=True, skip_instance_cache=True)
+files_paths = fs_read.glob("s3://wrf-se-ak-ar5/ccsm/rcp85/daily/2060/*")
+file_pattern = sorted(["s3://" + f for f in files_paths])
+
+# Truncate file_pattern while debugging
+file_pattern = file_pattern[:4]
+
+print(f"{len(file_pattern)} file paths were retrieved.")
+
+
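+# Each map task opens one netcdf file as a "virtual" dataset:
+# open_virtual_dataset reads only metadata and chunk byte ranges, except
+# for loadable_variables (here 'Time'), which are loaded and decoded with
+# cftime so the pieces can be concatenated along time in the reduce step.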
+def map_references(fil):
+    """ Map function to open virtual datasets.
+    """
+    vds = open_virtual_dataset(
+        fil,
+        indexes={},
+        loadable_variables=['Time'],
+        cftime_variables=['Time'],
+    )
+    return vds
+
+
+def reduce_references(results):
+    """ Reduce to concat virtual datasets.
+    """
+    combined_vds = xr.combine_nested(
+        results,
+        concat_dim=["Time"],
+        coords="minimal",
+        compat="override",
+    )
+
+    return combined_vds
+
+
+fexec = lithops.FunctionExecutor(config_file="lithops.yaml")
+
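+# map_reduce fans out one map task per file, then runs a single reduce task;
+# spawn_reducer=100 waits for 100% of the map tasks to finish before spawning
+# the reducer (lithops treats this value as a percentage).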
+futures = fexec.map_reduce(
+    map_references,
+    file_pattern,
+    reduce_references,
+    spawn_reducer=100,
+)
+
+ds = futures.get_result()
+
+# Save the virtual zarr manifest to s3
+ds.virtualize.to_kerchunk(f"{bucket_url}/combined.json", format="json")
diff --git a/examples/virtual-rechunk/lithops.yaml b/examples/virtual-rechunk/lithops.yaml
index b142b4805..3740ac605 100644
--- a/examples/virtual-rechunk/lithops.yaml
+++ b/examples/virtual-rechunk/lithops.yaml
@@ -11,4 +11,4 @@ aws_lambda:
   runtime_memory: 2000
 
 aws_s3:
-  bucket: arn:aws:s3:::cubed-thodson-temp
+  storage_bucket: cubed-thodson-temp
diff --git a/examples/virtual-rechunk/requirements.txt b/examples/virtual-rechunk/requirements.txt
index dfd39b4b4..b19f0b2ec 100644
--- a/examples/virtual-rechunk/requirements.txt
+++ b/examples/virtual-rechunk/requirements.txt
@@ -1,4 +1,4 @@
-boto
+boto3
 cftime
 cubed
 cubed-xarray
diff --git a/examples/virtual-rechunk/virtual-rechunk.py b/examples/virtual-rechunk/virtual-rechunk.py
index bf423df63..4b58551f6 100644
--- a/examples/virtual-rechunk/virtual-rechunk.py
+++ b/examples/virtual-rechunk/virtual-rechunk.py
@@ -1,85 +1,34 @@
-# Rechunk a collection of netcdf files on s3 into a single zarr store.
+# Rechunk a virtual zarr on s3 into a single zarr store using xarray-cubed.
 #
-# First, lithops and Virtualizarr construct a virtual dataset composed of the
-# netcdf files on s3. Then, xarray-cubed rechunks the virtual dataset into a
-# zarr.
+# Prior to running this script, create the virtual zarr with
+# > python create-virtualzarr.py
 #
-# Inspired by Pythia's cookbook: https://projectpythia.org/kerchunk-cookbook
-# by norlandrhagen.
-#
-# Please contribute improvements.
+# NOTE: In jupyter, open_dataset seems to cache the json, such that changes
+# aren't propagated until the kernel is restarted.
 
-import fsspec
-import lithops
+import os
 import xarray as xr
 
-from virtualizarr import open_virtual_dataset
-
-fs_read = fsspec.filesystem("s3", anon=True, skip_instance_cache=True)
-files_paths = fs_read.glob("s3://wrf-se-ak-ar5/ccsm/rcp85/daily/2060/*")
-file_pattern = sorted(["s3://" + f for f in files_paths])
-
-# truncate file_pattern while debugging
-file_pattern = file_pattern[:4]
-
-print(f"{len(file_pattern)} file paths were retrieved.")
-
-
-def map_references(fil):
-    """ Map function to open virtual datasets.
-    """
-    vds = open_virtual_dataset(fil,
-                               indexes={},
-                               loadable_variables=['Time'],
-                               cftime_variables=['Time'],
-                               )
-    return vds
-
-
-def reduce_references(results):
-    """ Reduce to concat virtual datasets.
-
-    """
-    combined_vds = xr.combine_nested(
-        results,
-        concat_dim=["Time"],
-        coords="minimal",
-        compat="override",
-    )
-    # possibly write parquet to s3 here
-    return combined_vds
-
-
-fexec = lithops.FunctionExecutor(config_file="lithops.yaml")
-
-futures = fexec.map_reduce(
-    map_references,
-    file_pattern,
-    reduce_references,
-    spawn_reducer=100,
-)
+bucket_url = os.getenv("BUCKET_URL")
 
-ds = futures.get_result()
-ds.virtualize.to_kerchunk("combined.json", format="json")
-
-# NOTE: In jupyter, open_dataset seems to cache the json, such that changes
-# aren't propagated until the kernel is restarted.
-combined_ds = xr.open_dataset("combined.json",
-                              engine="kerchunk",
-                              chunks={},
-                              chunked_array_type="cubed",
-                              )
+combined_ds = xr.open_dataset(
+    f"{bucket_url}/combined.json",  # location must be accessible to workers
+    engine="kerchunk",
+    chunks={},
+    chunked_array_type="cubed",
+)
 
-combined_ds['Time'].attrs = {}  # to_zarr complains about attrs
+combined_ds['Time'].attrs = {}  # otherwise to_zarr complains about attrs
 
 rechunked_ds = combined_ds.chunk(
     chunks={'Time': 5, 'south_north': 25, 'west_east': 32},
     chunked_array_type="cubed",
 )
 
-rechunked_ds.to_zarr("rechunked.zarr",
-                     mode="w",
-                     encoding={},  # TODO
-                     consolidated=True,
-                     safe_chunks=False,
-                     )
+rechunked_ds.to_zarr(
+    f"{bucket_url}/rechunked.zarr",
+    mode="w",
+    encoding={},  # TODO
+    consolidated=True,
+    safe_chunks=False,
+)

From b35f01e3c3009b96d812d7b42b504a61eae41a28 Mon Sep 17 00:00:00 2001
From: thodson-usgs
Date: Fri, 25 Oct 2024 11:46:40 -0500
Subject: [PATCH 4/7] Update requirements.txt

---
 examples/virtual-rechunk/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/virtual-rechunk/requirements.txt b/examples/virtual-rechunk/requirements.txt
index b19f0b2ec..368494255 100644
--- a/examples/virtual-rechunk/requirements.txt
+++ b/examples/virtual-rechunk/requirements.txt
@@ -2,6 +2,7 @@ boto3
 cftime
 cubed
 cubed-xarray
+h5netcdf
 h5py
 kerchunk
 lithops

From ee0dfcf47a4490bc4480d95371912903cd9b1561 Mon Sep 17 00:00:00 2001
From: thodson-usgs
Date: Fri, 25 Oct 2024 11:46:58 -0500
Subject: [PATCH 5/7] Move manifest to s3

---
 examples/virtual-rechunk/create-virtualzarr.py | 10 +++++++---
 examples/virtual-rechunk/virtual-rechunk.py    |  6 +++++-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/examples/virtual-rechunk/create-virtualzarr.py b/examples/virtual-rechunk/create-virtualzarr.py
index 6ebe6810a..4af14effa 100755
--- a/examples/virtual-rechunk/create-virtualzarr.py
+++ b/examples/virtual-rechunk/create-virtualzarr.py
@@ -40,7 +40,7 @@ def reduce_references(results):
         coords="minimal",
         compat="override",
     )
-    
+
     return combined_vds
 
 
@@ -55,5 +55,9 @@ def reduce_references(results):
 
 ds = futures.get_result()
 
-# Save the virtual zarr manifest to s3
-ds.virtualize.to_kerchunk(f"{bucket_url}/combined.json", format="json")
+# Save the virtual zarr manifest
+ds.virtualize.to_kerchunk("combined.json", format="json")
+
+# Upload manifest to s3
+fs_write = fsspec.filesystem("s3", anon=False, skip_instance_cache=True)
+fs_write.put("combined.json", f"{bucket_url}/combined.json")
diff --git a/examples/virtual-rechunk/virtual-rechunk.py b/examples/virtual-rechunk/virtual-rechunk.py
index 4b58551f6..c86b2ad82 100644
--- a/examples/virtual-rechunk/virtual-rechunk.py
+++ b/examples/virtual-rechunk/virtual-rechunk.py
@@ -7,10 +7,14 @@
 # aren't propagated until the kernel is restarted.
 
 import os
+import fsspec
 import xarray as xr
 
 bucket_url = os.getenv("BUCKET_URL")
 
+target = fsspec.get_mapper(f"{bucket_url}/rechunked.zarr")
+    # client_kwargs={'region_name':'us-west-2'})
+
 combined_ds = xr.open_dataset(
     f"{bucket_url}/combined.json",  # location must be accessible to workers
     engine="kerchunk",
@@ -27,8 +31,8 @@ rechunked_ds = combined_ds.chunk(
 )
 
 rechunked_ds.to_zarr(
-    f"{bucket_url}/rechunked.zarr",
+    target,
     mode="w",
     encoding={},  # TODO
     consolidated=True,

From 7d9d65472497b0f741a1fa2e2eed79f375084377 Mon Sep 17 00:00:00 2001
From: thodson-usgs
Date: Fri, 25 Oct 2024 13:42:18 -0500
Subject: [PATCH 6/7] Update to_zarr bucket url

---
 examples/virtual-rechunk/virtual-rechunk.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/examples/virtual-rechunk/virtual-rechunk.py b/examples/virtual-rechunk/virtual-rechunk.py
index c86b2ad82..7c4cbee25 100644
--- a/examples/virtual-rechunk/virtual-rechunk.py
+++ b/examples/virtual-rechunk/virtual-rechunk.py
@@ -7,13 +7,10 @@
 # aren't propagated until the kernel is restarted.
 
 import os
-import fsspec
 import xarray as xr
 
-bucket_url = os.getenv("BUCKET_URL")
-target = fsspec.get_mapper(f"{bucket_url}/rechunked.zarr")
-    # client_kwargs={'region_name':'us-west-2'})
+bucket_url = os.getenv("BUCKET_URL")
 
 combined_ds = xr.open_dataset(
     f"{bucket_url}/combined.json",  # location must be accessible to workers
@@ -31,8 +28,8 @@ rechunked_ds = combined_ds.chunk(
 )
 
 rechunked_ds.to_zarr(
-    target,
+    f"{bucket_url}/rechunked.zarr",
     mode="w",
     encoding={},  # TODO
     consolidated=True,

From 63dca1ca5030d60a658f9df4ee57841e7e1096c4 Mon Sep 17 00:00:00 2001
From: thodson-usgs
Date: Fri, 25 Oct 2024 14:38:21 -0500
Subject: [PATCH 7/7] Rename scripts

---
 examples/virtual-rechunk/README.md                   | 4 ++--
 .../{virtual-rechunk.py => rechunk-virtual-zarr.py}  | 0
 .../{create-virtualzarr.py => stage-virtual-zarr.py} | 0
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename examples/virtual-rechunk/{virtual-rechunk.py => rechunk-virtual-zarr.py} (100%)
 rename examples/virtual-rechunk/{create-virtualzarr.py => stage-virtual-zarr.py} (100%)

diff --git a/examples/virtual-rechunk/README.md b/examples/virtual-rechunk/README.md
index 5e7620ca8..15cc954ed 100644
--- a/examples/virtual-rechunk/README.md
+++ b/examples/virtual-rechunk/README.md
@@ -40,12 +40,12 @@ lithops runtime build -b aws_lambda -f Dockerfile_virtualizarr virtualizarr-runt
 
 4. Stage the virtual zarr using `lithops`
 ```bash
-python create-virtualzarr.py
+python stage-virtual-zarr.py
 ```
 
 5. Rechunk the virtual zarr with `cubed` (using `lithops`)
 ```bash
-python virtual-rechunk.py
+python rechunk-virtual-zarr.py
 ```
 
 ## Cleaning up
diff --git a/examples/virtual-rechunk/virtual-rechunk.py b/examples/virtual-rechunk/rechunk-virtual-zarr.py
similarity index 100%
rename from examples/virtual-rechunk/virtual-rechunk.py
rename to examples/virtual-rechunk/rechunk-virtual-zarr.py
diff --git a/examples/virtual-rechunk/create-virtualzarr.py b/examples/virtual-rechunk/stage-virtual-zarr.py
similarity index 100%
rename from examples/virtual-rechunk/create-virtualzarr.py
rename to examples/virtual-rechunk/stage-virtual-zarr.py