diff --git a/.github/ISSUE_TEMPLATE/capability.md b/.github/ISSUE_TEMPLATE/capability.md
new file mode 100644
index 0000000..41378f6
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/capability.md
@@ -0,0 +1,38 @@
+---
+name: JIRA Story/Capability
+about: The structure for outlining work being done on a JIRA story
+labels: JIRA Story
+---
+
+# Capability
+
+
+
+## Task
+
+
+
+## Plan/Outline
+
+
+
+
+### TODOS
+
+
+- [ ]
+
+### Additional components / Context
+
+
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 69d40bb..0e66a7d 100755
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,3 +1,4 @@
[Short description explaining the high-level reason for the pull request]
## Additions
@@ -61,3 +62,36 @@
- [ ] Flexible from small to large screens
- [ ] No linting errors or warnings
- [ ] JavaScript tests are passing
+## Issue Addressed
+
+
+
+Fixes # (issue number)
+
+## Description
+
+
+
+## Type of Change
+
+- [ ] Bug fix (non-breaking change which fixes an issue)
+- [ ] New feature (non-breaking change which adds functionality)
+- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
+- [ ] Code cleanup/refactor
+- [ ] Documentation update
+
+Other (please specify):
+
+## Checklist
+
+- [ ] Branch is up to date with master
+- [ ] Updated tests or added new tests
+- [ ] Tests & pre-commit hooks pass
+- [ ] Updated documentation (if applicable)
+- [ ] Code follows established style and conventions
diff --git a/.gitignore b/.gitignore
index f5e8694..5760d76 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,77 +1,210 @@
-# Compiled source #
-###################
-*.com
-*.class
-*.dll
-*.exe
-*.o
-*.so
-_site/
-
-# Packages #
-############
-# it's better to unpack these files and commit the raw source
-# git has its own built in compression methods
-*.7z
-*.dmg
-*.gz
-*.iso
-*.jar
-*.rar
-*.tar
-*.zip
-
-# Logs and databases #
-######################
-*.log
-*.sql
-*.sqlite
-
-# OS generated files #
-######################
-.DS_Store
-.DS_Store?
-.Spotlight-V100
-.Trashes
-Icon?
-ehthumbs.db
-Thumbs.db
-
-# Vim swap files #
-##################
-*.swp
-
-# Python #
-#################
-*.pyc
-*.egg-info/
+# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
-.env
-.python-version
+*$py.class
-# pyenv #
-#########
-.python-version
+# C extensions
+*.so
-# Django #
-#################
-*.egg-info
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
.installed.cfg
+*.egg
+MANIFEST
+.DS_Store
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
# Unit test / coverage reports
-#################
htmlcov/
.tox/
+.nox/
.coverage
+.coverage.*
.cache
nosetests.xml
coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
-# Front-End #
-#############
-node_modules/
-bower_components/
-.grunt/
-src/vendor/
-dist/
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+.vscode/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+Pipfile
+
+.qodo
+src/icefabric/_version.py
+*.db
+*.zarr
+.zarr
+*.tiff
+*.tif
+examples/iceberg_catalog/metadata/
+
+# Local Terraform state
+*.tfstate
+*.tfstate.backup
+
+# Crash log files
+crash.log
+
+# CLI configuration
+.terraform/
+
+# Terraform plan output (optional unless you want to inspect later)
+*.tfplan
+
+# tiles
+*.pmtiles
+examples/icechunk_data_viewer/martin/tiles/quantiles/*
+examples/icechunk_data_viewer/martin/tiles/legends/*
+examples/icechunk_data_viewer/martin/tiles/legend.png
+tests/data/topo_tifs
+
+# hydrofabric parquets
+data/hydrofabric
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..4dbe867
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,31 @@
+repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v5.0.0
+ hooks:
+ - id: trailing-whitespace
+ exclude: LICENSE|\.csv$
+ - id: end-of-file-fixer
+ exclude: LICENSE|\.csv$
+ - id: check-yaml
+ exclude: mkdocs.yml$
+ - id: debug-statements
+
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ # Ruff version.
+ rev: v0.12.3
+ hooks:
+ # Run the linter.
+ - id: ruff
+ args: [ --fix ]
+ # Run the formatter.
+ - id: ruff-format
+
+ - repo: https://github.com/astral-sh/uv-pre-commit
+ rev: 0.7.20
+ hooks:
+ - id: uv-lock
+
+ - repo: https://github.com/kynan/nbstripout
+ rev: 0.8.1
+ hooks:
+ - id: nbstripout
diff --git a/.pyiceberg.yaml b/.pyiceberg.yaml
new file mode 100644
index 0000000..4c42331
--- /dev/null
+++ b/.pyiceberg.yaml
@@ -0,0 +1,11 @@
+catalog:
+ glue:
+ type: glue
+ s3.endpoint: s3.us-east-1.amazonaws.com
+ warehouse: s3://52fcde3e7-5582-477d-7686ou4ij1ptxj8equ83a5xc51fsuse1b--table-s3
+ region: us-east-1
+ glue_region: us-east-1
+ sql:
+ type: sql
+ uri: sqlite:////tmp/warehouse/pyiceberg_catalog.db
+ warehouse: file:///tmp/warehouse
diff --git a/LICENSE b/LICENSE
index e8ab96d..5b06463 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,3 +1,14 @@
+Copyright 2025 Raytheon Company
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+Licensed under: https://opensource.org/license/bsd-2-clause
+
+- - - - - - - - - - - - - -
+
Apache License
Version 2.0, January 2004
diff --git a/NOTICE.txt b/NOTICE.txt
new file mode 100644
index 0000000..4c987a6
--- /dev/null
+++ b/NOTICE.txt
@@ -0,0 +1,8 @@
+--
+
+The Hydrofabric data used in this repo is credited to Lynker and its contributors.
+
+Johnson, J. M. (2022). National Hydrologic Geospatial Fabric (hydrofabric) for the Next Generation (NextGen) Hydrologic Modeling Framework,
+HydroShare http://www.hydroshare.org/resource/129787b468aa4d55ace7b124ed27dbde
+
+--
diff --git a/README.md b/README.md
index 2159d62..fb3d4e7 100644
--- a/README.md
+++ b/README.md
@@ -1,109 +1,58 @@
-#### OWP Open Source Project Template Instructions
+# icefabric
-1. Create a new project.
-2. [Copy these files into the new project](#installation)
-3. Update the README, replacing the contents below as prescribed.
-4. Add any libraries, assets, or hard dependencies whose source code will be included
- in the project's repository to the _Exceptions_ section in the [TERMS](TERMS.md).
- - If no exceptions are needed, remove that section from TERMS.
-5. If working with an existing code base, answer the questions on the [open source checklist](opensource-checklist.md)
-6. Delete these instructions and everything up to the _Project Title_ from the README.
-7. Write some great software and tell people about it.
+
-> Keep the README fresh! It's the first thing people see and will make the initial impression.
+[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
-## Installation
-To install all of the template files, run the following script from the root of your project's directory:
+An [Apache Iceberg](https://py.iceberg.apache.org/) implementation of the Hydrofabric to disseminate continental hydrologic data
-```
-bash -c "$(curl -s https://raw.githubusercontent.com/NOAA-OWP/owp-open-source-project-template/open_source_template.sh)"
-```
-
-----
-
-# Project Title
-
-**Description**: Put a meaningful, short, plain-language description of what
-this project is trying to accomplish and why it matters.
-Describe the problem(s) this project solves.
-Describe how this software can improve the lives of its audience.
-
-Other things to include:
-
- - **Technology stack**: Indicate the technological nature of the software, including primary programming language(s) and whether the software is intended as standalone or as a module in a framework or other ecosystem.
- - **Status**: Alpha, Beta, 1.1, etc. It's OK to write a sentence, too. The goal is to let interested people know where this project is at. This is also a good place to link to the [CHANGELOG](CHANGELOG.md).
- - **Links to production or demo instances**
- - Describe what sets this apart from related-projects. Linking to another doc or page is OK if this can't be expressed in a sentence or two.
-
-
-**Screenshot**: If the software has visual components, place a screenshot after the description; e.g.,
-
-
-
-
-## Dependencies
-
-Describe any dependencies that must be installed for this software to work.
-This includes programming languages, databases or other storage mechanisms, build tools, frameworks, and so forth.
-If specific versions of other software are required, or known not to work, call that out.
-
-## Installation
-
-Detailed instructions on how to install, configure, and get the project running.
-This should be frequently tested to ensure reliability. Alternatively, link to
-a separate [INSTALL](INSTALL.md) document.
-
-## Configuration
-
-If the software is configurable, describe it in detail, either here or in other documentation to which you link.
+> [!NOTE]
+> To run any of the functions in this repo, your AWS test account credentials need to be in your `.env` file and your `.pyiceberg.yaml` settings need to be up to date, with `AWS_DEFAULT_REGION="us-east-1"` set.
-## Usage
-
-Show users how to use the software.
-Be specific.
-Use appropriate formatting when showing code snippets.
-
-## How to test the software
-
-If the software includes automated tests, detail how to run those tests.
-
-## Known issues
-
-Document any known significant shortcomings with the software.
-
-## Getting help
-
-Instruct users how to get help with this software; this might include links to an issue tracker, wiki, mailing list, etc.
-
-**Example**
-
-If you have questions, concerns, bug reports, etc, please file an issue in this repository's Issue Tracker.
-
-## Getting involved
-
-This section should detail why people should get involved and describe key areas you are
-currently focusing on; e.g., trying to get feedback on features, fixing certain bugs, building
-important pieces, etc.
+### Getting Started
+This repo is managed through [UV](https://docs.astral.sh/uv/getting-started/installation/) and can be installed through:
+```sh
+uv sync
+source .venv/bin/activate
+```
-General instructions on _how_ to contribute should be stated with a link to [CONTRIBUTING](CONTRIBUTING.md).
+### Running the API locally
+To run the API locally, ensure your `.env` file in your project root has the right credentials, then run
+```sh
+python -m app.main
+```
+This should spin up the API services at `localhost:8000/`.
+To use a local SQL-backed Iceberg catalog instead of the default Glue (S3 Tables) catalog, run
+```sh
+python -m app.main --catalog sql
+```
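+
+Once the server is up, you can sanity-check it against the `/health` route (registered as a `HEAD` endpoint) or open the interactive docs at `localhost:8000/docs`. A minimal check from Python, assuming the `requests` package is available in your environment:
+```python
+import requests
+
+# /health is registered with @app.head, so send a HEAD request
+resp = requests.head("http://localhost:8000/health")
+print(resp.status_code)  # 200 while the server is running
+```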
-----
+### Building the API through Docker
+To run the API locally with Docker, ensure your `.env` file in your project root has the right credentials, then run
+```sh
+docker compose -f docker/compose.yaml build --no-cache
+docker compose -f docker/compose.yaml up
+```
+This should spin up the API services.
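+
+As a quick end-to-end check of the running service, the sketch below downloads a hydrofabric subset geopackage for the example USGS gauge from the API docs via the `/v1/hydrofabric/{identifier}/gpkg` route (again assuming `requests` is installed; the `domain` query parameter defaults to CONUS):
+```python
+import requests
+
+gauge_id = "01010000"  # example gauge ID from the API docs
+resp = requests.get(f"http://localhost:8000/v1/hydrofabric/{gauge_id}/gpkg", timeout=300)
+resp.raise_for_status()
+
+# Save the returned geopackage locally
+with open(f"hydrofabric_subset_{gauge_id}.gpkg", "wb") as f:
+    f.write(resp.content)
+```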
-## Open source licensing info
-These links must be included in the final version of your project README (keep this section,
-as is, but remove this sentence):
+### Development
+To ensure that icefabric follows the project's code style and commit conventions, install the local dev dependencies and run `pre-commit install`.
-1. [TERMS](TERMS.md)
-2. [LICENSE](LICENSE)
+### Documentation
+To build the user guide documentation for Icefabric locally, run the following commands:
+```sh
+uv pip install ".[docs]"
+mkdocs serve -a localhost:8080
+```
+Docs will be served at `localhost:8080/`.
+### Pytests
-----
+The `tests` folder holds all testing data so the global conftest can pick it up. This lets tests across the namespace packages share the same scope without referencing one another.
-## Credits and references
+To run tests, run `pytest -s` from project root.
-1. Projects that inspired you
-2. Related projects
-3. Books, papers, talks, or other sources that have meaningful impact or influence on this project
+The subsetter tests take some time and are skipped by default; run `pytest --run-slow` to include them.
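+
+The `--run-slow` flag is a pytest option wired up in a conftest; a common implementation looks roughly like the sketch below (illustrative only, not necessarily how this repo's conftest is written):
+```python
+# conftest.py -- sketch of a typical --run-slow option
+import pytest
+
+
+def pytest_addoption(parser):
+    parser.addoption("--run-slow", action="store_true", default=False, help="also run tests marked slow")
+
+
+def pytest_collection_modifyitems(config, items):
+    if config.getoption("--run-slow"):
+        return  # run everything, including slow tests
+    skip_slow = pytest.mark.skip(reason="needs --run-slow to run")
+    for item in items:
+        if "slow" in item.keywords:
+            item.add_marker(skip_slow)
+```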
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..018ddfc
--- /dev/null
+++ b/app/__init__.py
@@ -0,0 +1,26 @@
+from fastapi import HTTPException, Request
+from pyiceberg.catalog import Catalog
+
+
+def get_catalog(request: Request) -> Catalog:
+ """Gets the pyiceberg catalog reference from the app state
+
+ Parameters
+ ----------
+ request : Request
+ The FastAPI request object containing the application state
+
+ Returns
+ -------
+ pyiceberg.catalog.Catalog
+ The loaded pyiceberg catalog instance used for querying versioned EDFS data
+
+ Raises
+ ------
+ HTTPException
+ If the catalog is not loaded or not available in the application state.
+ Returns HTTP 500 status code with "Catalog not loaded" detail message.
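+
+    Examples
+    --------
+    A minimal sketch of wiring this dependency into a route (the path and
+    handler below are illustrative, not part of this module):
+
+    >>> from fastapi import APIRouter, Depends
+    >>> router = APIRouter()
+    >>> @router.get("/namespaces")
+    ... async def list_namespaces(catalog: Catalog = Depends(get_catalog)):
+    ...     return {"namespaces": catalog.list_namespaces()}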
+ """
+ if not hasattr(request.app.state, "catalog") or request.app.state.catalog is None:
+ raise HTTPException(status_code=500, detail="Catalog not loaded")
+ return request.app.state.catalog
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000..c512aee
--- /dev/null
+++ b/app/main.py
@@ -0,0 +1,85 @@
+import argparse
+import os
+from contextlib import asynccontextmanager
+from pathlib import Path
+
+import uvicorn
+from fastapi import FastAPI, status
+from pydantic import BaseModel
+from pyiceberg.catalog import load_catalog
+
+from app.routers.hydrofabric.router import api_router as hydrofabric_api_router
+from app.routers.nwm_modules.router import sft_router, topoflow_router
+from app.routers.ras_xs.router import api_router as ras_api_router
+from app.routers.streamflow_observations.router import api_router as streamflow_api_router
+from icefabric.helpers import load_creds
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+ """Loads the iceberg catalog location from an environment variable
+
+ Parameters
+ ----------
+ app: FastAPI
+ The FastAPI app instance
+ """
+ catalog_path = os.getenv("CATALOG_PATH")
+ app.state.catalog = load_catalog(catalog_path)
+ yield
+
+
+app = FastAPI(
+ title="Icefabric API",
+ description="API for accessing iceberg or icechunk data from EDFS services",
+ version="1.0.0",
+ docs_url="/docs",
+ redoc_url="/redoc",
+ lifespan=lifespan,
+)
+
+
+class HealthCheck(BaseModel):
+ """Response model to validate and return when performing a health check."""
+
+ status: str = "OK"
+
+
+# Include routers
+app.include_router(hydrofabric_api_router, prefix="/v1")
+app.include_router(streamflow_api_router, prefix="/v1")
+app.include_router(sft_router, prefix="/v1")
+app.include_router(topoflow_router, prefix="/v1")
+app.include_router(ras_api_router, prefix="/v1")
+
+
+@app.head(
+ "/health",
+ tags=["Health"],
+ summary="Perform a Health Check",
+ response_description="Return HTTP Status Code 200 (OK)",
+ status_code=status.HTTP_200_OK,
+ response_model=HealthCheck,
+)
+def get_health() -> HealthCheck:
+ """Returns a HeatlhCheck for the server"""
+ return HealthCheck(status="OK")
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="The FastAPI App instance for querying versioned EDFS data")
+
+ # Glue = S3 Tables; Sql is a local iceberg catalog
+ parser.add_argument(
+ "--catalog",
+ choices=["glue", "sql"],
+ help="The catalog information for querying versioned EDFS data",
+ default="glue",
+ ) # Setting the default to read from S3
+
+ args = parser.parse_args()
+
+ os.environ["CATALOG_PATH"] = args.catalog
+
+ load_creds(dir=Path.cwd())
+ uvicorn.run("app.main:app", host="0.0.0.0", port=8000, reload=True, log_level="info")
diff --git a/app/routers/__init__.py b/app/routers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/routers/hydrofabric/__init__.py b/app/routers/hydrofabric/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/routers/hydrofabric/router.py b/app/routers/hydrofabric/router.py
new file mode 100644
index 0000000..4ef0d18
--- /dev/null
+++ b/app/routers/hydrofabric/router.py
@@ -0,0 +1,80 @@
+import pathlib
+import tempfile
+import uuid
+
+from fastapi import APIRouter, Depends, HTTPException, Path, Query
+from fastapi.responses import FileResponse
+from starlette.background import BackgroundTask
+
+from app import get_catalog
+from icefabric.hydrofabric import subset
+from icefabric.schemas import HydrofabricDomains, IdType
+
+api_router = APIRouter(prefix="/hydrofabric")
+
+
+@api_router.get("/{identifier}/gpkg")
+async def get_hydrofabric_subset_gpkg(
+ identifier: str = Path(
+ ...,
+ description="Identifier to start tracing from (e.g., catchment ID, POI ID)",
+ examples=["01010000"],
+ openapi_examples={"station_example": {"summary": "USGS Gauge", "value": "01010000"}},
+ ),
+ domain: HydrofabricDomains = Query(
+ HydrofabricDomains.CONUS, description="The iceberg namespace used to query the hydrofabric"
+ ),
+ catalog=Depends(get_catalog),
+):
+ """
+ Get hydrofabric subset as a geopackage file (.gpkg)
+
+ This endpoint creates a subset of the hydrofabric data by tracing upstream
+ from a given identifier and returns all related geospatial layers as a
+ downloadable geopackage file.
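+
+    Examples
+    --------
+    Example requests, assuming the router is mounted under the `/v1` prefix as in `app.main`
+    (the `domain` value shown is the example used elsewhere in this API):
+
+    GET /v1/hydrofabric/01010000/gpkg
+    GET /v1/hydrofabric/01010000/gpkg?domain=conus_hf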
+ """
+ unique_id = str(uuid.uuid4())[:8]
+ temp_dir = pathlib.Path(tempfile.gettempdir())
+ tmp_path = temp_dir / f"hydrofabric_subset_{identifier}_{unique_id}.gpkg"
+ try:
+ # Create the subset
+ subset(
+ catalog=catalog,
+ identifier=f"gages-{identifier}",
+ id_type=IdType.HL_URI,
+ output_file=tmp_path,
+ domain=domain,
+ )
+
+ if not tmp_path.exists():
+ raise HTTPException(status_code=500, detail=f"Failed to create geopackage file at {tmp_path}")
+ if tmp_path.stat().st_size == 0:
+ tmp_path.unlink(missing_ok=True) # Clean up empty file
+ raise HTTPException(status_code=404, detail=f"No data found for identifier '{identifier}'")
+
+ # Verify it's actually a file, not a directory
+ if not tmp_path.is_file():
+ raise HTTPException(status_code=500, detail=f"Expected file but got directory at {tmp_path}")
+
+ print(f"Returning file: {tmp_path} (size: {tmp_path.stat().st_size} bytes)")
+
+ download_filename = f"hydrofabric_subset_{identifier}.gpkg"
+
+ return FileResponse(
+ path=str(tmp_path),
+ filename=download_filename,
+ media_type="application/geopackage+sqlite3",
+ headers={
+ "Content-Description": "Hydrofabric Subset Geopackage",
+ "X-Identifier": identifier,
+ },
+ background=BackgroundTask(lambda: tmp_path.unlink(missing_ok=True)),
+ )
+
+ except HTTPException:
+ raise
+ except Exception:
+ # Clean up temp file if it exists
+ if "tmp_path" in locals() and tmp_path.exists():
+ tmp_path.unlink(missing_ok=True)
+ raise
diff --git a/app/routers/nwm_modules/__init__.py b/app/routers/nwm_modules/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/routers/nwm_modules/router.py b/app/routers/nwm_modules/router.py
new file mode 100644
index 0000000..f519b5f
--- /dev/null
+++ b/app/routers/nwm_modules/router.py
@@ -0,0 +1,74 @@
+from fastapi import APIRouter, Depends, Query
+from pyiceberg.catalog import Catalog
+
+from app import get_catalog
+from icefabric.modules import get_sft_parameters
+from icefabric.schemas import SFT, Albedo, HydrofabricDomains
+
+sft_router = APIRouter(prefix="/modules/sft")
+topoflow_router = APIRouter(prefix="/modules/topoflow")
+
+
+@sft_router.get("/")
+async def get_sft_ipes(
+ identifier: str = Query(
+ ...,
+ description="Gauge ID to trace upstream catchments from",
+ examples=["01010000"],
+ openapi_examples={"sft_example": {"summary": "SFT Example", "value": "01010000"}},
+ ),
+ domain: HydrofabricDomains = Query(
+ HydrofabricDomains.CONUS,
+ description="The iceberg namespace used to query the hydrofabric",
+ openapi_examples={"sft_example": {"summary": "SFT Example", "value": "conus_hf"}},
+ ),
+ use_schaake: bool = Query(
+ False,
+ description="Whether to use Schaake for the Ice Fraction Scheme. Defaults to False to use Xinanjiang",
+ openapi_examples={"sft_example": {"summary": "SFT Example", "value": False}},
+ ),
+ catalog: Catalog = Depends(get_catalog),
+) -> list[SFT]:
+ """
+ An endpoint to return configurations for SFT.
+
+ This endpoint traces upstream from a given gauge ID to get all catchments
+ and returns SFT (Soil Freeze-Thaw) parameter configurations for each catchment.
+
+ **Parameters:**
+ - **identifier**: The Gauge ID to trace upstream from to get all catchments
+ - **domain**: The geographic domain to search for catchments from
+ - **use_schaake**: Determines if we're using Schaake or Xinanjiang to calculate ice fraction
+
+ **Returns:**
+ A list of SFT pydantic objects for each catchment
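+
+    **Example** (path shown with the `/v1` prefix from `app.main`; values taken from the parameter examples above):
+
+    `GET /v1/modules/sft/?identifier=01010000&domain=conus_hf&use_schaake=false`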
+ """
+ return get_sft_parameters(
+ catalog=catalog,
+ domain=domain,
+ identifier=identifier,
+ use_schaake=use_schaake,
+ )
+
+
+@topoflow_router.get("/albedo")
+async def get_albedo(
+ landcover_state: Albedo = Query(
+ ...,
+ description="The landcover state of a catchment for albedo classification",
+ examples=["snow"],
+ openapi_examples={"albedo_example": {"summary": "Albedo Example", "value": "snow"}},
+ ),
+) -> float:
+ """
+ An endpoint to return albedo values for TopoFlow Glacier module.
+
+    This endpoint matches a catchment's land cover class ("snow", "ice", "other") with an albedo value in [0, 1]
+
+ **Parameters:**
+ - **landcover_state**: Land cover state: "snow", "ice", or "other"
+
+ **Returns:**
+ A float albedo value [0, 1]
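+
+    **Example** (path shown with the `/v1` prefix from `app.main`):
+
+    `GET /v1/modules/topoflow/albedo?landcover_state=snow`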
+ """
+ return Albedo.get_landcover_albedo(landcover_state.landcover).value
diff --git a/app/routers/ras_xs/__init__.py b/app/routers/ras_xs/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/routers/ras_xs/router.py b/app/routers/ras_xs/router.py
new file mode 100644
index 0000000..d6318df
--- /dev/null
+++ b/app/routers/ras_xs/router.py
@@ -0,0 +1,150 @@
+import pathlib
+import tempfile
+import uuid
+
+from fastapi import APIRouter, HTTPException, Path, Query
+from fastapi.responses import FileResponse
+from pyiceberg.catalog import load_catalog
+from starlette.background import BackgroundTask
+
+from icefabric.ras_xs import subset_xs
+from icefabric.schemas import XsType
+
+api_router = APIRouter(prefix="/ras_xs")
+
+
+@api_router.get("/{identifier}/")
+async def get_xs_subset_gpkg(
+ identifier: str = Path(
+ ...,
+ description="HUC-8 identifier to filter by huc ID",
+ examples=["02040106"],
+ openapi_examples={"huc": {"summary": "XS Example", "value": "02040106"}},
+ ),
+ xstype: XsType = Query(XsType.MIP, description="The iceberg namespace used to query the cross-sections"),
+):
+ """
+ Get geopackage subset from the mip xs iceberg catalog by table identifier (aka huc ID).
+
+ This endpoint will query cross-sections from the mip xs iceberg catalog by huc & return
+ the data subset as a downloadable geopackage file.
+
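+    Examples
+    --------
+    An example request, assuming the router is mounted under the `/v1` prefix as in `app.main`
+    (the optional `xstype` query parameter defaults to MIP cross-sections):
+
+    GET /v1/ras_xs/02040106/
+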
+ """
+ catalog = load_catalog("glue")
+ unique_id = str(uuid.uuid4())[:8]
+ temp_dir = pathlib.Path(tempfile.gettempdir())
+ tmp_path = temp_dir / f"ras_xs_{identifier}_{unique_id}.gpkg"
+ try:
+ # Create data subset
+ data_gdf = subset_xs(catalog=catalog, identifier=f"{identifier}", output_file=tmp_path, xstype=xstype)
+
+ if not tmp_path.exists():
+ raise HTTPException(status_code=500, detail=f"Failed to create geopackage file at {tmp_path}.")
+ if tmp_path.stat().st_size == 0:
+ tmp_path.unlink(missing_ok=True)
+ raise HTTPException(status_code=404, detail=f"No data found for HUC {identifier}.")
+
+ # Verify it's actually a file, not a directory
+ if not tmp_path.is_file():
+ raise HTTPException(status_code=500, detail=f"Expected file, but got directory at {tmp_path}.")
+
+ print(f"Returning file: {tmp_path} (size: {tmp_path.stat().st_size} bytes)")
+
+ download_filename = f"ras_xs_huc{identifier}.gpkg"
+
+ return FileResponse(
+ path=str(tmp_path),
+ filename=download_filename,
+ media_type="application/geopackage+sqlite3",
+ headers={
+ "Data_Source": f"{xstype}_xs",
+ "HUC Identifier": identifier,
+ "Description": f"{xstype} RAS Cross-Section Geopackage",
+ "Total Records": f"{len(data_gdf)}",
+ },
+ background=BackgroundTask(lambda: tmp_path.unlink(missing_ok=True)),
+ )
+ except HTTPException:
+ raise
+ except Exception:
+ # Clean up temp file if it exists
+ if "tmp_path" in locals() and tmp_path.exists():
+ tmp_path.unlink(missing_ok=True)
+ raise
+
+
+@api_router.get("/{identifier}/dsreachid={ds_reach_id}")
+async def get_xs_subset_by_huc_reach_gpkg(
+ identifier: str = Path(
+ ...,
+ description="Identifier to filter data by huc ID",
+ examples=["02040106"],
+ openapi_examples={"xs": {"summary": "XS Example", "value": "02040106"}},
+ ),
+ ds_reach_id: str = Path(
+ ...,
+ description="Identifier to filter data by downstream reach ID)",
+ examples=["4188251"],
+ openapi_examples={"xs": {"summary": "XS Example", "value": "4188251"}},
+ ),
+ xstype: XsType = Query(XsType.MIP, description="The iceberg namespace used to query the cross-sections"),
+):
+ """
+ Get geopackage subset from the mip xs iceberg catalog by reach ID at huc ID.
+
+ This endpoint will query cross-sections from the mip xs iceberg catalog by
+ downstream reach ID at given huc ID -- returning the data subset as a downloadable geopackage file.
+
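+    Examples
+    --------
+    An example request, assuming the router is mounted under the `/v1` prefix as in `app.main`:
+
+    GET /v1/ras_xs/02040106/dsreachid=4188251
+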
+ """
+ catalog = load_catalog("glue")
+ unique_id = str(uuid.uuid4())[:8]
+ temp_dir = pathlib.Path(tempfile.gettempdir())
+ tmp_path = temp_dir / f"ras_xs_{identifier}_{ds_reach_id}_{unique_id}.gpkg"
+ try:
+ # Create data subset
+ data_gdf = subset_xs(
+ catalog=catalog,
+ identifier=f"{identifier}",
+ ds_reach_id=f"{ds_reach_id}",
+ output_file=tmp_path,
+ xstype=xstype,
+ )
+
+ if not tmp_path.exists():
+ raise HTTPException(status_code=500, detail=f"Failed to create geopackage file at {tmp_path}.")
+ if tmp_path.stat().st_size == 0:
+ tmp_path.unlink(missing_ok=True)
+ raise HTTPException(
+ status_code=404,
+ detail=f"No data found for downstream reach id {ds_reach_id} @ HUC{identifier}.",
+ )
+
+ # Verify it's actually a file, not a directory
+ if not tmp_path.is_file():
+ raise HTTPException(status_code=500, detail=f"Expected file, but got directory at {tmp_path}.")
+
+ print(f"Returning file: {tmp_path} (size: {tmp_path.stat().st_size} bytes)")
+
+ download_filename = f"ras_xs_huc{identifier}_dsreachid{ds_reach_id}.gpkg"
+
+ return FileResponse(
+ path=str(tmp_path),
+ filename=download_filename,
+ media_type="application/geopackage+sqlite3",
+ headers={
+ "Data_Source": f"{xstype}_xs",
+ "HUC Identifier": identifier,
+ "DS Reach Identifier": ds_reach_id,
+ "Description": f"{xstype} RAS Cross-Section Geopackage",
+ "Total Records": f"{len(data_gdf)}",
+ },
+ background=BackgroundTask(lambda: tmp_path.unlink(missing_ok=True)),
+ )
+
+ except HTTPException:
+ raise
+ except Exception:
+ # Clean up temp file if it exists
+ if "tmp_path" in locals() and tmp_path.exists():
+ tmp_path.unlink(missing_ok=True)
+ raise
diff --git a/app/routers/streamflow_observations/__init__.py b/app/routers/streamflow_observations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/routers/streamflow_observations/router.py b/app/routers/streamflow_observations/router.py
new file mode 100644
index 0000000..f869482
--- /dev/null
+++ b/app/routers/streamflow_observations/router.py
@@ -0,0 +1,343 @@
+import io
+from datetime import datetime
+from enum import Enum
+
+from botocore.exceptions import ClientError
+from fastapi import APIRouter, HTTPException, Path, Query
+from fastapi.responses import Response
+from pyiceberg.catalog import load_catalog
+
+api_router = APIRouter(prefix="/streamflow_observations")
+
+
+# TODO add other gauges used by NWM
+class DataSource(str, Enum):
+ """All observational streamflow sources"""
+
+ USGS = "usgs"
+
+
+# Configuration for each data source
+DATA_SOURCE_CONFIG = {
+ DataSource.USGS: {
+ "namespace": "streamflow_observations",
+ "table": "usgs_hourly",
+ "time_column": "time",
+ "units": "cms",
+ "description": "USGS stream gauge hourly data",
+ },
+}
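+
+# A hypothetical additional source (see the TODO above the DataSource enum) would follow
+# the same shape -- the names below are illustrative only, and the table would need to
+# exist in the catalog before being enabled:
+#
+# DATA_SOURCE_CONFIG[DataSource.ENVCA] = {
+#     "namespace": "streamflow_observations",
+#     "table": "envca_hourly",
+#     "time_column": "time",
+#     "units": "cms",
+#     "description": "Environment Canada stream gauge hourly data",
+# }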
+
+
+def get_catalog_and_table(data_source: DataSource):
+ """Get catalog and table for a given data source"""
+ config = DATA_SOURCE_CONFIG[data_source]
+ try:
+ catalog = load_catalog("glue")
+ table = catalog.load_table(f"{config['namespace']}.{config['table']}")
+ except ClientError as e:
+ msg = "AWS Test account credentials expired. Can't access remote S3 endpoint"
+ print(msg)
+ raise e
+ return catalog, table, config
+
+
+def validate_identifier(data_source: DataSource, identifier: str):
+ """Check if identifier exists in the dataset"""
+ catalog, table, config = get_catalog_and_table(data_source)
+ schema = table.schema()
+ available_columns = [field.name for field in schema.fields]
+
+ if identifier not in available_columns:
+ available_ids = [col for col in available_columns if col != config["time_column"]]
+ raise HTTPException(
+ status_code=404,
+ detail=f"ID '{identifier}' not found in {data_source} dataset. Available IDs: {available_ids[:10]}...",
+ )
+
+ return catalog, table, config
+
+
+@api_router.get("/{data_source}/available")
+async def get_available_identifiers(
+ data_source: DataSource = Path(..., description="Data source type"),
+ limit: int = Query(100, description="Maximum number of IDs to return"),
+):
+ """
+ Get list of available identifiers for a data source
+
+ Examples
+ --------
+    GET /v1/streamflow_observations/usgs/available
+    GET /v1/streamflow_observations/usgs/available?limit=50
+ """
+ try:
+ _, table, config = get_catalog_and_table(data_source)
+
+ schema = table.schema()
+ # Get all columns except time column
+ identifier_columns = [field.name for field in schema.fields if field.name != config["time_column"]]
+
+ return {
+ "data_source": data_source.value,
+ "description": config["description"],
+ "total_identifiers": len(identifier_columns),
+ "identifiers": sorted(identifier_columns)[:limit],
+ "showing": min(limit, len(identifier_columns)),
+ "units": config["units"],
+ }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+@api_router.get("/{data_source}/csv")
+async def get_data_csv(
+ data_source: DataSource = Path(..., description="Data source type"),
+ identifier: str = Query(
+ ...,
+ description="Station/gauge ID",
+ examples=["01010000"],
+ openapi_examples={"station_example": {"summary": "USGS Gauge", "value": "01010000"}},
+ ),
+ start_date: datetime | None = Query(
+ None,
+ description="Start Date",
+ openapi_examples={"sample_date": {"summary": "Sample Date", "value": "2021-12-31T14:00:00"}},
+ ),
+ end_date: datetime | None = Query(
+ None,
+ description="End Date",
+ openapi_examples={"sample_date": {"summary": "Sample Date", "value": "2022-01-01T14:00:00"}},
+ ),
+ include_headers: bool = Query(True, description="Include CSV headers"),
+):
+ """
+ Get data as CSV file for any data source
+
+ Examples
+ --------
+    GET /v1/streamflow_observations/usgs/csv?identifier=01031500
+ """
+ try:
+ _, table, config = validate_identifier(data_source, identifier)
+ scan_builder = table.scan(selected_fields=[config["time_column"], identifier])
+ if start_date:
+ scan_builder = scan_builder.filter(f"{config['time_column']} >= '{start_date.isoformat()}'")
+ if end_date:
+ scan_builder = scan_builder.filter(f"{config['time_column']} <= '{end_date.isoformat()}'")
+
+ df = scan_builder.to_pandas()
+
+ if df.empty:
+ return Response(
+ content="Error: No data available for the specified parameters",
+ status_code=404,
+ media_type="text/plain",
+ )
+
+ df = df.rename(columns={config["time_column"]: "time", identifier: "q_cms"})
+
+ csv_buffer = io.StringIO()
+ df.to_csv(csv_buffer, index=False, header=include_headers)
+ csv_data = csv_buffer.getvalue()
+
+ filename_parts = [data_source.value, identifier, "data"]
+ if start_date:
+ filename_parts.append(f"from_{start_date.strftime('%Y%m%d_%H%M')}")
+ if end_date:
+ filename_parts.append(f"to_{end_date.strftime('%Y%m%d_%H%M')}")
+ filename = "_".join(filename_parts) + ".csv"
+
+ return Response(
+ content=csv_data,
+ media_type="text/csv",
+ headers={
+ "Content-Disposition": f"attachment; filename={filename}",
+ "X-Total-Records": str(len(df)),
+ "X-Data-Source": data_source.value,
+ "X-Units": config["units"],
+ },
+ )
+
+ except HTTPException:
+ raise
+
+
+@api_router.get("/{data_source}/parquet")
+async def get_data_parquet(
+ data_source: DataSource = Path(..., description="Data source type"),
+ identifier: str = Query(
+ ...,
+ description="Station/gauge ID",
+ examples=["01010000"],
+ openapi_examples={"station_example": {"summary": "USGS Gauge", "value": "01010000"}},
+ ),
+ start_date: datetime | None = Query(
+ None,
+ description="Start Date",
+ openapi_examples={"sample_date": {"summary": "Sample Date", "value": "2021-12-31T14:00:00"}},
+ ),
+ end_date: datetime | None = Query(
+ None,
+ description="End Date",
+ openapi_examples={"sample_date": {"summary": "Sample Date", "value": "2022-01-01T14:00:00"}},
+ ),
+):
+ """
+ Get data as Parquet file for any data source
+
+ Examples
+ --------
+    GET /v1/streamflow_observations/usgs/parquet?identifier=01031500
+    GET /v1/streamflow_observations/usgs/parquet?identifier=01031500&start_date=2023-01-01T00:00:00
+ """
+ try:
+ _, table, config = validate_identifier(data_source, identifier)
+
+ scan_builder = table.scan(selected_fields=[config["time_column"], identifier])
+
+ if start_date:
+ scan_builder = scan_builder.filter(f"{config['time_column']} >= '{start_date.isoformat()}'")
+ if end_date:
+ scan_builder = scan_builder.filter(f"{config['time_column']} <= '{end_date.isoformat()}'")
+
+ df = scan_builder.to_pandas()
+ if df.empty:
+ raise HTTPException(status_code=404, detail="No data available for the specified parameters")
+
+ # Prepare output with metadata
+ df = df.rename(columns={config["time_column"]: "time", identifier: "q_cms"}).copy()
+ df["data_source"] = data_source.value
+ df["identifier"] = identifier
+ df["units"] = config["units"]
+
+ parquet_buffer = io.BytesIO()
+ df.to_parquet(parquet_buffer, index=False, compression="lz4", engine="pyarrow")
+ parquet_data = parquet_buffer.getvalue()
+
+ # Fix filename generation with proper datetime formatting
+ filename_parts = [data_source.value, identifier, "data"]
+ if start_date:
+ filename_parts.append(f"from_{start_date.strftime('%Y%m%d_%H%M')}")
+ if end_date:
+ filename_parts.append(f"to_{end_date.strftime('%Y%m%d_%H%M')}")
+ filename = "_".join(filename_parts) + ".parquet"
+
+ return Response(
+ content=parquet_data,
+ media_type="application/octet-stream",
+ headers={
+ "Content-Disposition": f"attachment; filename={filename}",
+ "X-Total-Records": str(len(df)),
+ "X-Data-Source": data_source.value,
+ "X-Compression": "lz4",
+ "X-Units": config["units"],
+ },
+ )
+
+ except HTTPException:
+ raise
+ except ValueError as e:
+        return Response(content=f"Error: {str(e)}", status_code=500, media_type="text/plain")
+
+
+@api_router.get("/{data_source}/info")
+async def get_data_source_info(
+ data_source: DataSource = Path(..., description="Data source type"),
+):
+ """
+ Get information about dataset size and recommendations
+
+ Examples
+ --------
+    GET /v1/streamflow_observations/usgs/info
+ """
+ try:
+ _, table, config = get_catalog_and_table(data_source)
+
+ df = table.inspect.snapshots().to_pandas()
+
+ # Converting to an int rather than a numpy.int64
+ latest_snapshot_id = int(df.loc[df["committed_at"].idxmax(), "snapshot_id"])
+ snapshots = table.inspect.snapshots().to_pydict()
+
+ snapshots = dict(snapshots)
+ # Converting to an int rather than a numpy.int64
+ if "snapshot_id" in snapshots and snapshots["snapshot_id"]:
+ snapshots["snapshot_id"] = [int(sid) for sid in snapshots["snapshot_id"]]
+
+ return {
+ "data_source": data_source.value,
+ "latest_snapshot": latest_snapshot_id,
+ "description": config["description"],
+ "units": config["units"],
+ "snapshots": snapshots,
+ }
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+@api_router.get("/{data_source}/{identifier}/info")
+async def get_data_info(
+ data_source: DataSource = Path(..., description="Data source type"),
+ identifier: str = Path(..., description="Station/gauge ID", examples=["01031500"]),
+):
+ """
+ Get information about dataset size and recommendations
+
+ Examples
+ --------
+    GET /v1/streamflow_observations/usgs/01031500/info
+ """
+ try:
+ _, table, config = validate_identifier(data_source, identifier)
+
+ # Get data info
+ df = table.scan(selected_fields=[config["time_column"], identifier]).to_pandas()
+        df_clean = df.dropna(subset=[identifier])  # Dropping NA to determine the full date range
+
+ return {
+ "data_source": data_source.value,
+ "identifier": identifier,
+ "description": config["description"],
+ "total_records": len(df_clean),
+ "units": config["units"],
+ "date_range": {
+ "start": df_clean[config["time_column"]].min().isoformat() if not df_clean.empty else None,
+ "end": df_clean[config["time_column"]].max().isoformat() if not df_clean.empty else None,
+ },
+ "estimated_sizes": {
+ "csv_mb": round(len(df_clean) * 25 / 1024 / 1024, 2),
+ "parquet_mb": round(len(df_clean) * 8 / 1024 / 1024, 2),
+ },
+ }
+
+ except HTTPException:
+ raise
+ except ValueError as e:
+        return Response(content=f"Error: {str(e)}", status_code=500, media_type="text/plain")
+
+
+@api_router.get("/sources")
+async def get_available_sources():
+ """
+ Get list of all available data sources
+
+ Examples
+ --------
+    GET /v1/streamflow_observations/sources
+ """
+ sources = []
+ for source, config in DATA_SOURCE_CONFIG.items():
+ sources.append(
+ {
+ "name": source.value,
+ "description": config["description"],
+ "namespace": config["namespace"],
+ "table": config["table"],
+ "units": config["units"],
+ }
+ )
+
+ return {"available_sources": sources, "total_sources": len(sources)}
diff --git a/data/LICENSE b/data/LICENSE
new file mode 100644
index 0000000..b03120f
--- /dev/null
+++ b/data/LICENSE
@@ -0,0 +1,540 @@
+## ODC Open Database License (ODbL)
+
+### Preamble
+
+The Open Database License (ODbL) is a license agreement intended to
+allow users to freely share, modify, and use this Database while
+maintaining this same freedom for others. Many databases are covered by
+copyright, and therefore this document licenses these rights. Some
+jurisdictions, mainly in the European Union, have specific rights that
+cover databases, and so the ODbL addresses these rights, too. Finally,
+the ODbL is also an agreement in contract for users of this Database to
+act in certain ways in return for accessing this Database.
+
+Databases can contain a wide variety of types of content (images,
+audiovisual material, and sounds all in the same database, for example),
+and so the ODbL only governs the rights over the Database, and not the
+contents of the Database individually. Licensors should use the ODbL
+together with another license for the contents, if the contents have a
+single set of rights that uniformly covers all of the contents. If the
+contents have multiple sets of different rights, Licensors should
+describe what rights govern what contents together in the individual
+record or in some other way that clarifies what rights apply.
+
+Sometimes the contents of a database, or the database itself, can be
+covered by other rights not addressed here (such as private contracts,
+trade mark over the name, or privacy rights / data protection rights
+over information in the contents), and so you are advised that you may
+have to consult other documents or clear other rights before doing
+activities not covered by this License.
+
+------
+
+The Licensor (as defined below)
+
+and
+
+You (as defined below)
+
+agree as follows:
+
+### 1.0 Definitions of Capitalised Words
+
+"Collective Database" – Means this Database in unmodified form as part
+of a collection of independent databases in themselves that together are
+assembled into a collective whole. A work that constitutes a Collective
+Database will not be considered a Derivative Database.
+
+"Convey" – As a verb, means Using the Database, a Derivative Database,
+or the Database as part of a Collective Database in any way that enables
+a Person to make or receive copies of the Database or a Derivative
+Database. Conveying does not include interaction with a user through a
+computer network, or creating and Using a Produced Work, where no
+transfer of a copy of the Database or a Derivative Database occurs.
+"Contents" – The contents of this Database, which includes the
+information, independent works, or other material collected into the
+Database. For example, the contents of the Database could be factual
+data or works such as images, audiovisual material, text, or sounds.
+
+"Database" – A collection of material (the Contents) arranged in a
+systematic or methodical way and individually accessible by electronic
+or other means offered under the terms of this License.
+
+"Database Directive" – Means Directive 96/9/EC of the European
+Parliament and of the Council of 11 March 1996 on the legal protection
+of databases, as amended or succeeded.
+
+"Database Right" – Means rights resulting from the Chapter III ("sui
+generis") rights in the Database Directive (as amended and as transposed
+by member states), which includes the Extraction and Re-utilisation of
+the whole or a Substantial part of the Contents, as well as any similar
+rights available in the relevant jurisdiction under Section 10.4.
+
+"Derivative Database" – Means a database based upon the Database, and
+includes any translation, adaptation, arrangement, modification, or any
+other alteration of the Database or of a Substantial part of the
+Contents. This includes, but is not limited to, Extracting or
+Re-utilising the whole or a Substantial part of the Contents in a new
+Database.
+
+"Extraction" – Means the permanent or temporary transfer of all or a
+Substantial part of the Contents to another medium by any means or in
+any form.
+
+"License" – Means this license agreement and is both a license of rights
+such as copyright and Database Rights and an agreement in contract.
+
+"Licensor" – Means the Person that offers the Database under the terms
+of this License.
+
+"Person" – Means a natural or legal person or a body of persons
+corporate or incorporate.
+
+"Produced Work" – a work (such as an image, audiovisual material, text,
+or sounds) resulting from using the whole or a Substantial part of the
+Contents (via a search or other query) from this Database, a Derivative
+Database, or this Database as part of a Collective Database.
+
+"Publicly" – means to Persons other than You or under Your control by
+either more than 50% ownership or by the power to direct their
+activities (such as contracting with an independent consultant).
+
+"Re-utilisation" – means any form of making available to the public all
+or a Substantial part of the Contents by the distribution of copies, by
+renting, by online or other forms of transmission.
+
+"Substantial" – Means substantial in terms of quantity or quality or a
+combination of both. The repeated and systematic Extraction or
+Re-utilisation of insubstantial parts of the Contents may amount to the
+Extraction or Re-utilisation of a Substantial part of the Contents.
+
+"Use" – As a verb, means doing any act that is restricted by copyright
+or Database Rights whether in the original medium or any other; and
+includes without limitation distributing, copying, publicly performing,
+publicly displaying, and preparing derivative works of the Database, as
+well as modifying the Database as may be technically necessary to use it
+in a different mode or format.
+
+"You" – Means a Person exercising rights under this License who has not
+previously violated the terms of this License with respect to the
+Database, or who has received express permission from the Licensor to
+exercise rights under this License despite a previous violation.
+
+Words in the singular include the plural and vice versa.
+
+### 2.0 What this License covers
+
+2.1. Legal effect of this document. This License is:
+
+ a. A license of applicable copyright and neighbouring rights;
+
+ b. A license of the Database Right; and
+
+ c. An agreement in contract between You and the Licensor.
+
+2.2 Legal rights covered. This License covers the legal rights in the
+Database, including:
+
+ a. Copyright. Any copyright or neighbouring rights in the Database.
+ The copyright licensed includes any individual elements of the
+ Database, but does not cover the copyright over the Contents
+ independent of this Database. See Section 2.4 for details. Copyright
+ law varies between jurisdictions, but is likely to cover: the Database
+ model or schema, which is the structure, arrangement, and organisation
+ of the Database, and can also include the Database tables and table
+ indexes; the data entry and output sheets; and the Field names of
+ Contents stored in the Database;
+
+ b. Database Rights. Database Rights only extend to the Extraction and
+ Re-utilisation of the whole or a Substantial part of the Contents.
+ Database Rights can apply even when there is no copyright over the
+ Database. Database Rights can also apply when the Contents are removed
+ from the Database and are selected and arranged in a way that would
+ not infringe any applicable copyright; and
+
+ c. Contract. This is an agreement between You and the Licensor for
+ access to the Database. In return you agree to certain conditions of
+ use on this access as outlined in this License.
+
+2.3 Rights not covered.
+
+ a. This License does not apply to computer programs used in the making
+ or operation of the Database;
+
+ b. This License does not cover any patents over the Contents or the
+ Database; and
+
+ c. This License does not cover any trademarks associated with the
+ Database.
+
+2.4 Relationship to Contents in the Database. The individual items of
+the Contents contained in this Database may be covered by other rights,
+including copyright, patent, data protection, privacy, or personality
+rights, and this License does not cover any rights (other than Database
+Rights or in contract) in individual Contents contained in the Database.
+For example, if used on a Database of images (the Contents), this
+License would not apply to copyright over individual images, which could
+have their own separate licenses, or one single license covering all of
+the rights over the images.
+
+### 3.0 Rights granted
+
+3.1 Subject to the terms and conditions of this License, the Licensor
+grants to You a worldwide, royalty-free, non-exclusive, terminable (but
+only under Section 9) license to Use the Database for the duration of
+any applicable copyright and Database Rights. These rights explicitly
+include commercial use, and do not exclude any field of endeavour. To
+the extent possible in the relevant jurisdiction, these rights may be
+exercised in all media and formats whether now known or created in the
+future.
+
+The rights granted cover, for example:
+
+ a. Extraction and Re-utilisation of the whole or a Substantial part of
+ the Contents;
+
+ b. Creation of Derivative Databases;
+
+ c. Creation of Collective Databases;
+
+ d. Creation of temporary or permanent reproductions by any means and
+ in any form, in whole or in part, including of any Derivative
+ Databases or as a part of Collective Databases; and
+
+ e. Distribution, communication, display, lending, making available, or
+ performance to the public by any means and in any form, in whole or in
+ part, including of any Derivative Database or as a part of Collective
+ Databases.
+
+3.2 Compulsory license schemes. For the avoidance of doubt:
+
+ a. Non-waivable compulsory license schemes. In those jurisdictions in
+ which the right to collect royalties through any statutory or
+ compulsory licensing scheme cannot be waived, the Licensor reserves
+ the exclusive right to collect such royalties for any exercise by You
+ of the rights granted under this License;
+
+ b. Waivable compulsory license schemes. In those jurisdictions in
+ which the right to collect royalties through any statutory or
+ compulsory licensing scheme can be waived, the Licensor waives the
+ exclusive right to collect such royalties for any exercise by You of
+ the rights granted under this License; and,
+
+ c. Voluntary license schemes. The Licensor waives the right to collect
+ royalties, whether individually or, in the event that the Licensor is
+ a member of a collecting society that administers voluntary licensing
+ schemes, via that society, from any exercise by You of the rights
+ granted under this License.
+
+3.3 The right to release the Database under different terms, or to stop
+distributing or making available the Database, is reserved. Note that
+this Database may be multiple-licensed, and so You may have the choice
+of using alternative licenses for this Database. Subject to Section
+10.4, all other rights not expressly granted by Licensor are reserved.
+
+### 4.0 Conditions of Use
+
+4.1 The rights granted in Section 3 above are expressly made subject to
+Your complying with the following conditions of use. These are important
+conditions of this License, and if You fail to follow them, You will be
+in material breach of its terms.
+
+4.2 Notices. If You Publicly Convey this Database, any Derivative
+Database, or the Database as part of a Collective Database, then You
+must:
+
+ a. Do so only under the terms of this License or another license
+ permitted under Section 4.4;
+
+ b. Include a copy of this License (or, as applicable, a license
+ permitted under Section 4.4) or its Uniform Resource Identifier (URI)
+ with the Database or Derivative Database, including both in the
+ Database or Derivative Database and in any relevant documentation; and
+
+ c. Keep intact any copyright or Database Right notices and notices
+ that refer to this License.
+
+ d. If it is not possible to put the required notices in a particular
+ file due to its structure, then You must include the notices in a
+ location (such as a relevant directory) where users would be likely to
+ look for it.
+
+4.3 Notice for using output (Contents). Creating and Using a Produced
+Work does not require the notice in Section 4.2. However, if you
+Publicly Use a Produced Work, You must include a notice associated with
+the Produced Work reasonably calculated to make any Person that uses,
+views, accesses, interacts with, or is otherwise exposed to the Produced
+Work aware that Content was obtained from the Database, Derivative
+Database, or the Database as part of a Collective Database, and that it
+is available under this License.
+
+ a. Example notice. The following text will satisfy notice under
+ Section 4.3:
+
+ Contains information from DATABASE NAME, which is made available
+ here under the Open Database License (ODbL).
+
+DATABASE NAME should be replaced with the name of the Database and a
+hyperlink to the URI of the Database. "Open Database License" should
+contain a hyperlink to the URI of the text of this License. If
+hyperlinks are not possible, You should include the plain text of the
+required URI's with the above notice.
+
+4.4 Share alike.
+
+ a. Any Derivative Database that You Publicly Use must be only under
+ the terms of:
+
+ i. This License;
+
+ ii. A later version of this License similar in spirit to this
+ License; or
+
+ iii. A compatible license.
+
+ If You license the Derivative Database under one of the licenses
+ mentioned in (iii), You must comply with the terms of that license.
+
+ b. For the avoidance of doubt, Extraction or Re-utilisation of the
+ whole or a Substantial part of the Contents into a new database is a
+ Derivative Database and must comply with Section 4.4.
+
+ c. Derivative Databases and Produced Works. A Derivative Database is
+ Publicly Used and so must comply with Section 4.4. if a Produced Work
+ created from the Derivative Database is Publicly Used.
+
+ d. Share Alike and additional Contents. For the avoidance of doubt,
+ You must not add Contents to Derivative Databases under Section 4.4 a
+ that are incompatible with the rights granted under this License.
+
+ e. Compatible licenses. Licensors may authorise a proxy to determine
+ compatible licenses under Section 4.4 a iii. If they do so, the
+ authorised proxy's public statement of acceptance of a compatible
+ license grants You permission to use the compatible license.
+
+
+4.5 Limits of Share Alike. The requirements of Section 4.4 do not apply
+in the following:
+
+ a. For the avoidance of doubt, You are not required to license
+ Collective Databases under this License if You incorporate this
+ Database or a Derivative Database in the collection, but this License
+ still applies to this Database or a Derivative Database as a part of
+ the Collective Database;
+
+ b. Using this Database, a Derivative Database, or this Database as
+ part of a Collective Database to create a Produced Work does not
+ create a Derivative Database for purposes of Section 4.4; and
+
+ c. Use of a Derivative Database internally within an organisation is
+ not to the public and therefore does not fall under the requirements
+ of Section 4.4.
+
+4.6 Access to Derivative Databases. If You Publicly Use a Derivative
+Database or a Produced Work from a Derivative Database, You must also
+offer to recipients of the Derivative Database or Produced Work a copy
+in a machine readable form of:
+
+ a. The entire Derivative Database; or
+
+ b. A file containing all of the alterations made to the Database or
+ the method of making the alterations to the Database (such as an
+ algorithm), including any additional Contents, that make up all the
+ differences between the Database and the Derivative Database.
+
+The Derivative Database (under a.) or alteration file (under b.) must be
+available at no more than a reasonable production cost for physical
+distributions and free of charge if distributed over the internet.
+
+4.7 Technological measures and additional terms
+
+ a. This License does not allow You to impose (except subject to
+ Section 4.7 b.) any terms or any technological measures on the
+ Database, a Derivative Database, or the whole or a Substantial part of
+ the Contents that alter or restrict the terms of this License, or any
+ rights granted under it, or have the effect or intent of restricting
+ the ability of any person to exercise those rights.
+
+ b. Parallel distribution. You may impose terms or technological
+ measures on the Database, a Derivative Database, or the whole or a
+ Substantial part of the Contents (a "Restricted Database") in
+ contravention of Section 4.74 a. only if You also make a copy of the
+ Database or a Derivative Database available to the recipient of the
+ Restricted Database:
+
+ i. That is available without additional fee;
+
+ ii. That is available in a medium that does not alter or restrict
+ the terms of this License, or any rights granted under it, or have
+ the effect or intent of restricting the ability of any person to
+ exercise those rights (an "Unrestricted Database"); and
+
+ iii. The Unrestricted Database is at least as accessible to the
+ recipient as a practical matter as the Restricted Database.
+
+ c. For the avoidance of doubt, You may place this Database or a
+ Derivative Database in an authenticated environment, behind a
+ password, or within a similar access control scheme provided that You
+ do not alter or restrict the terms of this License or any rights
+ granted under it or have the effect or intent of restricting the
+ ability of any person to exercise those rights.
+
+4.8 Licensing of others. You may not sublicense the Database. Each time
+You communicate the Database, the whole or Substantial part of the
+Contents, or any Derivative Database to anyone else in any way, the
+Licensor offers to the recipient a license to the Database on the same
+terms and conditions as this License. You are not responsible for
+enforcing compliance by third parties with this License, but You may
+enforce any rights that You have over a Derivative Database. You are
+solely responsible for any modifications of a Derivative Database made
+by You or another Person at Your direction. You may not impose any
+further restrictions on the exercise of the rights granted or affirmed
+under this License.
+
+### 5.0 Moral rights
+
+5.1 Moral rights. This section covers moral rights, including any rights
+to be identified as the author of the Database or to object to treatment
+that would otherwise prejudice the author's honour and reputation, or
+any other derogatory treatment:
+
+ a. For jurisdictions allowing waiver of moral rights, Licensor waives
+ all moral rights that Licensor may have in the Database to the fullest
+ extent possible by the law of the relevant jurisdiction under Section
+ 10.4;
+
+ b. If waiver of moral rights under Section 5.1 a in the relevant
+ jurisdiction is not possible, Licensor agrees not to assert any moral
+ rights over the Database and waives all claims in moral rights to the
+ fullest extent possible by the law of the relevant jurisdiction under
+ Section 10.4; and
+
+ c. For jurisdictions not allowing waiver or an agreement not to assert
+ moral rights under Section 5.1 a and b, the author may retain their
+ moral rights over certain aspects of the Database.
+
+Please note that some jurisdictions do not allow for the waiver of moral
+rights, and so moral rights may still subsist over the Database in some
+jurisdictions.
+
+### 6.0 Fair dealing, Database exceptions, and other rights not affected
+
+6.1 This License does not affect any rights that You or anyone else may
+independently have under any applicable law to make any use of this
+Database, including without limitation:
+
+ a. Exceptions to the Database Right including: Extraction of Contents
+ from non-electronic Databases for private purposes, Extraction for
+ purposes of illustration for teaching or scientific research, and
+ Extraction or Re-utilisation for public security or an administrative
+ or judicial procedure.
+
+ b. Fair dealing, fair use, or any other legally recognised limitation
+ or exception to infringement of copyright or other applicable laws.
+
+6.2 This License does not affect any rights of lawful users to Extract
+and Re-utilise insubstantial parts of the Contents, evaluated
+quantitatively or qualitatively, for any purposes whatsoever, including
+creating a Derivative Database (subject to other rights over the
+Contents, see Section 2.4). The repeated and systematic Extraction or
+Re-utilisation of insubstantial parts of the Contents may however amount
+to the Extraction or Re-utilisation of a Substantial part of the
+Contents.
+
+### 7.0 Warranties and Disclaimer
+
+7.1 The Database is licensed by the Licensor "as is" and without any
+warranty of any kind, either express, implied, or arising by statute,
+custom, course of dealing, or trade usage. Licensor specifically
+disclaims any and all implied warranties or conditions of title,
+non-infringement, accuracy or completeness, the presence or absence of
+errors, fitness for a particular purpose, merchantability, or otherwise.
+Some jurisdictions do not allow the exclusion of implied warranties, so
+this exclusion may not apply to You.
+
+### 8.0 Limitation of liability
+
+8.1 Subject to any liability that may not be excluded or limited by law,
+the Licensor is not liable for, and expressly excludes, all liability
+for loss or damage however and whenever caused to anyone by any use
+under this License, whether by You or by anyone else, and whether caused
+by any fault on the part of the Licensor or not. This exclusion of
+liability includes, but is not limited to, any special, incidental,
+consequential, punitive, or exemplary damages such as loss of revenue,
+data, anticipated profits, and lost business. This exclusion applies
+even if the Licensor has been advised of the possibility of such
+damages.
+
+8.2 If liability may not be excluded by law, it is limited to actual and
+direct financial loss to the extent it is caused by proved negligence on
+the part of the Licensor.
+
+### 9.0 Termination of Your rights under this License
+
+9.1 Any breach by You of the terms and conditions of this License
+automatically terminates this License with immediate effect and without
+notice to You. For the avoidance of doubt, Persons who have received the
+Database, the whole or a Substantial part of the Contents, Derivative
+Databases, or the Database as part of a Collective Database from You
+under this License will not have their licenses terminated provided
+their use is in full compliance with this License or a license granted
+under Section 4.8 of this License. Sections 1, 2, 7, 8, 9 and 10 will
+survive any termination of this License.
+
+9.2 If You are not in breach of the terms of this License, the Licensor
+will not terminate Your rights under it.
+
+9.3 Unless terminated under Section 9.1, this License is granted to You
+for the duration of applicable rights in the Database.
+
+9.4 Reinstatement of rights. If you cease any breach of the terms and
+conditions of this License, then your full rights under this License
+will be reinstated:
+
+ a. Provisionally and subject to permanent termination until the 60th
+ day after cessation of breach;
+
+ b. Permanently on the 60th day after cessation of breach unless
+ otherwise reasonably notified by the Licensor; or
+
+ c. Permanently if reasonably notified by the Licensor of the
+ violation, this is the first time You have received notice of
+ violation of this License from the Licensor, and You cure the
+ violation prior to 30 days after your receipt of the notice.
+
+Persons subject to permanent termination of rights are not eligible to
+be a recipient and receive a license under Section 4.8.
+
+9.5 Notwithstanding the above, Licensor reserves the right to release
+the Database under different license terms or to stop distributing or
+making available the Database. Releasing the Database under different
+license terms or stopping the distribution of the Database will not
+withdraw this License (or any other license that has been, or is
+required to be, granted under the terms of this License), and this
+License will continue in full force and effect unless terminated as
+stated above.
+
+### 10.0 General
+
+10.1 If any provision of this License is held to be invalid or
+unenforceable, that must not affect the validity or enforceability of
+the remainder of the terms and conditions of this License and each
+remaining provision of this License shall be valid and enforced to the
+fullest extent permitted by law.
+
+10.2 This License is the entire agreement between the parties with
+respect to the rights granted here over the Database. It replaces any
+earlier understandings, agreements or representations with respect to
+the Database.
+
+10.3 If You are in breach of the terms of this License, You will not be
+entitled to rely on the terms of this License or to complain of any
+breach by the Licensor.
+
+10.4 Choice of law. This License takes effect in and will be governed by
+the laws of the relevant jurisdiction in which the License terms are
+sought to be enforced. If the standard suite of rights granted under
+applicable copyright law and Database Rights in the relevant
+jurisdiction includes additional rights not granted under this License,
+these additional rights are granted in this License in order to meet the
+terms of this License.
diff --git a/data/module_ipes/cfe_params.csv b/data/module_ipes/cfe_params.csv
new file mode 100644
index 0000000..b829027
--- /dev/null
+++ b/data/module_ipes/cfe_params.csv
@@ -0,0 +1,25 @@
+name,description,units,data_type,calibratable,source,min,max,default_value,divide_attr_name,source_file
+soil_params.b,beta exponent on Clapp-Hornberger (1978) soil water relations,NULL,double,True,attr,2,15,4.05,mode.bexp_soil_layers_stag=1,
+soil_params.satdk,saturated hydraulic conductivity,m/s,double,True,attr,0.000000195,0.00141,0.00000338,geom_mean.dksat_soil_layers_stag=1,
+soil_params.satpsi,saturated capillary head,m,double,True,attr,0.036,0.955,0.355,geom_mean.psisat_soil_layers_stag=1,
+soil_params.slop,this factor (0-1) modifies the gradient of the hydraulic head at the soil bottom. 0=no-flow.,m/m,double,True,attr,0.0000598,1,0.05,mean.slope_1km,
+soil_params.smcmax,saturated soil moisture content (Maximum soil moisture content),m/m,double,True,attr,0.16,0.58,0.439,mean.smcmax_soil_layers_stag=1,
+soil_params.wltsmc,wilting point soil moisture content (< soil_params.smcmax),m/m,double,True,attr,0.05,0.3,0.439,mean.smcwlt_soil_layers_stag=1,
+soil_params.expon,"optional; defaults to 1, This parameter defines the soil reservoirs to be linear, Use linear reservoirs",NULL,double,False,const,NULL,NULL,1,,
+soil_params.expon_secondary,"optional; defaults to 1, This parameter defines the soil reservoirs to be linear, Use linear reservoirs ",NULL,double,False,const,NULL,NULL,1,,
+max_gw_storage,maximum storage in the conceptual reservoir,m,double,True,attr,0.01,0.25,0.05,mean.Zmax,
+Cgw,the primary outlet coefficient,m/hr,double,True,attr,0.0000018,0.0018,0.000018,mean.Coeff,
+expon,exponent parameter for nonlinear ground water reservoir (1.0 for linear reservoir),NULL,double,True,attr,1,8,3,mode.Expon,
+gw_storage,initial condition for groundwater reservoir - it is the ground water as a decimal fraction of the maximum groundwater storage (max_gw_storage) for the initial timestep,m/m,double,False,const,NULL,NULL,0.05,,
+alpha_fc,alpha at fc for clapp hornberger (field capacity),NULL,double,False,const,NULL,NULL,0.33,,
+soil_storage,initial condition for soil reservoir - it is the water in the soil as a decimal fraction of maximum soil water storage (smcmax x depth) for the initial timestep. Default = 0.5,m/m,double,False,const,NULL,NULL,0.5,,
+K_nash,Nash Config param for lateral subsurface runoff (Nash discharge to storage ratio),1/m,double,True,const,0,1,0.003,,
+K_lf,Nash Config param - primary reservoir,NULL,double,True,const,0,1,0.01,,
+nash_storage,Nash Config param - secondary reservoir ,NULL,double,False,const,NULL,NULL,"0.0,0.0",,
+giuh_ordinates,Giuh (geomorphological instantaneous unit hydrograph) ordinates in dt time steps,NULL,double,False,const,NULL,NULL,"0.55, 0.25, 0.2",,
+a_Xinanjiang_inflection_point_parameter,when surface_water_partitioning_scheme=Xinanjiang ,NULL,double,True,iceberg,-0.5,0.5,-0.2,AXAJ,CFE-X_params
+b_Xinanjiang_shape_parameter,when surface_water_partitioning_scheme=Xinanjiang ,NULL,double,True,iceberg,0.01,10,0.66,BXAJ,CFE-X_params
+x_Xinanjiang_shape_parameter,when surface_water_partitioning_scheme=Xinanjiang ,NULL,double,True,iceberg,0.01,10,0.02,XXAJ,CFE-X_params
+urban_decimal_fraction,when surface_water_partitioning_scheme=Xinanjiang,NULL,double ,False,const,0,1,0.01,,
+refkdt,Reference Soil Infiltration Parameter (used in runoff formulation),NULL,double,True,attr,0.1,4,1,mean.refkdt,
+soil_params.depth,soil depth,m,double,False,const,NULL,NULL,2,,
\ No newline at end of file
diff --git a/data/module_ipes/lasam_out.csv b/data/module_ipes/lasam_out.csv
new file mode 100644
index 0000000..285b009
--- /dev/null
+++ b/data/module_ipes/lasam_out.csv
@@ -0,0 +1,16 @@
+variable,description
+actual_evapotranspiration,volume of AET
+giuh_runoff,volume of giuh runoff
+groundwater_to_stream_recharge,outgoing water from ground reservoir to stream channel
+infiltration,volume of infiltrated water
+mass_balance,mass balance error
+percolation,volume of water leaving soil through the bottom of the domain (ground water recharge)
+potential_evapotranspiration,volume of PET
+precipitation,total precipitation
+soil_depth_layers,Soil depth layers
+soil_depth_wetting_fronts,Soil depth wetting fronts
+soil_moisture_wetting_fronts,Soil moisture wetting front
+soil_num_wetting_fronts,Number of soil wetting fronts
+soil_storage,volume of water left
+surface_runoff,volume of water surface runoff
+total_discharge,total outgoing water
\ No newline at end of file
diff --git a/data/module_ipes/lasam_params.csv b/data/module_ipes/lasam_params.csv
new file mode 100644
index 0000000..c7eb030
--- /dev/null
+++ b/data/module_ipes/lasam_params.csv
@@ -0,0 +1,26 @@
+name,description,units,data_type,calibratable,source,min,max,default_value,divide_attr_name,source_file
+forcing_file,provides precip. and PET inputs,NULL,string,FALSE,NULL,NULL,NULL,NULL,,
+soil_params_file,provides soil types with van Genuchten parameters,NULL,string,FALSE,NULL,NULL,NULL,NULL,,
+theta_r,residual water content - the minimum volumetric water content that a soil layer can naturally attain,NULL,double,TRUE,NULL,0.01,0.15,0.095,,
+theta_e,the maximum volumetric water content that a soil layer can naturally attain,NULL,double,TRUE,NULL,0.3,0.8,0.41,,
+alpha,the van Genuchten parameter related to the inverse of air entry pressure,1/cm,double,TRUE,NULL,0.001,0.3,0.019,,
+n,the van Genuchten parameter related to pore size distribution,NULL,double,TRUE,NULL,1.01,3,1.31,,
+Ks,the saturated hydraulic conductivity of a soil,cm/h,double,TRUE,NULL,0.001,100,0.26,,
+layer_thickness,individual layer thickness (not absolute),cm,double (1D array),FALSE,NULL,NULL,NULL,200,,
+initial_psi,used to initialize layers with a constant head,cm,double,FALSE,NULL,0,NULL,2000,,
+ponded_depth_max,max amount of water unavailable for surface drainage,cm,double,TRUE,NULL,0,5,1.1,,
+timestep,timestep of the model,sec/min/hr,double,FALSE,NULL,0,NULL,300,,
+forcing_resolution,timestep of the forcing data,sec/min/hr,double,FALSE,NULL,NULL,NULL,3600,,
+endtime,time at which model simulation ends,"sec, min, hr, d",double,FALSE,NULL,0,NULL,NULL,,
+layer_soil_type,layer soil type (read from soil_params_file),NULL,int (1D array),FALSE,NULL,NULL,NULL,9,,
+max_valid_soil_types,max number of valid soil types read from soil_params_file,NULL,int,FALSE,NULL,1,NULL,15,,
+wilting_point_psi,the amount of water not available for plants - used in computing AET,cm,double,FALSE,NULL,NULL,NULL,15495,,
+field_capacity_psi,capillary head corresponding to volumetric water content at which gravity drainage becomes slower - used in computing AET,cm,double,TRUE,NULL,10.3,516.6,340.9,,
+use_closed_form_G,determines whether the numeric integral or closed form for G is used; a value of true will use the closed form,NULL,boolean,FALSE,NULL,NULL,NULL,FALSE,,
+giuh_ordinates,GIUH ordinates (for giuh based surface runoff),NULL,double (1D array),FALSE,NULL,NULL,NULL,"0.06,0.51,0.28,0.12,0.03",,
+verbosity,controls IO (screen outputs and writing to disk),NULL,string,FALSE,NULL,NULL,NULL,NULL,,
+sft_coupled,couples LASAM to SFT,NULL,boolean,FALSE,NULL,NULL,NULL,NULL,,
+soil_z,vertical resolution of the soil column,cm,double (1D array),FALSE,NULL,NULL,NULL,NULL,,
+calib_params,"when set to true, calibratable params are calibrated",NULL,boolean,FALSE,NULL,NULL,NULL,FALSE,,
+adaptive_timestep,"when set to true, will use an internal adaptive timestep, and the above timestep is used as a minimum timestep",NULL,boolean,FALSE,NULL,NULL,NULL,NULL,,
+
diff --git a/data/module_ipes/modules.csv b/data/module_ipes/modules.csv
new file mode 100644
index 0000000..b12d663
--- /dev/null
+++ b/data/module_ipes/modules.csv
@@ -0,0 +1,10 @@
+module,file,outputs
+CFE-X,cfe_params.csv,
+Noah-OWP-Modular,noah_owp_modular_params.csv,
+Snow-17,snow17_params.csv,
+Sac-SMA,sac_sma_params.csv,
+TopModel,topmodel_params.csv,
+UEB,ueb_params.csv,
+SFT,sft_params.csv,
+SMP,smp_params.csv,
+LASAM,lasam_params.csv,lasam_out.csv
diff --git a/data/module_ipes/noah_owp_modular_params.csv b/data/module_ipes/noah_owp_modular_params.csv
new file mode 100644
index 0000000..dea7f31
--- /dev/null
+++ b/data/module_ipes/noah_owp_modular_params.csv
@@ -0,0 +1,14 @@
+name,description,units,data_type,calibratable,source,min,max,default_value,divide_attr_name,source_file
+RSURF_EXP,Exponent in the resistance equation for soil evaporation,NA,double,True,const,1,6,4.84,,
+CWPVT,Canopy wind parameter for canopy wind profile formulation,m-1,double,True,const,0.09,0.36,0.18,,
+VCMX25,Maximum carboxylation at 25°C,umol/m2/s,double,True,const,24,112,52.2,,
+MP,Slope of Ball-Berry conductance relationship,NA,double,True,const,3.6,12.6,9.7,,
+MFSNO,Melt factor for snow depletion curve,NA,double,True,const,0.5,4,2,,
+RSURF_SNOW,Soil surface resistance for snow,s/m,double,True,const,0.136,100,49.2,,
+SCAMAX,Maximum fractional snow cover area,NA,double,True,const,0.7,1,0.89,,
+ISLTYP,Soil type,NA,integer,False,attr,,,,mode.ISLTYP,
+IVGTYP,Vegetation type,NA,integer,False,attr,,,,mode.IVGTYP,
+longitude,longitude,degrees,double,False,attr,-180,180,,centroid_x,
+latitude,latitude,degrees,double,False,attr,-90,90,,centroid_y,
+slope,slope of terrain,degrees,double,False,attr,0,90,,mean.slope,
+azimuth,azimuth,degrees,double,False,attr,0,360,,circ_mean.aspect,
diff --git a/data/module_ipes/sac_sma_params.csv b/data/module_ipes/sac_sma_params.csv
new file mode 100644
index 0000000..c8ce61e
--- /dev/null
+++ b/data/module_ipes/sac_sma_params.csv
@@ -0,0 +1,20 @@
+name,description,units,data_type,calibratable,source,min,max,default_value,divide_attr_name,source_file
+hru_id,Identification string for each hrus,NULL,char,False,const,25,125,NA,divide_id,
+hru_area,Area of each HRU,NULL,double,False,attr,10,100,NA,areasqkm,
+uztwm,upper zone tension water maximum storage,mm,double,True,iceberg,25,125,75,,sac_sma_params
+uzfwm,Maximum upper zone free water,mm,double,True,iceberg,10,100,30,,sac_sma_params
+lztwm,Maximum lower zone tension water,mm,double,True,iceberg,75,300,150,,sac_sma_params
+lzfsm,"Maximum lower zone free water, secondary (aka supplemental)",mm,double,True,iceberg,15,300,150,,sac_sma_params
+lzfpm,"Maximum lower zone free water, primary",mm,double,True,iceberg,40,600,300,,sac_sma_params
+adimp,Additional impervious area due to saturation,decimal percent,double,True,const,0,0.2,0,,
+uzk,Upper zone recession coefficient,per day ,double,True,iceberg,0.2,0.5,0.3,,sac_sma_params
+lzpk,"Lower zone recession coefficient, primary",decimal percent,double,True,iceberg,0.001,0.015,0.01,,sac_sma_params
+lzsk,"Lower zone recession coefficient, secondary (aka supplemental)",decimal percent,double,True,iceberg,0.03,0.2,0.1,,sac_sma_params
+zperc,Minimum percolation rate coefficient,NULL,double,True,iceberg,20,300,10,,sac_sma_params
+rexp,Percolation equation exponent,NULL,double,True,iceberg,1.4,3.5,2,,sac_sma_params
+pctim,impervious fraction of the watershed area ,decimal percent,double,True,const,0,0.05,0,,
+pfree,fraction of water percolating from upper zone directly to lower zone free water storage. ,decimal percent,double,True,iceberg,0,0.5,0.1,,sac_sma_params
+riva,Percent of the basin that is riparian area,decimal percent,double,True,const,0,0.2,0,,
+side,Portion of the baseflow which does not go to the stream,decimal percent,double,False,const,0,0.2,0,,
+rserv,Percent of lower zone free water not transferable to the lower zone tension water,decimal percent,double,False,const,0.2,0.4,0.3,,
+
diff --git a/data/module_ipes/sft_params.csv b/data/module_ipes/sft_params.csv
new file mode 100644
index 0000000..1af8bd7
--- /dev/null
+++ b/data/module_ipes/sft_params.csv
@@ -0,0 +1,14 @@
+name,description,units,data_type,calibratable,source_file,min,max,nwm_name,default_value
+end_time,Simulation duration. If no unit is specified defaults to hour.,"s, sec, h, hr, d, day",double,FALSE,const,NULL,NULL,NULL,NULL
+soil_params.quartz,"soil quartz content, used in soil thermal conductivity function of Peters-Lidard",m,double,FALSE,const,NULL,NULL,NULL,NULL
+ice_fraction_scheme,"runoff scheme used in the soil reservoir models (e.g. CFE), options: Schaake and Xinanjiang",NULL,int,FALSE,const,NULL,NULL,NULL,NULL
+soil_z,vertical resolution of the soil column (computational domain of the SFT model),m,double,FALSE,const,NULL,NULL,NULL,NULL
+soil_temperature,initial soil temperature for the discretized column,K,double,FALSE,const,NULL,NULL,NULL,NULL
+soil_moisture_content,initial soil total (liquid + ice) moisture content for the discretized column,NULL,double,FALSE,const,NULL,NULL,NULL,NULL
+soil_liquid_content,initial soil liquid moisture content for the discretized column,NULL,double,FALSE,const,NULL,NULL,NULL,NULL
+bottom_boundary_temp,"temperature at the bottom boundary (BC) of the domain, if not specified, the default BC is zero-geothermal flux",K,double,FALSE,const,NULL,NULL,NULL,NULL
+top_boundary_temp,"temperature at the top/surface boundary of the domain, if not specified, then other options include: 1) read from a file, or 2) provided through coupling",K,double,FALSE,const,NULL,NULL,NULL,NULL
+sft_standalone,true for standalone model run; default is false,NULL,boolean,FALSE,const,NULL,NULL,NULL,NULL
+soil_moisture_bmi,If true soil_moisture_profile is set by the SoilMoistureProfile module through the BMI; if false then config file must provide soil_moisture_content and soil_liquid_content,NULL,boolean,FALSE,const,NULL,NULL,NULL,NULL
+dt,Size of a simulation timestep. If no unit is specified defaults to hour.,"s, sec, h, hr, d, day",double,FALSE,const,NULL,NULL,NULL,NULL
+verbosity,"high, low, or none",NULL,NULL,FALSE,const,NULL,NULL,NULL,NULL
diff --git a/data/module_ipes/smp_params.csv b/data/module_ipes/smp_params.csv
new file mode 100644
index 0000000..b8b13ee
--- /dev/null
+++ b/data/module_ipes/smp_params.csv
@@ -0,0 +1,8 @@
+name,description,units,data_type,calibratable,source,min,max,nwm_name,default_value,divide_attr_name,source_file
+soil_z,vertical resolution of the soil moisture profile (depths from the surface),m,double,FALSE,NULL,NULL,NULL,NULL,NULL,,
+soil_storage_depth,"depth of the soil reservoir model (e.g., CFE). Note: this depth can be different from the depth of the soil moisture profile which is based on soil_z",m,double,FALSE,NULL,NULL,NULL,NULL,NULL,,
+soil_storage_model,"if conceptual, conceptual models are used for computing the soil moisture profile (e.g., CFE). If layered, layered-based soil moisture models are used (e.g., LGAR). If topmodel, topmodel's variables are used",NULL,string,FALSE,NULL,NULL,NULL,NULL,NULL,,
+soil_moisture_profile_option,constant for layered-constant profile. linear for linearly interpolated values between two consecutive layers. Needed if soil_storage_model = layered.,NULL,string,FALSE,NULL,NULL,NULL,NULL,NULL,,
+soil_depth_layers,Absolute depth of soil layers. Needed if soil_storage_model = layered.,NULL,double,FALSE,NULL,NULL,NULL,NULL,NULL,,
+soil_moisture_fraction_depth,**user specified depth for the soil moisture fraction (default is 40 cm),m,double,FALSE,NULL,NULL,NULL,NULL,NULL,,
+water_table_based_method,"Needed if soil_storage_model = topmodel. flux-based uses an iterative scheme, and deficit-based uses catchment deficit to compute soil moisture profile",NULL,string,FALSE,NULL,NULL,NULL,NULL,NULL,,
diff --git a/data/module_ipes/snow17_params.csv b/data/module_ipes/snow17_params.csv
new file mode 100644
index 0000000..671cbe1
--- /dev/null
+++ b/data/module_ipes/snow17_params.csv
@@ -0,0 +1,27 @@
+name,description,units,data_type,calibratable,source,min,max,default_value,divide_attr_name,source_file
+mfmax, Maximum melt factor during non-rain periods – assumed to occur on June 21,mm/˚C/hr,double,True,iceberg,0.1,2.2,1,,sac_sma_params
+hru_area, needed for combination and routing conv, sq-km,double,False,attr,,,,areasqkm,
+latitude, centroid latitude of hru, decimal degrees,double,False,attr,,,,centroid_y,
+elev, mean elevation of hru, m,double,False,attr,,,,mean.elevation,
+uadj, The average wind function during rain-on-snow periods,mm/mb/6 hr,double,True,iceberg,0.01,0.2,0.05,,
+si, The mean areal water equivalent above which there is always 100 percent areal snow cover,mm,double,True,const,0,10000,500,,
+adc1, Curve that defines the areal extent of the snow cover as a function of how much of the original snow cover remains after significant bare ground shows up,NULL,double,False,const,0.05,0.05,0.05,,
+adc2, Curve that defines the areal extent of the snow cover as a function of how much of the original snow cover remains after significant bare ground shows up,NULL,double,False,const,0.1,0.1,0.1,,
+adc3, Curve that defines the areal extent of the snow cover as a function of how much of the original snow cover remains after significant bare ground shows up,NULL,double,False,const,0.2,0.2,0.2,,
+adc4, Curve that defines the areal extent of the snow cover as a function of how much of the original snow cover remains after significant bare ground shows up,NULL,double,False,const,0.3,0.3,0.3,,
+adc5, Curve that defines the areal extent of the snow cover as a function of how much of the original snow cover remains after significant bare ground shows up,NULL,double,False,const,0.4,0.4,0.4,,
+adc6, Curve that defines the areal extent of the snow cover as a function of how much of the original snow cover remains after significant bare ground shows up,NULL,double,False,const,0.5,0.5,0.5,,
+adc7, Curve that defines the areal extent of the snow cover as a function of how much of the original snow cover remains after significant bare ground shows up,NULL,double,False,const,0.6,0.6,0.6,,
+adc8, Curve that defines the areal extent of the snow cover as a function of how much of the original snow cover remains after significant bare ground shows up,NULL,double,False,const,0.7,0.7,0.7,,
+mfmin, Minimum melt factor during non-rain periods – assumed to occur on December 21,mm/˚C/hr,double,True,iceberg,0.01,0.6,0.2,,
+hru_id, local hru ids for multiple hrus,,string,False,const,,,,,
+scf,The multiplying factor which adjusts precipitation that is determined to be in the form of snow,,double,True,const,0.9,1.8,1.1,,
+adc9, Curve that defines the areal extent of the snow cover as a function of how much of the original snow cover remains after significant bare ground shows up,NULL,double,False,const,0.8,0.8,0.8,,
+adc10, Curve that defines the areal extent of the snow cover as a function of how much of the original snow cover remains after significant bare ground shows up,NULL,double,False,const,0.9,0.9,0.9,,
+adc11, Curve that defines the areal extent of the snow cover as a function of how much of the original snow cover remains after significant bare ground shows up,NULL,double,False,const,1,1,1,,
+nmf, Maximum negative melt factor,mm/˚C/hr,double,True,const,0.01,0.3,0.15,,
+tipm, Antecedent temperature index parameter,,double,True,const,0,1,0.1,,
+pxtemp, Antecedent temperature index parameter,˚C,double,True,const,0.5,5,1,,
+mbase, Base temperature for snowmelt computations during non-rain periods,˚C,double,False,const,0,0,0,,
+plwhc, maximum amount of liquid water as a fraction of the ice portion of the snow that can be held against gravity drainage, %,double,True,const,0.01,0.3,0.03,,
+daygm, Constant daily amount of melt which takes place at the snow-soil interface whenever there is a snow cover,mm/day,double,True,const,0,0.5,0,,
diff --git a/data/module_ipes/topmodel_params.csv b/data/module_ipes/topmodel_params.csv
new file mode 100644
index 0000000..28b8547
--- /dev/null
+++ b/data/module_ipes/topmodel_params.csv
@@ -0,0 +1,26 @@
+name,description,units,data_type,calibratable,source,min,max,default_value,divide_attr_name,source_file
+subcat,character title of subcatchment; often same as model title,,char,False,const,,,,,
+szm,exponential scaling parameter for the decline of transmissivity with increase in storage deficit; units of depth,meters,double,True,const,0.001,0.25,0.0125,,
+t0,downslope transmissivity when the soil is just saturated to the surface,meters/hour,double,True,const,0,0.0001,0.000075,,
+td,unsaturated zone time delay per unit storage deficit,hours,double,True,const,0.001,40,20,,
+chv,average channel flow velocity,meters/hour,double,True,const,50,2000,1000,,
+rv,internal overland flow routing velocity,meters/hour,double,True,const,50,2000,1000,,
+srmax,maximum root zone storage deficit,meters,double,True,const,0.005,0.05,0.04,,
+Q0,initial subsurface flow per unit area,meters/hour,double,False,const,0,,0.0000328,,
+sr0,initial root zone storage deficit below field capacity,meters,double,True,const,0,0.1,0,,
+infex,set to 1 to call subroutine to do infiltration excess calcs; not usually appropriate in catchments where Topmodel is applicable (shallow highly permeable soils); default to 0,,int,False,const,0,1,,,
+xk0,surface soil hydraulic conductivity,meters/hour,double,True,const,0.0001,0.2,2,,
+hf,wetting front suction for G&A soln.,meters,double,False,const,0.01,0.5,0.1,,
+dth,water content change across the wetting front; dimensionless,,double,False,const,0.01,0.6,0.1,,
+num_sub_catchments,number of subcatchments; BMI adaptation always sets this to 1 as the loop is handled by the framework,,int,False,const,1,1,,,
+imap,ordinarily tells code to write map; NOT IMPLEMENTED,,int,False,const,,,,,
+yes_print_output,set equal to 1 to print output files,,int,False,const,,,,,
+subcat,the name of each sub-catchment,,string,False,const,,,,,
+num_topodex_values,number of topodex histogram values,,int,False,const,1,30,,,
+area,catchment area as % to whole catchment (set to 1),,double,False,const,0,1,,,
+dist_area_lnaotb,the distribution of area corresponding to ln(A/tanB) histo.,,double,False,const,0,1,,,
+lnaotb,ln(a/tanB) values; TWI,,double,False,attr,,,,dist_4.twi,
+num_channels,number of channels,,int,False,const,1,10,1,,
+cum_dist_area_with_dist,channel cum. distr. of area with distance,,double,False,const,0,1,1,,
+dist_from_outlet,distance from outlet to point on channel with area known,meters,double,False,attr,0,,,lengthkm,
+
diff --git a/data/module_ipes/ueb_params.csv b/data/module_ipes/ueb_params.csv
new file mode 100644
index 0000000..05d0ab2
--- /dev/null
+++ b/data/module_ipes/ueb_params.csv
@@ -0,0 +1,66 @@
+name,description,units,data_type,calibratable,source,min,max,default_value,divide_attr_name,source_file
+USic,Energy content initial condition,kg m-3,double,False,const,,,0,,
+WSis,Snow water equivalent initial condition,m,double,False,const,,,0,,
+Tic,Snow surface dimensionless age initial condition,NULL,double,False,const,,,0,,
+WCic,Snow water equivalent dimensionless age initial condition,m,double,False,const,,,0,,
+df,Drift factor multiplier,NULL,double,True,const,0.5,6,1,,
+apr,Average atmospheric pressure,Pa,double,True,iceberg,30000,101325,,atm_pres,
+Aep,Albedo extinction coefficient,m,double,False,const,NULL,NULL,0.1,,
+cc,Canopy coverage fraction,NULL,double,True,const,0,0.8,0.4,,
+hcan,Canopy height,m,integer,True,const,0,10,5,,
+lai,Leaf area index,NULL,integer,True,const,0,4,2,,
+sbar,Maximum snow load held per unit branch area,kg/m^2,double,False,const,,,6.6,,
+ycage,Forest age flag for wind speed profile parameterization,NULL,double,False,const,2,3,2.5,,
+Slope,A 2-D grid that contains the slope at each grid point,degrees,double,False,attr,,,,mean.slope,
+aspect,A 2-D grid that contains the aspect at each grid point,degrees,double,False,attr,,,,circ_mean.aspect,
+latitude,A 2-D grid that contains the latitude at each grid point,degrees,double,False,attr,,,,centroid_y,
+subalb,The fraction of shortwave radiation (fraction 0-1) reflected by the substrate beneath the snow (ground or glacier),NULL,double,True,const,0.25,0.7,0.25,,
+subtype,Type of beneath snow substrate ,NULL,integer,False,const,0,3,0,,
+gsurf,The fraction of surface melt that runs off (e.g. from a glacier),NULL,double,False,const,,,0,,
+b01,Monthly mean of daily temperature range for January used in Bristow Campbell formulas for atmospheric transmissivity,C,double,False,iceberg,,,,,ueb_deltat
+b02,Monthly mean of daily temperature range for February used in Bristow Campbell formulas for atmospheric transmissivity,C,double,False,iceberg,,,,,ueb_deltat
+b03,Monthly mean of daily temperature range for March used in Bristow Campbell formulas for atmospheric transmissivity,C,double,False,iceberg,,,,,ueb_deltat
+b04,Monthly mean of daily temperature range for April used in Bristow Campbell formulas for atmospheric transmissivity,C,double,False,iceberg,,,,,ueb_deltat
+b05,Monthly mean of daily temperature range for May used in Bristow Campbell formulas for atmospheric transmissivity,C,double,False,iceberg,,,,,ueb_deltat
+b06,Monthly mean of daily temperature range for June used in Bristow Campbell formulas for atmospheric transmissivity,C,double,False,iceberg,,,,,ueb_deltat
+b07,Monthly mean of daily temperature range for July used in Bristow Campbell formulas for atmospheric transmissivity,C,double,False,iceberg,,,,,ueb_deltat
+b08,Monthly mean of daily temperature range for August used in Bristow Campbell formulas for atmospheric transmissivity,C,double,False,iceberg,,,,,ueb_deltat
+b09,Monthly mean of daily temperature range for September used in Bristow Campbell formulas for atmospheric transmissivity,C,double,False,iceberg,,,,,ueb_deltat
+b10,Monthly mean of daily temperature range for October used in Bristow Campbell formulas for atmospheric transmissivity,C,double,False,iceberg,,,,,ueb_deltat
+b11,Monthly mean of daily temperature range for November used in Bristow Campbell formulas for atmospheric transmissivity,C,double,False,iceberg,,,,,ueb_deltat
+b12,Monthly mean of daily temperature range for December used in Bristow Campbell formulas for atmospheric transmissivity,C,double,False,iceberg,,,,,ueb_deltat
+ts_last,Snow surface temperature one day prior to the model starting time,C,double,False,const,,,-9999,,ueb_deltat
+longitude,A 2-D grid that contains the longitude at each grid,degrees,double,False,attr,,,,centroid_x,
+ems,Emissivity of snow,NULL,double,True,const,0.98,0.99,0.99,,
+cg,Ground heat capacity,KJ/kg/˚C,double,True,const,2.09,2.12,2.09,,
+zo,Roughness length,m,double,True,const,0.0002,0.014,0.01,,
+rho,Snow density,kg/m3,double,True,const,100,600,300,,
+rhog,Soil density,kg/m3,double,True,const,1100,1700,1700,,
+ks,Snow saturated hydraulic conductivity,m/hr,integer,True,const,0,20,20,,
+de,Thermally active soil depth,m,double,True,const,0.1,0.4,0.1,,
+avo,Visual new snow albedo,,double,True,const,0.85,0.95,0.95,,
+irad,Radiation control flag,NULL,integer,False,const,0,2,2,,
+ireadalb,Albedo reading control flag,NULL,integer,False,const,0,1,0,,
+tr,Rain threshold temperature,˚C,double,False,const,,,3,,
+ts,Snow threshold temperature,˚C,double,False,const,,,-1,,
+z,Air measurement height,m,double,False,const,,,2,,
+lc,Liquid holding capacity,NULL,double,False,const,,,0.05,,
+anir0,NIR new snow albedo,NULL,double,False,const,,,0.65,,
+lans,Thermal conductivity of surface snow,kJ/ m/C/ hr,double,False,const,,,1,,
+lang,Thermal conductivity of soil,kJ/ m/C/ hr,double,False,const,,,4,,
+wlf,Low frequency surface temperature parameter,rad/hr,double,False,const,,,0.0654,,
+rd1,Damping depth adjustment parameter,NULL,double,False,const,,,1,,
+dnews,New snow threshold depth,m,double,False,const,,,0.001,,
+emc,Canopy emissivity,NULL,double,False,const,,,0.98,,
+alpha,Shortwave leaf scattering coefficient,NULL,double,False,const,,,0.5,,
+alphal,Scattering coefficient for long wave radiation,NULL,double,False,const,,,0,,
+g,Leaf orientation geometry factor,degree,double,False,const,,,0.5,,
+uc,Unloading rate coefficient,hr-1,double,False,const,,,0.00463,,
+as,Cloudy atmospheric transmissivity,NULL,double,False,const,,,0.25,,
+bs,Clear sky atmospheric transmissivity increment,NULL,double,False,const,,,0.5,,
+lambda,Clear sky direct radiation fraction,NULL,double,False,const,,,0.857,,
+rimax,Richardson number upper bound,NULL,double,False,const,,,0.16,,
+wcoeff,Forest wind decay coefficient,NULL,double,False,const,,,0.5,,
+a,Transmissivity parameter,NULL,double,False,const,,,0.8,,
+c,Transmissivity exponent,NULL,double,False,const,,,2.4,,
+
diff --git a/docker/Dockerfile.api b/docker/Dockerfile.api
new file mode 100644
index 0000000..9f33a5f
--- /dev/null
+++ b/docker/Dockerfile.api
@@ -0,0 +1,36 @@
+FROM python:3.12-slim
+
+# Install system dependencies including GDAL
+RUN apt-get update && apt-get install -y \
+ curl \
+ gdal-bin \
+ libgdal-dev \
+ gcc \
+ g++ \
+ && rm -rf /var/lib/apt/lists/*
+
+# Set GDAL environment variables
+ENV GDAL_CONFIG=/usr/bin/gdal-config
+ENV CPLUS_INCLUDE_PATH=/usr/include/gdal
+ENV C_INCLUDE_PATH=/usr/include/gdal
+
+# Install curl for UV installation
+RUN apt-get update && apt-get install -y curl
+
+# Install UV properly by copying from the official image
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
+
+# Set the working directory
+WORKDIR /app
+
+# Copy the content of the build context to /app
+COPY ../ /app
+
+RUN uv venv
+ENV PATH="/app/.venv/bin:$PATH"
+
+# Install the package in development mode
+RUN uv sync
+
+# Run the API
+CMD ["python", "-m", "app.main"]
diff --git a/docker/compose.yaml b/docker/compose.yaml
new file mode 100644
index 0000000..19cd5b7
--- /dev/null
+++ b/docker/compose.yaml
@@ -0,0 +1,9 @@
+services:
+ myapp:
+ build:
+ context: ../
+ dockerfile: docker/Dockerfile.api
+ ports:
+ - "8000:8000"
+ env_file:
+ - ../.env
diff --git a/docs/description.md b/docs/description.md
new file mode 100644
index 0000000..dcab003
--- /dev/null
+++ b/docs/description.md
@@ -0,0 +1,98 @@
+# Icefabric: Lakehouse Architecture for Hydrologic Data Management
+
+
+  ![The icefabric lakehouse architecture](img/icefabric.png){ width="600" }
+  The icefabric lakehouse architecture. Data is moved from sources into an underlying specified format (Iceberg/Icechunk) and served to consumers via APIs and services.
+
+
+
+## Overview
+
+Icefabric implements a modern **lakehouse architecture** to combine the flexibility of data lakes with the performance and governance of a data warehouse. This system provides versioned, centralized access to hydrologic datasets in support of the National Water Model.
+
+## The Problem: Hydrologic Data Complexity
+
+### Traditional Challenges
+
+Hydrologic research and operations face unique data management challenges:
+
+- **Heterogeneous Data Sources**: Datasets are sourced from different agencies in various formats
+- **Multiple Formats**: Tabular, vector, and cloud-optimized raster (COG) data, among others
+- **Version Control Needs**: Hydrofabric topology updates, data quality improvements, and research reproducibility
+
+### Why Traditional Solutions Fall Short
+
+**Traditional database systems** struggle with:
+
+- Large geospatial datasets and complex geometries
+- Schema evolution for evolving datasets
+- Version control for scientific workflows
+
+**File-based approaches** suffer from:
+
+- Data duplication and storage inefficiencies
+- Lack of ACID transactions
+- Manual version management
+- Limited discovery and access controls
+
+## Lakehouse Architecture Solution
+
+### Technology Stack Rationale
+
+=== "Apache Iceberg - Structured Data"
+
+ **Used For:**
+ - Hydrofabric geospatial products
+ - Streamflow observations time series (USGS, Local Agencies)
+ - Cross-section geometries (RAS XS [MIP/BLE])
+
+ **Why Iceberg:**
+ - **ACID Transactions**: Ensure data consistency during hydrofabric updates
+ - **Schema Evolution**: Handle network topology changes without breaking existing workflows
+ - **Time Travel**: Access historical network versions for model comparisons
+ - **Performance**: Optimized queries across continental-scale datasets
+ - **Partition Pruning**: Efficient spatial and temporal filtering
+
+=== "Icechunk - Array Data"
+
+ **Used For:**
+ - Topobathy elevation surfaces
+ - Land cover classifications
+
+ **Why Icechunk:**
+ - **Virtual References**: Avoid duplicating large elevation datasets
+ - **Zarr Compatibility**: Seamless integration with scientific Python ecosystem
+ - **Git-like Versioning**: Branch/merge workflows for experimental processing
+ - **Chunked Storage**: Optimized for geospatial access patterns
+ - **Compression**: Efficient storage of repetitive classification data
+
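+As a rough sketch of how a consumer might read one of these versioned Iceberg tables with PyIceberg (the table name below is illustrative; see the API and user guides for the supported access paths):
+
+```python
+from pyiceberg.catalog import load_catalog
+
+# Load the hosted Glue catalog configured in .pyiceberg.yaml
+catalog = load_catalog("glue")
+
+# Hypothetical table name; list_namespaces()/list_tables() show what actually exists
+table = catalog.load_table("conus_hf.divides")
+
+# Read the current snapshot; passing snapshot_id to scan() allows "time travel"
+# back to an earlier version of the table
+df = table.scan(limit=10).to_pandas()
+print(df.head())
+```
+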
+## Benefits Realized
+
+### For Hydrologic Research
+
+- **Reproducible Science**: Exact data versions enable repeatable research
+- **Collaborative Workflows**: Branching enables parallel research without conflicts
+- **Quality Evolution**: Track data quality improvements over time
+
+### For Operational Forecasting
+
+- **Consistent Baselines**: Stable data versions for operational model runs
+- **Real-time Integration**: Fast access to latest observations and forecasts
+- **Rollback Capabilities**: Quick recovery from data quality issues
+
+### For Data Management
+
+- **Access Unification**: Single API for diverse hydrologic data types
+- **Version Management**: Automated tracking eliminates manual version confusion
+- **Quality Assurance**: Built-in validation prevents bad data propagation
+
+## Conclusion
+
+The Icefabric lakehouse architecture addresses fundamental challenges in hydrologic data management through:
+
+1. **Unified Access**: Single interface for diverse water data sources
+2. **Version Control**: Git-like workflows for scientific data management
+3. **Quality Assurance**: Automated validation and lineage tracking
+4. **Research Support**: Reproducible environments for collaborative science
+
+This architecture enables EDFS to provide reliable, versioned, high-performance access to critical water resources data supporting both operational forecasting and cutting-edge research.
diff --git a/docs/img/icefabric.png b/docs/img/icefabric.png
new file mode 100644
index 0000000..f52c528
Binary files /dev/null and b/docs/img/icefabric.png differ
diff --git a/docs/img/icefabric_api.png b/docs/img/icefabric_api.png
new file mode 100644
index 0000000..3801a63
Binary files /dev/null and b/docs/img/icefabric_api.png differ
diff --git a/docs/img/icefabric_mission.png b/docs/img/icefabric_mission.png
new file mode 100644
index 0000000..3a3ffc5
Binary files /dev/null and b/docs/img/icefabric_mission.png differ
diff --git a/docs/img/icefabric_version.png b/docs/img/icefabric_version.png
new file mode 100644
index 0000000..2762cc4
Binary files /dev/null and b/docs/img/icefabric_version.png differ
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..2ac7447
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,58 @@
+# Welcome to Icefabric
+
+!!! warning "In Progress"
+    These docs are a work in progress and will continuously be updated
+
+# Icefabric
+
+An [Apache Iceberg](https://py.iceberg.apache.org/)/[Icechunk](https://icechunk.io/en/latest/) implementation of the Hydrofabric to disseminate continental hydrologic data
+
+!!! note
+    To run any of the functions in this repo, your AWS test account credentials + `AWS_DEFAULT_REGION="us-east-1"` need to be in your `.env` file and your `.pyiceberg.yaml` settings need to be up to date
+
+### Getting Started
+This repo is managed through [UV](https://docs.astral.sh/uv/getting-started/installation/) and can be installed through:
+```sh
+uv sync
+source .venv/bin/activate
+```
+
+### Running the API locally
+To run the API locally, ensure your `.env` file in your project root has the right credentials, then run
+```sh
+python -m app.main
+```
+This should spin up the API services at `localhost:8000/`.
+
+To run the API against a local SQLite-backed catalog instead of the hosted AWS Glue catalog, run
+```sh
+python -m app.main --catalog sql
+```
+
+### Building the API through Docker
+To run the API locally with Docker, ensure your `.env` file in your project root has the right credentials, then run
+```sh
+docker compose -f docker/compose.yaml build --no-cache
+docker compose -f docker/compose.yaml up
+```
+This should spin up the API services
+
+
+### Development
+To ensure that icefabric follows the specified structure, be sure to install the local dev dependencies and run `pre-commit install`
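+
+For example (assuming the dev dependencies are installed by the default `uv sync`; adjust if they sit behind an extra):
+
+```sh
+uv sync              # install the project and its local dev dependencies
+pre-commit install   # register the pre-commit git hooks
+```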
+
+### Documentation
+To build the user guide documentation for Icefabric locally, run the following commands:
+```sh
+uv sync --extra docs
+mkdocs serve -a localhost:8080
+```
+Docs will be spun up at localhost:8080/
+
+### Pytests
+
+The `tests` folder is for all testing data so the global conftest can pick it up. This allows all tests in the namespace packages to share the same scope without having to reference one another.
+
+To run tests, run `pytest -s` from project root.
+
+To run the subsetter tests, run `pytest --run-slow`, as these tests take some time; otherwise, they will be skipped.
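+
+For example:
+
+```sh
+pytest -s           # run the test suite from the project root
+pytest --run-slow   # also run the slow subsetter tests (skipped by default)
+```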
diff --git a/docs/modules/index.md b/docs/modules/index.md
new file mode 100644
index 0000000..0651976
--- /dev/null
+++ b/docs/modules/index.md
@@ -0,0 +1,3 @@
+# Modules
+
+The following pages document the EDFS team's support for creating configuration files for NWM modules.
diff --git a/docs/modules/sft.md b/docs/modules/sft.md
new file mode 100644
index 0000000..519c203
--- /dev/null
+++ b/docs/modules/sft.md
@@ -0,0 +1,191 @@
+# SFT (Soil Freeze-Thaw) Module Documentation
+
+## Overview
+
+The SFT (Soil Freeze-Thaw) module simulates the freeze-thaw processes in soil columns and is used in cold regions where freeze-thaw cycles significantly affect water movement and storage.
+
+## Parameter Reference
+
+### Core Parameters
+
+| Parameter | Description | Units | Data Type | Default | Calibratable |
+|-----------|-------------|--------|-----------|---------|--------------|
+| `end_time` | Simulation duration. If no unit is specified, defaults to hour | s, sec, h, hr, d, day | double | `1.[d]` | FALSE |
+| `dt` | Size of a simulation timestep. If no unit is specified, defaults to hour | s, sec, h, hr, d, day | double | `1.0[h]` | FALSE |
+| `verbosity` | Logging verbosity level | - | string | `none` | FALSE |
+
+**Options for verbosity:** `high`, `low`, `none`
+
+### Soil Physical Properties
+
+These properties are based on Hydrofabric divide attributes provided in the latest enterprise version.
+
+| Parameter | Description | Units | Data Type | Default | Calibratable |
+|-----------|-------------|--------|-----------|---------|--------------|
+| `soil_params.smcmax` | Maximum soil moisture content | - | double | - | TRUE |
+| `soil_params.b` | Soil moisture retention curve parameter (bexp) | - | double | - | TRUE |
+| `soil_params.satpsi` | Saturated soil suction (psisat) | - | double | - | TRUE |
+| `soil_params.quartz` | Soil quartz content, used in soil thermal conductivity function of Peters-Lidard | - | double | `1.0` | FALSE |
+
+### Domain Configuration
+
+| Parameter | Description | Units | Data Type | Default | Calibratable |
+|-----------|-------------|--------|-----------|---------|--------------|
+| `soil_z` | Vertical resolution of the soil column (computational domain of the SFT model) | m | array[double] | `[0.1, 0.3, 1.0, 2.0]` | FALSE |
+| `soil_temperature` | Initial soil temperature for the discretized column | K | array[double] | - | FALSE |
+
+**Ice Fraction Scheme Options:**
+
+The choice between the following ice fraction schemes is dictated by the version of CFE being used:
+
+- `Schaake`: Traditional Schaake ice fraction calculation
+- `Xinanjiang`: Xinanjiang ice fraction calculation method (default)
+
+## Data Structures
+
+### SFT Configuration Model
+
+The SFT module uses a Pydantic model to validate and structure configuration parameters:
+
+```python
+from pydantic import BaseModel
+
+class SFT(BaseModel):
+ catchment: str # Catchment identifier
+ verbosity: str = "none" # Logging level
+ soil_moisture_bmi: int = 1 # BMI soil moisture flag
+ end_time: str = "1.[d]" # Simulation duration
+ dt: str = "1.0[h]" # Time step
+ soil_params_smcmax: float # Maximum soil moisture
+ soil_params_b: float # Soil retention parameter
+ soil_params_satpsi: float # Saturated soil suction
+ soil_params_quartz: float = 1.0 # Quartz content
+ ice_fraction_scheme: IceFractionScheme # Ice fraction method
+ soil_z: list[float] = [0.1, 0.3, 1.0, 2.0] # Soil layer depths
+ soil_temperature: list[float] # Initial temperatures
+```
+
+### Ice Fraction Schemes
+
+```python
+import enum
+
+class IceFractionScheme(str, enum.Enum):
+ SCHAAKE = "Schaake"
+ XINANJIANG = "Xinanjiang"
+```
+
+## Usage
+
+### Command Line Interface
+
+The SFT config text files can be created using the `icefabric` CLI tool:
+
+```bash
+icefabric params \
+ --gauge "01010000" \
+ --module "sft" \
+ --domain "conus" \
+ --catalog "glue" \
+ --ice-fraction "xinanjiang" \
+ --output "./output"
+```
+
+**CLI Parameters:**
+- `--gauge`: Gauge ID to trace upstream catchments from
+- `--module`: Module type (use "sft" for Soil Freeze-Thaw)
+- `--domain`: Hydrofabric domain (`conus`, `alaska`, etc.)
+- `--catalog`: PyIceberg Catalog type (`glue` or `sql`)
+- `--ice-fraction`: Ice fraction scheme (`schaake` or `xinanjiang`)
+- `--output`: Output directory for configuration files
+
+### REST API
+
+The SFT module is also accessible via REST API:
+
+```http
+GET /modules/sft/?identifier=01010000&domain=conus&use_schaake=false
+```
+
+**API Parameters:**
+- `identifier` (required): Gauge ID to trace upstream from
+- `domain` (optional): Geographic domain (default: `conus`)
+- `use_schaake` (optional): Use Schaake ice fraction scheme (default: `false`)
+
+**Response:** Returns a list of SFT configuration objects, one for each upstream catchment.
+
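+A minimal request sketch, assuming the API is running locally on port 8000 (host, port, and any route prefix may differ in your deployment):
+
+```python
+import requests
+
+resp = requests.get(
+    "http://localhost:8000/modules/sft/",
+    params={"identifier": "01010000", "domain": "conus", "use_schaake": "false"},
+    timeout=60,
+)
+resp.raise_for_status()
+configs = resp.json()  # one SFT configuration object per upstream catchment
+print(f"Received {len(configs)} catchment configurations")
+```
+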
+### Python API
+
+Direct Python usage:
+
+```python
+from icefabric.modules import get_sft_parameters
+from icefabric.schemas.hydrofabric import HydrofabricDomains
+from pyiceberg.catalog import load_catalog
+
+# Load catalog
+catalog = load_catalog("glue")
+
+# Get SFT parameters
+configs = get_sft_parameters(
+ catalog=catalog,
+ domain=HydrofabricDomains.CONUS,
+ identifier="01010000",
+ use_schaake=False
+)
+
+# Each config is an SFT pydantic model
+for config in configs:
+ print(f"Catchment: {config.catchment}")
+ print(f"Soil layers: {config.soil_z}")
+ print(f"Initial temperatures: {config.soil_temperature}")
+```
+
+## Parameter Estimation
+
+The system automatically estimates initial parameters from hydrofabric data:
+
+### Soil Parameters
+- **smcmax**: Calculated as mean across available soil moisture maximum values
+- **b (bexp)**: Derived from mode of soil moisture retention curve parameters
+- **satpsi**: Calculated as geometric mean of saturated soil suction values
+- **quartz**: Default value of 1.0 (assuming high quartz content)
+
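+A rough sketch of how these aggregations might look, using illustrative values rather than real hydrofabric attributes:
+
+```python
+import numpy as np
+
+# Illustrative divide-level attribute values (not real data)
+smcmax_vals = np.array([0.43, 0.45, 0.41])
+bexp_vals = np.array([4.05, 4.05, 5.0])
+satpsi_vals = np.array([0.036, 0.040, 0.030])
+
+smcmax = smcmax_vals.mean()                          # arithmetic mean
+vals, counts = np.unique(bexp_vals, return_counts=True)
+bexp = vals[counts.argmax()]                         # mode
+satpsi = float(np.exp(np.log(satpsi_vals).mean()))   # geometric mean
+```
+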
+### Temperature Initialization
+- **soil_temperature**: Currently set to a uniform 45°F (280.37K) across all layers
+- This represents a reasonable estimate for mean soil temperature
+
+### Spatial Resolution
+- **soil_z**: Default 4-layer discretization [0.1, 0.3, 1.0, 2.0] meters
+- Provides adequate resolution for freeze-thaw processes
+
+## Output Files
+
+The CLI and API generate BMI-compatible configuration files:
+
+```
+{catchment_id}_bmi_config_sft.txt
+```
+
+Example file content:
+```
+verbosity=none
+soil_moisture_bmi=1
+end_time=1.[d]
+dt=1.0[h]
+soil_params.smcmax=0.434
+soil_params.b=4.05
+soil_params.satpsi=0.0355
+soil_params.quartz=1.0
+ice_fraction_scheme=Xinanjiang
+soil_z=0.1,0.3,1.0,2.0[m]
+soil_temperature=280.37,280.37,280.37,280.37[K]
+```
+
+## Notes and Limitations
+
+1. **Temperature Initialization**: Current implementation uses uniform 45°F across all soil layers. Future versions should implement depth-dependent temperature profiles.
+
+2. **Parameter Weighting**: Soil parameters are currently averaged with equal weighting rather than weighted averaging based on layer thickness.
+
+3. **Quartz Support**: The `soil_params.quartz` attribute was removed in v2.2 of the Hydrofabric, so it is defaulted to 1.0 here.
+
+4. **Spatial Coverage**: Parameter estimation depends on available hydrofabric data coverage for the specified domain.
+
+5. **Temporal Considerations**: Initial parameters represent steady-state estimates. Actual model runs may require spin-up periods for equilibration.
diff --git a/docs/user_guide/icefabric_api.md b/docs/user_guide/icefabric_api.md
new file mode 100644
index 0000000..1625713
--- /dev/null
+++ b/docs/user_guide/icefabric_api.md
@@ -0,0 +1,275 @@
+# Icefabric API Guide
+
+## Overview
+
+The Icefabric API is a FastAPI-based service that provides access to EDFS data stored in Apache Iceberg format. The API offers multiple data export formats and metadata endpoints for the hydrofabric and streamflow observations.
+
+## Architecture
+
+The API consists of several key components:
+
+1. **Main Application** (`app/main.py`) - FastAPI application with health checks and router configuration
+2. **Data Routers** - Handle all data endpoints; streamflow observations, hydrofabric subsetting, National Water Model module configuration, and HEC-RAS cross-section retrieval are supported.
+3. **Apache Iceberg Backend** - Defaults to the hosted AWS Glue catalog; a local SQLite-backed catalog may be built using the instructions below.
+
+### Running the API locally
+To run the API locally, ensure your `.env` file in your project root has the right credentials (`test`), then run
+```sh
+uv sync
+source .venv/bin/activate
+python -m app.main
+```
+This should spin up the API services at `localhost:8000/`
+
+### Building the API through Docker
+To run the API locally with Docker, ensure your `.env` file in your project root has the right credentials, then run
+```sh
+docker compose -f docker/compose.yaml build --no-cache
+docker compose -f docker/compose.yaml up
+```
+This should spin up the API services
+
+### Running the API with a local Iceberg catalog - Advanced Use
+To run the API against a local catalog, the catalog must first be exported from Glue. In the following code block, run the export script for as many catalog namespaces as you need. Ensure your `.env` file in your project root has the right credentials (`test`), then run
+```sh
+uv sync
+source .venv/bin/activate
+python tools/pyiceberg/export_catalog.py --namespace conus_hf
+# Run the tool additional times with other namespaces as necessary
+```
+
+To view the namespaces hosted on glue, you can run the following commands in the terminal:
+```python
+>>> from pyiceberg.catalog import load_catalog
+>>> catalog = load_catalog("glue")
+>>> catalog.list_namespaces()
+```
+
+
+To run the API locally with a local SQL backend, ensure your `.env` file in your project root has the right credentials (`test`), then run
+```sh
+uv sync
+source .venv/bin/activate
+python -m app.main --catalog sql
+```
+This should spin up the API services
+
+## How It Works
+
+### Data Flow
+
+1. **Request Processing** - Validates data source and identifier parameters
+2. **Data Filtering** - Applies optional date range filters to Iceberg tables
+3. **Format Conversion** - Exports data in requested format (CSV/Parquet)
+4. **Response Generation** - Returns data with appropriate headers and metadata
+
+### Supported Data Sources
+
+#### Observations
+Currently supports:
+
+- **USGS** - United States Geological Survey hourly streamflow data
+
+#### Hydrofabric
+Provides geospatial watershed data:
+
+- **Subset Generation** - Creates upstream watershed subsets from identifiers
+
+!!! note "Data Storage"
+    All data is stored remotely as Apache Iceberg tables on AWS Glue unless you built the catalog locally, in which case it is stored in a SQLite-backed catalog at `/tmp/warehouse/pyiceberg_catalog.db`.
+
+### National Water Model Modules
+Retrieve National Water Model (NWM) module parameters.
+
+Currently supports:
+
+- **Soil Freeze Thaw (SFT)** - Retrieve parameters for the Soil Freeze Thaw module
+- **TopoFlow-Glacier** - Retrieve parameters for the TopoFlow Glacier module
+
+### RAS Cross-sections
+Retrieves geopackage data for HEC-RAS cross-sections.
+
+Currently supports:
+
+- **HUC ID**: Download a geopackage for given HUC ID
+- **HUC ID** and **Reach ID**: Download a geopackage for a given HUC ID and Reach ID
+
+## Usage Examples
+
+### Streamflow Observations
+
+```python
+import requests
+import pandas as pd
+from io import StringIO, BytesIO
+
+base_url = "http://localhost:8000/v1/streamflow_observations"
+
+# Get available data sources
+sources = requests.get(f"{base_url}/sources").json()
+
+# Get available identifiers for USGS
+identifiers = requests.get(f"{base_url}/usgs/available", params={"limit": 10}).json()
+
+# Get station information
+station_info = requests.get(f"{base_url}/usgs/01031500/info").json()
+print(f"Station has {station_info['total_records']} records")
+
+# Download CSV data with date filtering
+csv_response = requests.get(
+ f"{base_url}/usgs/csv",
+ params={
+ "identifier": "01031500",
+ "start_date": "2023-01-01T00:00:00",
+ "end_date": "2023-01-31T00:00:00",
+ "include_headers": True
+ }
+)
+df_csv = pd.read_csv(StringIO(csv_response.text))
+
+# Download Parquet data (recommended for large datasets)
+parquet_response = requests.get(
+ f"{base_url}/usgs/parquet",
+ params={
+ "identifier": "01031500",
+ "start_date": "2023-01-01T00:00:00"
+ }
+)
+df_parquet = pd.read_parquet(BytesIO(parquet_response.content))
+```
+
+### Hydrofabric Subset
+
+```python
+import requests
+
+# Download hydrofabric subset as geopackage
+response = requests.get("http://localhost:8000/v1/hydrofabric/01010000/gpkg")
+
+if response.status_code == 200:
+ with open("hydrofabric_subset_01010000.gpkg", "wb") as f:
+ f.write(response.content)
+ print(f"Downloaded {len(response.content)} bytes")
+else:
+ print(f"Error: {response.status_code}")
+```
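+
+### NWM Modules and RAS Cross-sections
+
+A minimal sketch mirroring the curl calls in the Verification section below. It assumes the module endpoints return JSON; the exact response payloads are not shown here.
+
+```python
+import requests
+
+base_url = "http://localhost:8000/v1"
+
+# Soil Freeze Thaw parameters for a catchment
+sft_params = requests.get(
+    f"{base_url}/modules/sft/",
+    params={"identifier": "01010000", "domain": "conus_hf", "use_schaake": "false"},
+).json()
+
+# TopoFlow albedo for a land cover state (snow, ice, or other)
+albedo = requests.get(
+    f"{base_url}/modules/topoflow/albedo", params={"landcover": "snow"}
+).json()
+
+# RAS cross-sections geopackage for a HUC ID
+ras_response = requests.get(f"{base_url}/ras_xs/02040106/")
+if ras_response.status_code == 200:
+    with open("ras_02040106.gpkg", "wb") as f:
+        f.write(ras_response.content)
+```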
+
+## Performance Considerations
+
+### Data Format Recommendations
+
+| Dataset Size | Recommended Format | Reason |
+|-------------|-------------------|---------|
+| < 50,000 records | CSV | Simple, widely supported |
+| > 50,000 records | Parquet | Better compression, faster processing |
+| > 200,000 records | Parquet + date filters | Reduced data transfer |
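+
+As a rough illustration of these guidelines, a client can check a station's record count through the `/info` endpoint and pick a format accordingly. This is only a sketch of the recommendation above; the 50,000-record threshold is a guideline, not something the API enforces.
+
+```python
+import requests
+
+base_url = "http://localhost:8000/v1/streamflow_observations"
+identifier = "01031500"
+
+# total_records comes from the station info endpoint shown earlier
+info = requests.get(f"{base_url}/usgs/{identifier}/info").json()
+
+fmt = "csv" if info["total_records"] < 50_000 else "parquet"
+resp = requests.get(f"{base_url}/usgs/{fmt}", params={"identifier": identifier})
+
+with open(f"{identifier}.{fmt}", "wb") as f:
+    f.write(resp.content)
+```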
+
+## Development
+
+### Running the API
+
+```bash
+# Install dependencies
+uv sync
+
+# Start development server
+python -m app.main
+```
+
+### Adding New Data Observation Sources
+
+To add a new data source, update the configuration in your router. Below is an example for the observations router:
+
+```python
+class DataSource(str, Enum):
+ USGS = "usgs"
+ NEW_SOURCE = "new_source" # Add new source
+
+# Add configuration
+DATA_SOURCE_CONFIG = {
+ DataSource.NEW_SOURCE: {
+ "namespace": "observations",
+ "table": "new_source_table",
+ "time_column": "timestamp",
+ "units": "m³/s",
+ "description": "New data source description",
+ },
+}
+```
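+
+Assuming the router builds its endpoints from `DATA_SOURCE_CONFIG`, the new source should then show up in the sources listing. A quick sketch to check:
+
+```python
+import requests
+
+sources = requests.get("http://localhost:8000/v1/streamflow_observations/sources").json()
+print(sources)  # expect "new_source" to appear alongside "usgs"
+```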
+
+## API Documentation
+
+### Interactive Documentation
+
+The API provides interactive documentation at:
+
+- **Swagger UI**: `http://localhost:8000/docs`
+- **ReDoc**: `http://localhost:8000/redoc`
+
+### OpenAPI Schema
+
+Access the OpenAPI schema at: `http://localhost:8000/openapi.json`
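+
+Programmatic clients can pull the schema and inspect the registered routes directly; for example:
+
+```python
+import requests
+
+schema = requests.get("http://localhost:8000/openapi.json").json()
+print(schema["info"]["title"], schema["info"]["version"])
+print(sorted(schema["paths"]))  # every registered endpoint path
+```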
+
+## Verification
+
+### Observations
+
+```bash
+# List available data sources
+curl http://localhost:8000/v1/streamflow_observations/sources
+
+# Get available identifiers (limit results)
+curl "http://localhost:8000/v1/streamflow_observations/usgs/available?limit=5"
+
+# Get data source information
+curl http://localhost:8000/v1/streamflow_observations/usgs/info
+
+# Get specific station information
+curl http://localhost:8000/v1/streamflow_observations/usgs/01010000/info
+
+# Download CSV with headers
+curl "http://localhost:8000/v1/streamflow_observations/usgs/csv?identifier=01010000&include_headers=true"
+
+# Download CSV with date filtering
+curl "http://localhost:8000/v1/streamflow_observations/usgs/csv?identifier=01010000&start_date=2021-12-31T14%3A00%3A00&end_date=2022-01-01T14%3A00%3A00&include_headers=true"
+
+# Download Parquet file
+curl "http://localhost:8000/v1/streamflow_observations/usgs/parquet?identifier=01010000&start_date=2021-12-31T14%3A00%3A00&end_date=2022-01-01T14%3A00%3A00" -o "output.parquet"
+```
+
+### Hydrofabric
+
+```bash
+# Download hydrofabric subset
+curl "http://localhost:8000/v1/hydrofabric/01010000/gpkg" -o "subset.gpkg"
+
+# Download with different identifier
+curl "http://localhost:8000/v1/hydrofabric/01031500/gpkg" -o "subset.gpkg"
+```
+
+### NWM Modules
+```bash
+# Return parameters for Soil Freeze Thaw by catchment
+curl "http://localhost:8000/v1/modules/sft/?identifier=01010000&domain=conus_hf&use_schaake=false"
+
+# Return the albedo value for a given land cover state (snow, ice, or other)
+curl "http://localhost:8000/v1/modules/topoflow/albedo?landcover=snow"
+```
+
+### RAS Cross-sections
+```bash
+# Download RAS cross-sections for a HUC ID
+curl "http://localhost:8000/v1/ras_xs/02040106/" -o "ras_02040106.gpkg"
+
+# Download RAS cross-sections for a HUC ID and Reach ID
+curl "http://localhost:8000/v1/ras_xs/02040106/dsreachid=4188251" -o "ras_02040106_4188251.gpkg"
+```
+
+### Health Check
+
+```bash
+# Check API health
+curl http://localhost:8000/health
+```
diff --git a/docs/user_guide/icefabric_tools.md b/docs/user_guide/icefabric_tools.md
new file mode 100644
index 0000000..48946b2
--- /dev/null
+++ b/docs/user_guide/icefabric_tools.md
@@ -0,0 +1,77 @@
+# Icefabric Tools
+
+A series of compute services built on top of version-controlled EDFS data.
+
+## Hydrofabric Geospatial Tools
+
+### Overview
+
+The Hydrofabric Geospatial Tools module provides Python functions for subsetting and analyzing hydrofabric data stored in Apache Iceberg format.
+
+### Functionality
+
+- **Data Subsetting** - the `subset()` function returns all upstream catchments and related layers for a given gauge or identifier
+
+### Usage Examples
+
+#### Basic Subsetting
+
+```python
+from pathlib import Path
+from pyiceberg.catalog import load_catalog
+from icefabric_tools import subset, IdType
+
+# Load the catalog using default settings
+catalog = load_catalog("glue")
+
+# Basic subset using a hydrofabric ID
+result = subset(
+ catalog=catalog,
+ identifier="wb-10026",
+ id_type=IdType.ID,
+ layers=["divides", "flowpaths", "network", "nexus"]
+)
+
+# Access the filtered data
+flowpaths = result["flowpaths"]
+divides = result["divides"]
+network = result["network"]
+nexus = result["nexus"]
+```
+
+#### Export to GeoPackage
+
+```python
+# Export subset directly to GeoPackage
+output_path = Path("subset_output.gpkg")
+
+subset(
+ catalog=catalog,
+ identifier="01031500",
+ id_type=IdType.POI_ID,
+ layers=["divides", "flowpaths", "network", "nexus", "pois"],
+ output_file=output_path
+)
+```
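+
+To verify the export, the GeoPackage can be read back with standard geospatial tooling. This sketch assumes `geopandas` and `fiona` are installed; they are not required by the subsetting tools themselves.
+
+```python
+import fiona
+import geopandas as gpd
+
+# List the layers written to the exported GeoPackage
+print(fiona.listlayers("subset_output.gpkg"))
+
+# Read a single layer back into a GeoDataFrame
+flowpaths_gdf = gpd.read_file("subset_output.gpkg", layer="flowpaths")
+print(flowpaths_gdf.head())
+```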
+
+#### Getting all layers
+
+```python
+# Include all available layers
+all_layers = [
+ "divides", "flowpaths", "network", "nexus",
+ "divide-attributes", "flowpath-attributes",
+ "flowpath-attributes-ml", "pois", "hydrolocations"
+]
+
+result = subset(
+ catalog=catalog,
+ identifier="HUC12-010100100101",
+ id_type=IdType.HL_URI,
+ layers=all_layers
+)
+
+# Process specific layers
+pois_data = result["pois"]
+attributes = result["flowpath-attributes"]
+```
diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md
new file mode 100644
index 0000000..e38398a
--- /dev/null
+++ b/docs/user_guide/index.md
@@ -0,0 +1,5 @@
+# Icefabric
+
+## The mission
+
+The idea for icefabric came from the need to version-control datasets for the National Water Model 4.0. With many different file formats and hydrofabric versions in circulation, the need for an [Apache Iceberg](https://iceberg.apache.org/)-style backend became clear. The name itself, icefabric, is a reference to this.
diff --git a/docs/user_guide/terraform.md b/docs/user_guide/terraform.md
new file mode 100644
index 0000000..8249e71
--- /dev/null
+++ b/docs/user_guide/terraform.md
@@ -0,0 +1,140 @@
+# AWS S3 Tables with Apache Iceberg - Terraform Implementation
+
+!!! note
+ These docs are taken from `src/icefabric_manage/terraform/README.md`
+
+This directory contains proof-of-concept (PoC) Terraform infrastructure-as-code for deploying Apache Iceberg tables using the AWS S3 Tables service with AWS Glue catalog integration. It also contains a basic demo/test Python script used to verify the deployment.
+
+## Architecture Overview
+
+The infrastructure creates:
+- **AWS S3 Tables bucket** - Managed storage for Iceberg table data
+- **S3 Tables namespace and table** - Logical organization for tables
+- **AWS Glue Catalog database** - Metadata storage for table schemas
+- **Lake Formation permissions** - Access control and governance
+- **IAM policies** - Secure access between services
+
+## Prerequisites
+
+### AWS Requirements
+- AWS CLI configured with appropriate credentials (older CLI versions may not support AWS S3 Tables)
+- Terraform >= 1.0
+- AWS Account with permissions (basically Admin due to IAM requirements) for:
+ - S3 Tables
+ - AWS Glue
+ - Lake Formation
+ - IAM
+
+### ⚠️ Critical: Enable S3 Table Buckets Integration
+
+**This step must be completed before running Terraform**, otherwise the deployment will fail.
+
+1. Navigate to the [S3 Table Buckets Console](https://console.aws.amazon.com/s3tables/home) in your target region
+2. Locate the section titled **"Integration with AWS analytics services"**
+3. Click the **"Enable integration"** button
+4. Confirm that the integration status shows **"Enabled"** for your deployment region
+
+This integration allows services like Athena, Glue, Redshift, and EMR to interact with S3 Table Buckets. Without this step, your Iceberg tables won't be accessible through these analytics services.
+
+> **Note**: This is a one-time setup per AWS region. Once enabled, all future S3 Table Buckets in that region will have access to AWS analytics services integration.
+
+### Python Requirements
+- Python 3.8+
+- `pyiceberg` Python module with its dependencies
+- `boto3` (AWS SDK for Python)
+
+## Quick Start
+
+### 1. Configure Variables
+
+Create a `terraform.tfvars` file, replacing the values below as appropriate for your environment or deployment:
+
+```hcl
+env = "dev"
+application = "myapp"
+team = "NGWPC"
+region = "us-east-1"
+identity_center_role_arn = "arn:aws:iam::123456789012:role/aws-reserved/sso.amazonaws.com/AWSReservedSSO_DataRole"
+
+# Optional: Specify Lake Formation admins
+lakeformation_admin_arns = [
+ "arn:aws:iam::123456789012:role/aws-reserved/sso.amazonaws.com/AWSReservedSSO_DataRole",
+ "arn:aws:iam::123456789012:role/aws-reserved/sso.amazonaws.com/AWSReservedSSO_Admin"
+]
+```
+
+### 2. Deploy the Infrastructure
+
+Deploy the infrastructure:
+
+```bash
+terraform init
+terraform plan
+terraform apply
+```
+
+### 3. Set Environment Variables
+
+After deployment, set these environment variables for the Python demo:
+
+```bash
+# From Terraform outputs
+export ICEBERG_WAREHOUSE_PATH=$(terraform output -raw s3tables_table_warehouse_location)
+export AWS_DEFAULT_REGION="us-east-1"
+```
+
+### 4. Install Python Dependencies
+
+Install the following in your preferred (active) virtual environment:
+
+```bash
+pip install pyiceberg boto3 mypy_boto3_glue pyarrow
+```
+
+### 5. Run the Demo
+
+```bash
+python iceberg_demo.py
+```
+
+## Terraform Configuration
+
+### Variables
+
+| Variable | Description | Type | Default | Required |
+|----------|-------------|------|---------|----------|
+| `region` | AWS region | string | `us-east-1` | No |
+| `env` | Environment name (test/oe/other) | string | - | Yes |
+| `application` | Application name | string | - | Yes |
+| `team` | Team name (for future tagging if supported) | string | - | Yes |
+| `identity_center_role_arn` | IAM role ARN for accessing resources | string | - | Yes |
+| `lakeformation_admin_arns` | Lake Formation administrator ARNs | list(string) | `[]` | No |
+
+### Outputs
+
+| Output | Description |
+|--------|-------------|
+| `s3tables_bucket_arn` | ARN of the S3 Tables bucket |
+| `s3tables_table_warehouse_location` | Warehouse location for Iceberg tables (developers need this) |
+| `glue_database_name` | Name of the Glue catalog database |
+| `lakeformation_admins` | List of Lake Formation administrators |
+
+## Python Integration
+
+### Basic Usage
+
+The provided `iceberg_demo.py` demonstrates:
+- Connecting to AWS Glue catalog
+- Creating/loading Iceberg tables
+- Basic schema definition
+
+### Configuration
+
+The Python script uses these environment variables:
+- `ICEBERG_WAREHOUSE_PATH` - S3 Tables warehouse location
+- `AWS_REGION` - AWS region for services
+- `AWS_DEFAULT_REGION` - Default AWS region
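+
+A minimal PyIceberg connection sketch using these variables. The catalog property names (`type`, `glue.region`, `warehouse`) reflect PyIceberg's Glue catalog support and are assumptions here; `iceberg_demo.py` remains the authoritative example.
+
+```python
+import os
+
+from pyiceberg.catalog import load_catalog
+
+# Assumes the Terraform outputs were exported as environment variables (step 3)
+catalog = load_catalog(
+    "glue",
+    **{
+        "type": "glue",
+        "glue.region": os.environ.get("AWS_DEFAULT_REGION", "us-east-1"),
+        "warehouse": os.environ["ICEBERG_WAREHOUSE_PATH"],
+    },
+)
+print(catalog.list_namespaces())
+```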
+
+## Permissions and Security
+
+### Lake Formation Integration
+
+The infrastructure automatically configures basic Lake Formation settings; these can be made more granular in the future:
+
+- Database-level permissions for the specified Identity Center role (SoftwareEngineersFull)
+- Table-level permissions are supported, but have not been tested
diff --git a/examples/iceberg/iceberg_version_control_examples.ipynb b/examples/iceberg/iceberg_version_control_examples.ipynb
new file mode 100644
index 0000000..9b6ea74
--- /dev/null
+++ b/examples/iceberg/iceberg_version_control_examples.ipynb
@@ -0,0 +1,484 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Apache Iceberg Version Control for Hydrofabric and Streamflow Data\n",
+ "\n",
+ "### Overview\n",
+ "\n",
+ "This notebook demonstrates **enterprise-grade version control capabilities** for hydrological datasets using Apache Iceberg. We'll showcase how the hydrofabric and streamflow observations can be managed with full version control.\n",
+ "\n",
+ "#### What is Apache Iceberg?\n",
+ "\n",
+ "**Apache Iceberg** is a high-performance table format designed for large-scale data lakes. Unlike traditional file formats, Iceberg provides:\n",
+ "\n",
+ "- **Automatic snapshots** of every data change\n",
+ "- **Time travel queries** to access historical versions\n",
+ "- **ACID transactions** for data consistency\n",
+ "- **Schema evolution** without breaking existing queries\n",
+ "- **Query performance** through advanced indexing and pruning\n",
+ "- **Complete audit trails** for regulatory compliance"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import all necessary modules\n",
+ "import os\n",
+ "from pathlib import Path\n",
+ "\n",
+ "import pyarrow as pa\n",
+ "from pyiceberg.catalog import load_catalog\n",
+ "\n",
+ "from icefabric.helpers import load_creds, load_pyiceberg_config\n",
+ "\n",
+ "# Changes the current working dir to be the project root\n",
+ "current_working_dir = Path.cwd()\n",
+ "os.chdir(Path.cwd() / \"../../\")\n",
+ "print(\n",
+ " f\"Changed current working dir from {current_working_dir} to: {Path.cwd()}. This must run at the project root\"\n",
+ ")\n",
+ "\n",
+ "\n",
+ "# dir is where the .env file is located\n",
+ "load_creds(dir=Path.cwd())\n",
+ "\n",
+ "# Loading the local pyiceberg config settings\n",
+ "pyiceberg_config = load_pyiceberg_config(Path.cwd())\n",
+ "catalog = load_catalog(\n",
+ " name=\"sql\",\n",
+ " type=pyiceberg_config[\"catalog\"][\"sql\"][\"type\"],\n",
+ " uri=pyiceberg_config[\"catalog\"][\"sql\"][\"uri\"],\n",
+ " warehouse=pyiceberg_config[\"catalog\"][\"sql\"][\"warehouse\"],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Exploring the Data Catalog\n",
+ "\n",
+ "Apache Iceberg organizes data into **catalogs**, **namespaces**, and **tables** - similar to databases, schemas, and tables in traditional systems. However, each table maintains complete version history automatically.\n",
+ "\n",
+ "#### Hydrofabric Tables\n",
+ "\n",
+ "The `conus_hf` namespace contains hydrofabric layers associated with the CONUS-based geopackage\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "catalog.list_tables(\"conus_hf\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's examine the **hydrolocations** table and make some versioned additions. Below we'll see both the snapshots from the hydrolocations table, and actual geopackage layer exported to a pandas dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "table = catalog.load_table(\"conus_hf.hydrolocations\")\n",
+ "table.inspect.snapshots()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = table.scan().to_pandas()\n",
+ "df.tail()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Snapshot Analysis: Understanding Version History\n",
+ "\n",
+ "Each snapshot in Iceberg contains:\n",
+ "- **Unique identifier** (snapshot_id)\n",
+ "- **Summary metadata** describing the operation\n",
+ "- **Timestamp** of the change\n",
+ "- **File manifests** pointing to data files\n",
+ "- **Schema information** at that point in time\n",
+ "\n",
+ "This enables **complete traceability** of how data evolved over time."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for snapshot in table.snapshots():\n",
+ " print(f\"Snapshot ID: {snapshot.snapshot_id}; Summary: {snapshot.summary}\")\n",
+ "snapshot_id = table.metadata.snapshots[0].snapshot_id"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Demonstrating Version Control: Adding New Monitoring Location\n",
+ "\n",
+ "Now we'll demonstrate Iceberg's version control by adding a **new hydrologic monitoring location**\n",
+ "\n",
+ "#### The Version Control Process:\n",
+ "\n",
+ "1. **Modify data** (add new monitoring location)\n",
+ "2. **Overwrite table** (creates new snapshot automatically)\n",
+ "3. **Preserve history** (all previous versions remain accessible)\n",
+ "4. **Track changes** (complete audit trail maintained)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "new_df = df.copy()\n",
+ "new_df.loc[len(new_df)] = {\n",
+ " \"poi_id\": 99999,\n",
+ " \"id\": \"wb-0\",\n",
+ " \"nex_id\": \"tnx-0\",\n",
+ " \"hf_id\": 999999,\n",
+ " \"hl_link\": \"Testing\",\n",
+ " \"hl_reference\": \"testing\",\n",
+ " \"hl_uri\": \"testing\",\n",
+ " \"hl_source\": \"testing\",\n",
+ " \"hl_x\": -1.952088e06,\n",
+ " \"hl_y\": 1.283884e06,\n",
+ " \"vpu_id\": 18,\n",
+ "}\n",
+ "new_df.tail()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Writing Changes: Automatic Snapshot Creation\n",
+ "\n",
+ "When we write changes to an Iceberg table:\n",
+ "\n",
+ "1. **Schema validation** ensures data compatibility\n",
+ "2. **New snapshot created** automatically with unique ID\n",
+ "3. **Previous snapshots preserved** for time travel\n",
+ "4. **Metadata updated** with operation summary\n",
+ "5. **ACID guarantees** ensure consistency\n",
+ "\n",
+ "This happens **atomically** - either the entire operation succeeds or fails, with no partial states.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "_df = pa.Table.from_pandas(new_df, preserve_index=False)\n",
+ "with table.update_schema() as update_schema:\n",
+ " update_schema.union_by_name(_df.schema)\n",
+ "table.overwrite(_df)\n",
+ "table.scan().to_pandas().tail()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Verifying New Snapshot Creation\n",
+ "\n",
+ "Let's examine the updated snapshot history. Notice how we now have **multiple snapshots**:\n",
+ "\n",
+ "1. **Original data** (initial snapshot)\n",
+ "2. **Data with new location** (our recent addition)\n",
+ "\n",
+ "Each snapshot is **completely independent** and can be accessed separately for different analyses or rollback scenarios.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for snapshot in table.snapshots():\n",
+ " print(f\"Snapshot ID: {snapshot.snapshot_id}; Summary: {snapshot.summary}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Iceberg's **time travel capability** allows querying any previous snapshot using its ID\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "snapshot_id = table.metadata.snapshots[0].snapshot_id\n",
+ "snapshot_id_latest = table.metadata.snapshots[-1].snapshot_id\n",
+ "table.scan(snapshot_id=snapshot_id).to_pandas().tail()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "table.scan(snapshot_id=snapshot_id_latest).to_pandas().tail()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Comparing Versions: Before and After\n",
+ "\n",
+ "Notice the difference between snapshots:\n",
+ "- **Original snapshot**: Contains original monitoring locations\n",
+ "- **Latest snapshot**: Includes our new test location (poi_id: 99999)\n",
+ "\n",
+ "This demonstrates **non-destructive updates** - both versions coexist and remain queryable.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Streamflow Observations: Time Series Version Control\n",
+ "\n",
+ "Now let's examine **streamflow observations** - time series data that requires different version control considerations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "table = catalog.load_table(\"streamflow_observations.usgs_hourly\")\n",
+ "table.inspect.snapshots()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = table.scan().to_pandas().set_index(\"time\")\n",
+ "df.tail()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for snapshot in table.snapshots():\n",
+ " print(f\"Snapshot ID: {snapshot.snapshot_id}; Summary: {snapshot.summary}\")\n",
+ "snapshot_id = table.metadata.snapshots[0].snapshot_id"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Adding Time Series Data: Simulating Real-Time Updates\n",
+ "\n",
+ "We'll now add a new streamflow observation to demonstrate version control for time series data\n",
+ "\n",
+ "The process maintains **historical context** while adding new information."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "new_streamflow_df = df.copy()\n",
+ "new_streamflow_df.loc[len(new_df)] = 0.1\n",
+ "new_streamflow_df.tail()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "_df = pa.Table.from_pandas(new_streamflow_df)\n",
+ "with table.update_schema() as update_schema:\n",
+ " update_schema.union_by_name(_df.schema)\n",
+ "table.overwrite(_df)\n",
+ "table.scan().to_pandas().tail()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for snapshot in table.snapshots():\n",
+ " print(f\"Snapshot ID: {snapshot.snapshot_id}; Summary: {snapshot.summary}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Time Travel with Time Series Data\n",
+ "\n",
+ "Comparing different snapshots of time series data reveals:\n",
+ "\n",
+ "#### Original Snapshot (Baseline Data):\n",
+ "- Contains original observational record\n",
+ "- Represents specific quality control state\n",
+ "- Suitable for historical analysis\n",
+ "\n",
+ "#### Latest Snapshot (Updated Data): \n",
+ "- Includes new observations\n",
+ "- Represents current operational state\n",
+ "- Suitable for real-time applications"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "snapshot_id = table.metadata.snapshots[0].snapshot_id\n",
+ "snapshot_id_latest = table.metadata.snapshots[-1].snapshot_id\n",
+ "table.scan(snapshot_id=snapshot_id).to_pandas().tail().set_index(\"time\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = table.scan(snapshot_id=snapshot_id).to_pandas()\n",
+ "_df = pa.Table.from_pandas(df)\n",
+ "with table.update_schema() as update_schema:\n",
+ " update_schema.union_by_name(_df.schema)\n",
+ "table.overwrite(_df)\n",
+ "table.scan().to_pandas().tail()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "table.scan(snapshot_id=snapshot_id_latest).to_pandas().tail().set_index(\"time\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Demonstration Cleanup: Reverting Changes\n",
+ "\n",
+ "To maintain data integrity, we'll now **revert our test changes** by removing the added records. This demonstrates:\n",
+ "\n",
+ "- **Controlled rollback** procedures\n",
+ "- **Data management** best practices \n",
+ "- **Cleanup workflows** for testing environments\n",
+ "\n",
+ "**Important**: Even these cleanup operations create new snapshots, maintaining complete audit trails of all activities."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Cleaning up hydrofabric changes\n",
+ "table = catalog.load_table(\"conus_hf.hydrolocations\")\n",
+ "new_df = new_df.drop(new_df.index[-1])\n",
+ "_df = pa.Table.from_pandas(new_df, preserve_index=False)\n",
+ "with table.update_schema() as update_schema:\n",
+ " update_schema.union_by_name(_df.schema)\n",
+ "table.overwrite(_df)\n",
+ "catalog.load_table(\"conus_hf.hydrolocations\").scan().to_pandas().tail()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Cleaning up Streamflow Observation changes\n",
+ "table = catalog.load_table(\"streamflow_observations.usgs_hourly\")\n",
+ "new_streamflow_df = new_streamflow_df.drop(new_streamflow_df.index[-1])\n",
+ "_df = pa.Table.from_pandas(new_streamflow_df)\n",
+ "with table.update_schema() as update_schema:\n",
+ " update_schema.union_by_name(_df.schema)\n",
+ "table.overwrite(_df)\n",
+ "catalog.load_table(\"streamflow_observations.usgs_hourly\").scan().to_pandas().tail()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**This demonstration showcases Apache Iceberg's capability to provide version control for water resources data, enabling both reliability and reproducibility for large-scale hydrological modeling systems.**"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "icefabric",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/iceberg/pyiceberg_time_travel.ipynb b/examples/iceberg/pyiceberg_time_travel.ipynb
new file mode 100644
index 0000000..d99689d
--- /dev/null
+++ b/examples/iceberg/pyiceberg_time_travel.ipynb
@@ -0,0 +1,465 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "0",
+ "metadata": {},
+ "source": [
+ "# Demo: Time Travel with Iceberg Tables - CRUD Operations & Version Control\n",
+ "\n",
+ "## Overview\n",
+ "This notebook demonstrates **Create, Read, Update, and Delete (CRUD) operations** on version-controlled data using Apache Iceberg tables. The notebook showcases how Iceberg's snapshot-based architecture enables time travel capabilities and maintains a complete history of all data modifications.\n",
+ "\n",
+ "## Key Features Demonstrated:\n",
+ "- **CREATE**: Creating new tables and adding data\n",
+ "- **READ**: Querying current and historical data snapshots\n",
+ "- **UPDATE**: Modifying table schemas and data\n",
+ "- **DELETE**: Removing columns and dropping tables\n",
+ "- **VERSION CONTROL**: Time travel through snapshots to view historical states\n",
+ "\n",
+ "## Prerequisites:\n",
+ "- a local pyiceberg catalog spun up and referenced through .pyiceberg.yaml\n",
+ "\n",
+ "## Objectives:\n",
+ "By the end of this notebook, you will understand how to:\n",
+ "1. Perform all CRUD operations on Iceberg tables\n",
+ "2. Leverage version control to access historical data states\n",
+ "3. Create and manage table snapshots\n",
+ "4. Navigate between different versions of your data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "from pathlib import Path\n",
+ "\n",
+ "from pyiceberg.catalog import load_catalog\n",
+ "\n",
+ "from icefabric.helpers import load_creds, load_pyiceberg_config\n",
+ "\n",
+ "# Changes the current working dir to be the project root\n",
+ "current_working_dir = Path.cwd()\n",
+ "os.chdir(Path.cwd() / \"../../\")\n",
+ "print(\n",
+ " f\"Changed current working dir from {current_working_dir} to: {Path.cwd()}. This must run at the project root\"\n",
+ ")\n",
+ "\n",
+ "\n",
+ "# dir is where the .env file is located\n",
+ "load_creds(dir=Path.cwd())\n",
+ "\n",
+ "# Loading the local pyiceberg config settings\n",
+ "pyiceberg_config = load_pyiceberg_config(Path.cwd())\n",
+ "catalog = load_catalog(\n",
+ " name=\"sql\",\n",
+ " type=pyiceberg_config[\"catalog\"][\"sql\"][\"type\"],\n",
+ " uri=pyiceberg_config[\"catalog\"][\"sql\"][\"uri\"],\n",
+ " warehouse=pyiceberg_config[\"catalog\"][\"sql\"][\"warehouse\"],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2",
+ "metadata": {},
+ "source": [
+ "### READ Operation: Loading and Inspecting Existing Data\n",
+ "\n",
+ "We begin by demonstrating the **READ** operation by loading an existing table and examining its version history. This shows how Iceberg maintains complete metadata about all snapshots (versions) of the data.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "table = catalog.load_table(\"streamflow_observations.usgs_hourly\")\n",
+ "table.inspect.snapshots()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4",
+ "metadata": {},
+ "source": [
+ "Let's examine the current data in the table. This represents the latest version of our dataset. Notice how we can easily convert Iceberg tables to pandas DataFrames for analysis.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = table.scan().to_pandas().set_index(\"time\")\n",
+ "df.tail()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6",
+ "metadata": {},
+ "source": [
+ "### Version Control: Capturing Initial State\n",
+ "\n",
+ "**Version Control Feature**: Every operation in Iceberg creates a snapshot with a unique ID. We're capturing the initial snapshot ID here so we can demonstrate time travel capabilities later. This snapshot represents the baseline state of our data before any modifications.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for snapshot in table.snapshots():\n",
+ " print(f\"Snapshot ID: {snapshot.snapshot_id}; Summary: {snapshot.summary}\")\n",
+ "snapshot_id = table.metadata.snapshots[0].snapshot_id"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8",
+ "metadata": {},
+ "source": [
+ "### UPDATE Operation: Schema Evolution and Data Modification\n",
+ " \n",
+ "Now we'll demonstrate the **UPDATE** operation by adding a new column to our existing table. This involves:\n",
+ "1. Creating synthetic data for the new column\n",
+ "2. Updating the table schema to accommodate the new column\n",
+ "3. Overwriting the table with the updated data\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "n = len(df)\n",
+ "x = np.linspace(0, n, n)\n",
+ "y = np.sin(2 * np.pi * 1 * x / n).astype(np.float32)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "10",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pyarrow as pa\n",
+ "\n",
+ "df[\"12345678\"] = y\n",
+ "df.tail()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "11",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "_df = pa.Table.from_pandas(df)\n",
+ "with table.update_schema() as update_schema:\n",
+ " update_schema.union_by_name(_df.schema)\n",
+ "table.overwrite(_df)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "12",
+ "metadata": {},
+ "source": [
+ "After our UPDATE operation, we can verify that the schema has been modified. The new column \"12345678\" should now be part of the table structure.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "13",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "table.schema().fields[-1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "14",
+ "metadata": {},
+ "source": [
+ "### Version Control: Tracking All Changes\n",
+ "\n",
+ "**Version Control Feature**: Notice how Iceberg has automatically created new snapshots for our UPDATE operation. The snapshot history now shows:\n",
+ "- Original data snapshot\n",
+ "- Delete operation snapshot (part of overwrite)\n",
+ "- New append operation snapshot (with the new column)\n",
+ "\n",
+ "This complete audit trail is essential for data governance and debugging."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "15",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for snapshot in table.snapshots():\n",
+ " print(f\"Snapshot ID: {snapshot.snapshot_id}; Summary: {snapshot.summary}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "16",
+ "metadata": {},
+ "source": [
+ "**Time Travel Feature**: Using the snapshot ID we captured earlier, we can query the table as it existed before our UPDATE operation. This demonstrates Iceberg's powerful time travel capabilities - you can access any historical state of your data.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "17",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "table.scan(snapshot_id=snapshot_id).to_pandas().tail()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "18",
+ "metadata": {},
+ "source": [
+ "Comparing the current state (with the new column) versus the historical state (without the column) demonstrates how version control preserves all data states while allowing easy access to current data.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "19",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "table.scan().to_pandas().tail()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "20",
+ "metadata": {},
+ "source": [
+ "Now we'll demonstrate another **UPDATE** operation by removing the column we just added. This shows how Iceberg handles schema evolution in both directions (adding and removing columns).\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "21",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with table.update_schema() as update_schema:\n",
+ " update_schema.delete_column(\"12345678\")\n",
+ "\n",
+ "df = df.drop(\"12345678\", axis=1)\n",
+ "_df = pa.Table.from_pandas(df)\n",
+ "table.overwrite(_df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "22",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "table.schema().fields[-1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "23",
+ "metadata": {},
+ "source": [
+ "### CREATE Operation: Building New Tables\n",
+ "\n",
+ "Now we'll demonstrate the **CREATE** operation by building an entirely new table from scratch. This shows how to:\n",
+ "1. Prepare data for a new table\n",
+ "2. Create the table structure in the catalog\n",
+ "3. Populate the table with initial data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "24",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "__df = df.copy()\n",
+ "__df[\"12345678\"] = y\n",
+ "subset_df = __df[[\"12345678\"]].copy()\n",
+ "subset_df.tail()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "25",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "namespace = \"streamflow_observations\"\n",
+ "table_name = \"testing_hourly\"\n",
+ "arrow_table = pa.Table.from_pandas(subset_df)\n",
+ "iceberg_table = catalog.create_table(\n",
+ " f\"{namespace}.{table_name}\",\n",
+ " schema=arrow_table.schema,\n",
+ ")\n",
+ "iceberg_table.append(arrow_table)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "26",
+ "metadata": {},
+ "source": [
+ "### READ Operation: Verifying New Table Creation \n",
+ "\n",
+ "After our **CREATE** operation, we can verify that the new table exists in our namespace and examine its initial snapshot. Every new table starts with its first snapshot upon creation.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "27",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "catalog.list_tables(namespace)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "28",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "table = catalog.load_table(f\"{namespace}.{table_name}\")\n",
+ "table.inspect.snapshots()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "29",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "table.scan().to_pandas().tail()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "30",
+ "metadata": {},
+ "source": [
+ "### DELETE Operation: Table Removal\n",
+ "\n",
+ "Finally, we demonstrate the **DELETE** operation by completely removing the table we just created. This shows how to clean up resources and manage table lifecycle.\n",
+ "\n",
+ "**Important**: Unlike column deletion (which is reversible through time travel), table deletion is permanent and removes all snapshots and data.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "31",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "catalog.drop_table(f\"{namespace}.{table_name}\")\n",
+ "catalog.list_tables(namespace)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "32",
+ "metadata": {},
+ "source": [
+ "### Summary: CRUD Operations and Version Control Demonstrated\n",
+ " \n",
+ "This notebook has successfully demonstrated all required CRUD operations with version-controlled data:\n",
+ " \n",
+ "#### CREATE Operations:\n",
+ "- Created new tables with `catalog.create_table()`\n",
+ "- Added new columns to existing tables\n",
+ "- Populated tables with initial data using `append()`\n",
+ "\n",
+ "#### READ Operations:\n",
+ "- Loaded existing tables with `catalog.load_table()`\n",
+ "- Queried current data states with `table.scan()`\n",
+ "- Accessed historical data states using snapshot IDs\n",
+ "- Inspected table schemas and metadata\n",
+ " \n",
+ "#### UPDATE Operations:\n",
+ "- Modified table schemas by adding columns\n",
+ "- Updated data through `overwrite()` operations\n",
+ "- Removed columns from existing tables\n",
+ "\n",
+ "#### DELETE Operations:\n",
+ "- Deleted columns from table schemas\n",
+ "- Removed entire tables with `catalog.drop_table()`\n",
+ "\n",
+ "#### Version Control Features:\n",
+ "- **Snapshot Management**: Every operation creates tracked snapshots\n",
+ "- **Time Travel**: Access any historical state using snapshot IDs\n",
+ "- **Audit Trail**: Complete history of all table modifications\n",
+ "- **Schema Evolution**: Track changes to table structure over time\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "33",
+ "metadata": {},
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "icefabric",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/iceberg/querying_ras_xs.ipynb b/examples/iceberg/querying_ras_xs.ipynb
new file mode 100644
index 0000000..2e1ddb4
--- /dev/null
+++ b/examples/iceberg/querying_ras_xs.ipynb
@@ -0,0 +1,750 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Querying RAS XS through PyIceberg\n",
+ "\n",
+ "The following notebook is to walk you through the process of querying a RAS XS through PyIceberg. \n",
+ "\n",
+ "For the warehouse path, please put the path to your S3 tables URI\n",
+ "\n",
+ "Requires `.env` containing `test` account credentials AND default region: `AWS_DEFAULT_REGION=\"us-east-1\"`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pathlib import Path\n",
+ "\n",
+ "from pyiceberg.catalog import load_catalog"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from icefabric.helpers import load_creds\n",
+ "\n",
+ "# dir is where the .env file is located\n",
+ "load_creds(dir=Path.cwd().parents[1])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'/Users/taddbindas/projects/NGWPC/icefabric/.pyiceberg.yaml'"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "os.environ[\"PYICEBERG_HOME\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "NoSuchPropertyException",
+ "evalue": "SQL connection URI is required",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+ "\u001b[31mNoSuchPropertyException\u001b[39m Traceback (most recent call last)",
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[7]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m catalog = \u001b[43mload_catalog\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43msql\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43m{\u001b[49m\n\u001b[32m 2\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtype\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43msql\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 3\u001b[39m \u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 4\u001b[39m catalog.list_tables(\u001b[33m\"\u001b[39m\u001b[33mmip_xs\u001b[39m\u001b[33m\"\u001b[39m)[\u001b[32m40\u001b[39m:\u001b[32m50\u001b[39m]\n",
+ "\u001b[36mFile \u001b[39m\u001b[32m~/projects/NGWPC/icefabric/.venv/lib/python3.12/site-packages/pyiceberg/catalog/__init__.py:260\u001b[39m, in \u001b[36mload_catalog\u001b[39m\u001b[34m(name, **properties)\u001b[39m\n\u001b[32m 257\u001b[39m catalog_type = infer_catalog_type(name, conf)\n\u001b[32m 259\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m catalog_type:\n\u001b[32m--> \u001b[39m\u001b[32m260\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mAVAILABLE_CATALOGS\u001b[49m\u001b[43m[\u001b[49m\u001b[43mcatalog_type\u001b[49m\u001b[43m]\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\u001b[43mDict\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconf\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 262\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mCould not initialize catalog with the following properties: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mproperties\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n",
+ "\u001b[36mFile \u001b[39m\u001b[32m~/projects/NGWPC/icefabric/.venv/lib/python3.12/site-packages/pyiceberg/catalog/__init__.py:159\u001b[39m, in \u001b[36mload_sql\u001b[39m\u001b[34m(name, conf)\u001b[39m\n\u001b[32m 156\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 157\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpyiceberg\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mcatalog\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01msql\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m SqlCatalog\n\u001b[32m--> \u001b[39m\u001b[32m159\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mSqlCatalog\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mconf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 160\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m 161\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m NotInstalledError(\n\u001b[32m 162\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mSQLAlchemy support not installed: pip install \u001b[39m\u001b[33m'\u001b[39m\u001b[33mpyiceberg[sql-postgres]\u001b[39m\u001b[33m'\u001b[39m\u001b[33m or pip install \u001b[39m\u001b[33m'\u001b[39m\u001b[33mpyiceberg[sql-sqlite]\u001b[39m\u001b[33m'\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 163\u001b[39m ) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mexc\u001b[39;00m\n",
+ "\u001b[36mFile \u001b[39m\u001b[32m~/projects/NGWPC/icefabric/.venv/lib/python3.12/site-packages/pyiceberg/catalog/sql.py:123\u001b[39m, in \u001b[36mSqlCatalog.__init__\u001b[39m\u001b[34m(self, name, **properties)\u001b[39m\n\u001b[32m 120\u001b[39m \u001b[38;5;28msuper\u001b[39m().\u001b[34m__init__\u001b[39m(name, **properties)\n\u001b[32m 122\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (uri_prop := \u001b[38;5;28mself\u001b[39m.properties.get(\u001b[33m\"\u001b[39m\u001b[33muri\u001b[39m\u001b[33m\"\u001b[39m)):\n\u001b[32m--> \u001b[39m\u001b[32m123\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m NoSuchPropertyException(\u001b[33m\"\u001b[39m\u001b[33mSQL connection URI is required\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 125\u001b[39m echo_str = \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28mself\u001b[39m.properties.get(\u001b[33m\"\u001b[39m\u001b[33mecho\u001b[39m\u001b[33m\"\u001b[39m, DEFAULT_ECHO_VALUE)).lower()\n\u001b[32m 126\u001b[39m echo = strtobool(echo_str) \u001b[38;5;28;01mif\u001b[39;00m echo_str != \u001b[33m\"\u001b[39m\u001b[33mdebug\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mdebug\u001b[39m\u001b[33m\"\u001b[39m\n",
+ "\u001b[31mNoSuchPropertyException\u001b[39m: SQL connection URI is required"
+ ]
+ }
+ ],
+ "source": [
+ "catalog = load_catalog(\"glue\")\n",
+ "catalog.list_tables(\"mip_xs\")[40:50]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Using `catalog.load_table()` we can directly call the XS data. Each is stored based on HUC8"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "