Update 5. Interoperability (#347)

seohyonkim · Zethson · web-flow · commit a2318ffde891 · 2025-03-26T18:03:48.000+01:00
* few changes in the beginning

* change interoperability_environment.yml

* change python version to >= 3.12 in yml

* clean til 5.3

* til 5.4

* WIP multimodal data

* this environment works

* remove jupyternb and ipywidgets from environment

* WORKS. Come back here when you want to rage quit

* delete local cells

* redoing cos push failed

* add changelog fragment

* update env, interoperability_dataset

* more detailed description in changelog fragments

* move anndata generating part to the dataset nb. fix accessing anndata and MuData file with LaminDB.

* add lamin info box

* Update jupyter-book/introduction/interoperability.yml

Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net>

---------

Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net>
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -7,11 +7,11 @@ default_stages:
 minimum_pre_commit_version: 2.16.0
 repos:
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v2.5.1
+    rev: v3.1.0
     hooks:
       - id: prettier
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.6
+    rev: v0.9.7
     hooks:
       - id: ruff
         args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes]
diff --git a/changelog.d/345.added.md b/changelog.d/345.added.md
@@ -1 +1 @@
-Add `release notes`
+Add new feature `changelog`. Changelog fragments will be added in `changelog` chapter.
diff --git a/changelog.d/347.changed.md b/changelog.d/347.changed.md
@@ -0,0 +1 @@
+Add dataset generator and update texts in `Interoperability` chapter
diff --git a/jupyter-book/_static/book.js b/jupyter-book/_static/book.js
@@ -18,7 +18,7 @@
     document.querySelector(".prev-next-bottom").remove();
     document.querySelector(".footer").remove();
     var elementsToRemove = document.querySelectorAll(
-      ".remove-from-content-only"
+      ".remove-from-content-only",
     );
     elementsToRemove.forEach(function (el) {
       el.remove();
@@ -30,8 +30,8 @@
     var style = document.createElement("style");
     style.appendChild(
       document.createTextNode(
-        "hypothesis-sidebar, hypothesis-notebook, hypothesis-adder{display:none!important;}"
-      )
+        "hypothesis-sidebar, hypothesis-notebook, hypothesis-adder{display:none!important;}",
+      ),
     );
     document.getElementsByTagName("head")[0].appendChild(style);
   }
diff --git a/jupyter-book/conditions/compositional.ipynb b/jupyter-book/conditions/compositional.ipynb
@@ -3961,7 +3961,7 @@
     "            y=-np.log10(alpha),\n",
     "            color=\"red\",\n",
     "            linewidth=1,\n",
-    "            label=f\"{int(alpha*100)} % SpatialFDR\",\n",
+    "            label=f\"{int(alpha * 100)} % SpatialFDR\",\n",
     "        )\n",
     "        plt.legend()\n",
     "        plt.xlabel(\"log-Fold Change\")\n",
@@ -3977,7 +3977,7 @@
     "        sns.scatterplot(data=df, x=\"logCPM\", y=\"logFC\", hue=\"Sig\")\n",
     "        plt.axhline(y=0, color=\"grey\", linewidth=1)\n",
     "        plt.axhline(y=emp_null, color=\"purple\", linewidth=1)\n",
-    "        plt.legend(title=f\"< {int(alpha*100)} % SpatialFDR\")\n",
+    "        plt.legend(title=f\"< {int(alpha * 100)} % SpatialFDR\")\n",
     "        plt.xlabel(\"Mean log-counts\")\n",
     "        plt.ylabel(\"log-Fold Change\")\n",
     "        plt.show()\n",
diff --git a/jupyter-book/introduction/analysis_tools.ipynb b/jupyter-book/introduction/analysis_tools.ipynb
@@ -2132,8 +2132,8 @@
    ],
    "source": [
     "adata = ad.AnnData(y)\n",
-    "adata.obs_names = [f\"obs_{i+1}\" for i in range(n)]\n",
-    "adata.var_names = [f\"var_{j+1}\" for j in range(d)]\n",
+    "adata.obs_names = [f\"obs_{i + 1}\" for i in range(n)]\n",
+    "adata.var_names = [f\"var_{j + 1}\" for j in range(d)]\n",
     "adata"
    ]
   },
@@ -2159,8 +2159,8 @@
     "y2 = np.dot(z, w2.T)\n",
     "\n",
     "adata2 = ad.AnnData(y2)\n",
-    "adata2.obs_names = [f\"obs_{i+1}\" for i in range(n)]\n",
-    "adata2.var_names = [f\"var2_{j+1}\" for j in range(d2)]\n",
+    "adata2.obs_names = [f\"obs_{i + 1}\" for i in range(n)]\n",
+    "adata2.var_names = [f\"var2_{j + 1}\" for j in range(d2)]\n",
     "adata2"
    ]
   },
diff --git a/jupyter-book/introduction/interoperability.ipynb b/jupyter-book/introduction/interoperability.ipynb
diff --git a/jupyter-book/introduction/interoperability.yml b/jupyter-book/introduction/interoperability.yml
@@ -4,26 +4,27 @@ channels:
   - bioconda
   - defaults
 dependencies:
-  - conda-forge::python=3.9.16
-  - conda-forge::jupyterlab=3.6.3
-  - conda-forge::scanpy=1.9.3
-  - anndata2ri=1.2
-  - bioconductor-basilisk=1.9.12
-  - bioconductor-mudata=1.2.0
-  - bioconductor-scuttle=1.8.0
-  - bioconductor-singlecellexperiment=1.20.0
-  - bioconductor-zellkonverter=1.8.0
-  - ipywidgets=8.1.0
-  - mudata=0.2.3
-  - r-base=4.2
-  - r-hdf5r=1.3.8
-  - r-remotes=2.4.2
-  - r-reticulate=1.30
-  - r-sessioninfo=1.2.2
-  - r-seurat=4.3.0
-  - r-seuratobject=4.1.3
+  - conda-forge::python=3.12.9
+  - conda-forge::scanpy=1.11.0
+  - anndata2ri=1.3.2
+  - bioconductor-basilisk=1.14.1
+  - bioconductor-mudata=1.6.0
+  - bioconductor-scuttle=1.12.0
+  - bioconductor-singlecellexperiment=1.24.0
+  - bioconductor-zellkonverter=1.12.1
+  - mudata=0.3.1
+  - r-base=4.3.3
+  - r-hdf5r=1.3.12
+  - r-remotes=2.5.0
+  - r-reticulate=1.40.0
+  - r-sessioninfo=1.2.3
+  - r-seurat=5.2.1
+  - r-seuratobject=5.0.2
   - rpy2=3.5.11
   - session-info=1.0.0
+  - pip
+  - pip:
+      - lamindb[bionty,jupyter]
   # Additional R packages installed manually using {remotes}
   # remotes::install_github("mojaveazure/seurat-disk@9b89970eac2a3bd770e744f63c7763419486b14c")
   # remotes::install_github("cellgeni/sceasy@v0.0.7")
diff --git a/jupyter-book/mechanisms/gene_regulatory_networks.ipynb b/jupyter-book/mechanisms/gene_regulatory_networks.ipynb
@@ -1113,7 +1113,7 @@
     "    ax.text(\n",
     "        x=x,\n",
     "        y=ax.get_ylim()[1],\n",
-    "        s=f\"{int(x)} ({percentiles.index.values[i]*100}%)\",\n",
+    "        s=f\"{int(x)} ({percentiles.index.values[i] * 100}%)\",\n",
     "        color=\"red\",\n",
     "        rotation=30,\n",
     "        size=\"x-small\",\n",
diff --git a/scripts/introduction/interoperability_dataset.ipynb b/scripts/introduction/interoperability_dataset.ipynb
@@ -0,0 +1,233 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Interoperability- Preparing the MuData dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import lamindb as ln"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert ln.setup.settings.instance.slug == \"theislab/sc-best-practices\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[92m→\u001b[0m loaded Transform('sHhbAE1UThuC0000'), re-started Run('bn9MrSp1...') at 2025-03-26 12:49:31 UTC\n",
+      "\u001b[92m→\u001b[0m notebook imports: anndata==0.11.3 lamindb==1.3.0 mudata==0.3.1 numpy==2.1.3\n"
+     ]
+    }
+   ],
+   "source": [
+    "ln.track()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## generate AnnData"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import anndata\n",
+    "import numpy\n",
+    "import scanpy\n",
+    "from scipy.sparse import csr_matrix"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a randomly generated AnnData object to use as an example\n",
+    "counts = csr_matrix(\n",
+    "    numpy.random.default_rng().poisson(1, size=(100, 2000)), dtype=numpy.float32\n",
+    ")\n",
+    "adata = anndata.AnnData(counts)\n",
+    "adata.obs_names = [f\"Cell_{i:d}\" for i in range(adata.n_obs)]\n",
+    "adata.var_names = [f\"Gene_{i:d}\" for i in range(adata.n_vars)]\n",
+    "# Do some standard processing to populate the object\n",
+    "scanpy.pp.calculate_qc_metrics(adata, inplace=True)\n",
+    "adata.layers[\"counts\"] = adata.X.copy()\n",
+    "scanpy.pp.normalize_total(adata, inplace=True)\n",
+    "scanpy.pp.log1p(adata)\n",
+    "scanpy.pp.highly_variable_genes(adata, inplace=True)\n",
+    "scanpy.tl.pca(adata)\n",
+    "scanpy.pp.neighbors(adata)\n",
+    "scanpy.tl.umap(adata)\n",
+    "adata.write(\"interoperability_adata.h5ad\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "... uploading interoperability_adata.h5ad: 100.0%\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Artifact(uid='Y0xl4XzORVJMgFDl0000', is_latest=True, key='introduction/interoperability_adata.h5ad', description='anndata for interoperability', suffix='.h5ad', otype='AnnData', size=3180536, hash='kJuZZxiZdPF0IXZqZLOfGQ', space_id=1, storage_id=1, run_id=4, created_by_id=5, created_at=2025-03-26 12:56:10 UTC)"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "af = ln.Artifact(\n",
+    "    \"interoperability_adata.h5ad\",\n",
+    "    key=\"introduction/interoperability_adata.h5ad\",\n",
+    "    description=\"anndata for interoperability\",\n",
+    ").save()\n",
+    "af"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## generate MuData"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mudata as md\n",
+    "import numpy as np\n",
+    "\n",
+    "md.set_options(pull_on_update=False)\n",
+    "\n",
+    "# Use modern random number generation\n",
+    "rng = np.random.default_rng(seed=1)\n",
+    "\n",
+    "# create random data\n",
+    "n, d, k = 1000, 100, 10\n",
+    "z = rng.normal(loc=np.arange(k), scale=np.arange(k) * 2, size=(n, k))\n",
+    "w = rng.normal(size=(d, k))\n",
+    "y = np.dot(z, w.T)\n",
+    "\n",
+    "# create AnnData from the matrix\n",
+    "adata = anndata.AnnData(y)\n",
+    "adata.obs_names = [f\"obs_{i + 1}\" for i in range(n)]\n",
+    "adata.var_names = [f\"var_{j + 1}\" for j in range(d)]\n",
+    "\n",
+    "if adata.obs.shape[1] == 0:\n",
+    "    adata.obs[\"dummy_obs\"] = \"placeholder\"\n",
+    "\n",
+    "if adata.var.shape[1] == 0:\n",
+    "    adata.var[\"dummy_var\"] = \"placeholder\"\n",
+    "\n",
+    "# second AnnData object\n",
+    "d2 = 50\n",
+    "w2 = rng.normal(size=(d2, k))\n",
+    "y2 = np.dot(z, w2.T)\n",
+    "\n",
+    "adata2 = anndata.AnnData(y2)\n",
+    "adata2.obs_names = [f\"obs_{i + 1}\" for i in range(n)]\n",
+    "adata2.var_names = [f\"var2_{j + 1}\" for j in range(d2)]\n",
+    "\n",
+    "if adata2.obs.shape[1] == 0:\n",
+    "    adata2.obs[\"dummy_obs\"] = \"placeholder\"\n",
+    "\n",
+    "if adata2.var.shape[1] == 0:\n",
+    "    adata2.var[\"dummy_var\"] = \"placeholder\"\n",
+    "\n",
+    "mdata = md.MuData({\"A\": adata, \"B\": adata2})\n",
+    "mdata.write(\"interoperability_mdata.h5mu\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[92m→\u001b[0m returning existing artifact with same hash: Artifact(uid='d0BIszdg2GPp3Nh80001', is_latest=True, key='introduction/interoperability_mdata.h5mu', description='MuData object for interoperability chapter', suffix='.h5mu', otype='MuData', size=1405984, hash='NX7ugtFo7KkZVUmwaASFiQ', space_id=1, storage_id=1, run_id=4, created_by_id=5, created_at=2025-03-20 17:29:41 UTC); to track this artifact as an input, use: ln.Artifact.get()\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Artifact(uid='d0BIszdg2GPp3Nh80001', is_latest=True, key='introduction/interoperability_mdata.h5mu', description='MuData object for interoperability chapter', suffix='.h5mu', otype='MuData', size=1405984, hash='NX7ugtFo7KkZVUmwaASFiQ', space_id=1, storage_id=1, run_id=4, created_by_id=5, created_at=2025-03-20 17:29:41 UTC)"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "af = ln.Artifact(\n",
+    "    \"interoperability_mdata.h5mu\",\n",
+    "    key=\"introduction/interoperability_mdata.h5mu\",\n",
+    "    description=\"MuData object for interoperability chapter\",\n",
+    ").save()\n",
+    "af"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "interoperability",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		-Add `release notes`
	`1`	+Add new feature `changelog`. Changelog fragments will be added in `changelog` chapter.
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+Add dataset generator and update texts in `Interoperability` chapter