Skip to content

Commit a2318ff

Browse files
seohyonkimZethson
andauthored
Update 5. Interoperability (#347)
* few changes in the beginning * chage interoperability_environment.yml * change python version to >= 3.12 in yml * clean til 5.3 * til 5.4 * WIP multimodal data * this environment works * remove jupyternb and ipywidgets from environment * WORKS. Come back here when you want to rage quit * delete local cells * redoing cos push failed * add changelog fragment * update env, interoperability_dataset * more detailed description in changelog fragments * move anndata generating part to the dataset nb. fix accessing anndata and MuData file with LaminDB. * add lamin info box * Update jupyter-book/introduction/interoperability.yml Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net> --------- Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net>
1 parent cebb22d commit a2318ff

File tree

10 files changed

+956
-575
lines changed

10 files changed

+956
-575
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ default_stages:
77
minimum_pre_commit_version: 2.16.0
88
repos:
99
- repo: https://github.com/pre-commit/mirrors-prettier
10-
rev: v2.5.1
10+
rev: v3.1.0
1111
hooks:
1212
- id: prettier
1313
- repo: https://github.com/astral-sh/ruff-pre-commit
14-
rev: v0.8.6
14+
rev: v0.9.7
1515
hooks:
1616
- id: ruff
1717
args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes]

changelog.d/345.added.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
Add `release notes`
1+
Add a new `changelog` feature. Changelog fragments will be added in the `changelog` chapter.

changelog.d/347.changed.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add dataset generator and update the text in the `Interoperability` chapter

jupyter-book/_static/book.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
document.querySelector(".prev-next-bottom").remove();
1919
document.querySelector(".footer").remove();
2020
var elementsToRemove = document.querySelectorAll(
21-
".remove-from-content-only"
21+
".remove-from-content-only",
2222
);
2323
elementsToRemove.forEach(function (el) {
2424
el.remove();
@@ -30,8 +30,8 @@
3030
var style = document.createElement("style");
3131
style.appendChild(
3232
document.createTextNode(
33-
"hypothesis-sidebar, hypothesis-notebook, hypothesis-adder{display:none!important;}"
34-
)
33+
"hypothesis-sidebar, hypothesis-notebook, hypothesis-adder{display:none!important;}",
34+
),
3535
);
3636
document.getElementsByTagName("head")[0].appendChild(style);
3737
}

jupyter-book/conditions/compositional.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3961,7 +3961,7 @@
39613961
" y=-np.log10(alpha),\n",
39623962
" color=\"red\",\n",
39633963
" linewidth=1,\n",
3964-
" label=f\"{int(alpha*100)} % SpatialFDR\",\n",
3964+
" label=f\"{int(alpha * 100)} % SpatialFDR\",\n",
39653965
" )\n",
39663966
" plt.legend()\n",
39673967
" plt.xlabel(\"log-Fold Change\")\n",
@@ -3977,7 +3977,7 @@
39773977
" sns.scatterplot(data=df, x=\"logCPM\", y=\"logFC\", hue=\"Sig\")\n",
39783978
" plt.axhline(y=0, color=\"grey\", linewidth=1)\n",
39793979
" plt.axhline(y=emp_null, color=\"purple\", linewidth=1)\n",
3980-
" plt.legend(title=f\"< {int(alpha*100)} % SpatialFDR\")\n",
3980+
" plt.legend(title=f\"< {int(alpha * 100)} % SpatialFDR\")\n",
39813981
" plt.xlabel(\"Mean log-counts\")\n",
39823982
" plt.ylabel(\"log-Fold Change\")\n",
39833983
" plt.show()\n",

jupyter-book/introduction/analysis_tools.ipynb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2132,8 +2132,8 @@
21322132
],
21332133
"source": [
21342134
"adata = ad.AnnData(y)\n",
2135-
"adata.obs_names = [f\"obs_{i+1}\" for i in range(n)]\n",
2136-
"adata.var_names = [f\"var_{j+1}\" for j in range(d)]\n",
2135+
"adata.obs_names = [f\"obs_{i + 1}\" for i in range(n)]\n",
2136+
"adata.var_names = [f\"var_{j + 1}\" for j in range(d)]\n",
21372137
"adata"
21382138
]
21392139
},
@@ -2159,8 +2159,8 @@
21592159
"y2 = np.dot(z, w2.T)\n",
21602160
"\n",
21612161
"adata2 = ad.AnnData(y2)\n",
2162-
"adata2.obs_names = [f\"obs_{i+1}\" for i in range(n)]\n",
2163-
"adata2.var_names = [f\"var2_{j+1}\" for j in range(d2)]\n",
2162+
"adata2.obs_names = [f\"obs_{i + 1}\" for i in range(n)]\n",
2163+
"adata2.var_names = [f\"var2_{j + 1}\" for j in range(d2)]\n",
21642164
"adata2"
21652165
]
21662166
},

jupyter-book/introduction/interoperability.ipynb

Lines changed: 690 additions & 544 deletions
Large diffs are not rendered by default.

jupyter-book/introduction/interoperability.yml

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,27 @@ channels:
44
- bioconda
55
- defaults
66
dependencies:
7-
- conda-forge::python=3.9.16
8-
- conda-forge::jupyterlab=3.6.3
9-
- conda-forge::scanpy=1.9.3
10-
- anndata2ri=1.2
11-
- bioconductor-basilisk=1.9.12
12-
- bioconductor-mudata=1.2.0
13-
- bioconductor-scuttle=1.8.0
14-
- bioconductor-singlecellexperiment=1.20.0
15-
- bioconductor-zellkonverter=1.8.0
16-
- ipywidgets=8.1.0
17-
- mudata=0.2.3
18-
- r-base=4.2
19-
- r-hdf5r=1.3.8
20-
- r-remotes=2.4.2
21-
- r-reticulate=1.30
22-
- r-sessioninfo=1.2.2
23-
- r-seurat=4.3.0
24-
- r-seuratobject=4.1.3
7+
- conda-forge::python=3.12.9
8+
- conda-forge::scanpy=1.11.0
9+
- anndata2ri=1.3.2
10+
- bioconductor-basilisk=1.14.1
11+
- bioconductor-mudata=1.6.0
12+
- bioconductor-scuttle=1.12.0
13+
- bioconductor-singlecellexperiment=1.24.0
14+
- bioconductor-zellkonverter=1.12.1
15+
- mudata=0.3.1
16+
- r-base=4.3.3
17+
- r-hdf5r=1.3.12
18+
- r-remotes=2.5.0
19+
- r-reticulate=1.40.0
20+
- r-sessioninfo=1.2.3
21+
- r-seurat=5.2.1
22+
- r-seuratobject=5.0.2
2523
- rpy2=3.5.11
2624
- session-info=1.0.0
25+
- pip
26+
- pip:
27+
- lamindb[bionty,jupyter]
2728
# Additional R packages installed manually using {remotes}
2829
# remotes::install_github("mojaveazure/seurat-disk@9b89970eac2a3bd770e744f63c7763419486b14c")
2930
# remotes::install_github("cellgeni/sceasy@v0.0.7")

jupyter-book/mechanisms/gene_regulatory_networks.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1113,7 +1113,7 @@
11131113
" ax.text(\n",
11141114
" x=x,\n",
11151115
" y=ax.get_ylim()[1],\n",
1116-
" s=f\"{int(x)} ({percentiles.index.values[i]*100}%)\",\n",
1116+
" s=f\"{int(x)} ({percentiles.index.values[i] * 100}%)\",\n",
11171117
" color=\"red\",\n",
11181118
" rotation=30,\n",
11191119
" size=\"x-small\",\n",
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Interoperability- Preparing the MuData dataset"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": 9,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"import lamindb as ln"
17+
]
18+
},
19+
{
20+
"cell_type": "code",
21+
"execution_count": 10,
22+
"metadata": {},
23+
"outputs": [],
24+
"source": [
25+
"assert ln.setup.settings.instance.slug == \"theislab/sc-best-practices\""
26+
]
27+
},
28+
{
29+
"cell_type": "code",
30+
"execution_count": 11,
31+
"metadata": {},
32+
"outputs": [
33+
{
34+
"name": "stdout",
35+
"output_type": "stream",
36+
"text": [
37+
"\u001b[92m→\u001b[0m loaded Transform('sHhbAE1UThuC0000'), re-started Run('bn9MrSp1...') at 2025-03-26 12:49:31 UTC\n",
38+
"\u001b[92m→\u001b[0m notebook imports: anndata==0.11.3 lamindb==1.3.0 mudata==0.3.1 numpy==2.1.3\n"
39+
]
40+
}
41+
],
42+
"source": [
43+
"ln.track()"
44+
]
45+
},
46+
{
47+
"cell_type": "markdown",
48+
"metadata": {},
49+
"source": [
50+
"## generate AnnData"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": 12,
56+
"metadata": {},
57+
"outputs": [],
58+
"source": [
59+
"import anndata\n",
60+
"import numpy\n",
61+
"import scanpy\n",
62+
"from scipy.sparse import csr_matrix"
63+
]
64+
},
65+
{
66+
"cell_type": "code",
67+
"execution_count": null,
68+
"metadata": {},
69+
"outputs": [],
70+
"source": [
71+
"# Create a randomly generated AnnData object to use as an example\n",
72+
"counts = csr_matrix(\n",
73+
" numpy.random.default_rng().poisson(1, size=(100, 2000)), dtype=numpy.float32\n",
74+
")\n",
75+
"adata = anndata.AnnData(counts)\n",
76+
"adata.obs_names = [f\"Cell_{i:d}\" for i in range(adata.n_obs)]\n",
77+
"adata.var_names = [f\"Gene_{i:d}\" for i in range(adata.n_vars)]\n",
78+
"# Do some standard processing to populate the object\n",
79+
"scanpy.pp.calculate_qc_metrics(adata, inplace=True)\n",
80+
"adata.layers[\"counts\"] = adata.X.copy()\n",
81+
"scanpy.pp.normalize_total(adata, inplace=True)\n",
82+
"scanpy.pp.log1p(adata)\n",
83+
"scanpy.pp.highly_variable_genes(adata, inplace=True)\n",
84+
"scanpy.tl.pca(adata)\n",
85+
"scanpy.pp.neighbors(adata)\n",
86+
"scanpy.tl.umap(adata)\n",
87+
"adata.write(\"interoperability_adata.h5ad\")"
88+
]
89+
},
90+
{
91+
"cell_type": "code",
92+
"execution_count": 15,
93+
"metadata": {},
94+
"outputs": [
95+
{
96+
"name": "stdout",
97+
"output_type": "stream",
98+
"text": [
99+
"... uploading interoperability_adata.h5ad: 100.0%\n"
100+
]
101+
},
102+
{
103+
"data": {
104+
"text/plain": [
105+
"Artifact(uid='Y0xl4XzORVJMgFDl0000', is_latest=True, key='introduction/interoperability_adata.h5ad', description='anndata for interoperability', suffix='.h5ad', otype='AnnData', size=3180536, hash='kJuZZxiZdPF0IXZqZLOfGQ', space_id=1, storage_id=1, run_id=4, created_by_id=5, created_at=2025-03-26 12:56:10 UTC)"
106+
]
107+
},
108+
"execution_count": 15,
109+
"metadata": {},
110+
"output_type": "execute_result"
111+
}
112+
],
113+
"source": [
114+
"af = ln.Artifact(\n",
115+
" \"interoperability_adata.h5ad\",\n",
116+
" key=\"introduction/interoperability_adata.h5ad\",\n",
117+
" description=\"anndata for interoperability\",\n",
118+
").save()\n",
119+
"af"
120+
]
121+
},
122+
{
123+
"cell_type": "markdown",
124+
"metadata": {},
125+
"source": [
126+
"## generate MuData"
127+
]
128+
},
129+
{
130+
"cell_type": "code",
131+
"execution_count": 17,
132+
"metadata": {},
133+
"outputs": [],
134+
"source": [
135+
"import mudata as md\n",
136+
"import numpy as np\n",
137+
"\n",
138+
"md.set_options(pull_on_update=False)\n",
139+
"\n",
140+
"# Use modern random number generation\n",
141+
"rng = np.random.default_rng(seed=1)\n",
142+
"\n",
143+
"# create random data\n",
144+
"n, d, k = 1000, 100, 10\n",
145+
"z = rng.normal(loc=np.arange(k), scale=np.arange(k) * 2, size=(n, k))\n",
146+
"w = rng.normal(size=(d, k))\n",
147+
"y = np.dot(z, w.T)\n",
148+
"\n",
149+
"# create AnnData from the matrix\n",
150+
"adata = anndata.AnnData(y)\n",
151+
"adata.obs_names = [f\"obs_{i + 1}\" for i in range(n)]\n",
152+
"adata.var_names = [f\"var_{j + 1}\" for j in range(d)]\n",
153+
"\n",
154+
"if adata.obs.shape[1] == 0:\n",
155+
" adata.obs[\"dummy_obs\"] = \"placeholder\"\n",
156+
"\n",
157+
"if adata.var.shape[1] == 0:\n",
158+
" adata.var[\"dummy_var\"] = \"placeholder\"\n",
159+
"\n",
160+
"# second AnnData object\n",
161+
"d2 = 50\n",
162+
"w2 = rng.normal(size=(d2, k))\n",
163+
"y2 = np.dot(z, w2.T)\n",
164+
"\n",
165+
"adata2 = anndata.AnnData(y2)\n",
166+
"adata2.obs_names = [f\"obs_{i + 1}\" for i in range(n)]\n",
167+
"adata2.var_names = [f\"var2_{j + 1}\" for j in range(d2)]\n",
168+
"\n",
169+
"if adata2.obs.shape[1] == 0:\n",
170+
" adata2.obs[\"dummy_obs\"] = \"placeholder\"\n",
171+
"\n",
172+
"if adata2.var.shape[1] == 0:\n",
173+
" adata2.var[\"dummy_var\"] = \"placeholder\"\n",
174+
"\n",
175+
"mdata = md.MuData({\"A\": adata, \"B\": adata2})\n",
176+
"mdata.write(\"interoperability_mdata.h5mu\")"
177+
]
178+
},
179+
{
180+
"cell_type": "code",
181+
"execution_count": 18,
182+
"metadata": {},
183+
"outputs": [
184+
{
185+
"name": "stdout",
186+
"output_type": "stream",
187+
"text": [
188+
"\u001b[92m→\u001b[0m returning existing artifact with same hash: Artifact(uid='d0BIszdg2GPp3Nh80001', is_latest=True, key='introduction/interoperability_mdata.h5mu', description='MuData object for interoperability chapter', suffix='.h5mu', otype='MuData', size=1405984, hash='NX7ugtFo7KkZVUmwaASFiQ', space_id=1, storage_id=1, run_id=4, created_by_id=5, created_at=2025-03-20 17:29:41 UTC); to track this artifact as an input, use: ln.Artifact.get()\n"
189+
]
190+
},
191+
{
192+
"data": {
193+
"text/plain": [
194+
"Artifact(uid='d0BIszdg2GPp3Nh80001', is_latest=True, key='introduction/interoperability_mdata.h5mu', description='MuData object for interoperability chapter', suffix='.h5mu', otype='MuData', size=1405984, hash='NX7ugtFo7KkZVUmwaASFiQ', space_id=1, storage_id=1, run_id=4, created_by_id=5, created_at=2025-03-20 17:29:41 UTC)"
195+
]
196+
},
197+
"execution_count": 18,
198+
"metadata": {},
199+
"output_type": "execute_result"
200+
}
201+
],
202+
"source": [
203+
"af = ln.Artifact(\n",
204+
" \"interoperability_mdata.h5mu\",\n",
205+
" key=\"introduction/interoperability_mdata.h5mu\",\n",
206+
" description=\"MuData object for interoperability chapter\",\n",
207+
").save()\n",
208+
"af"
209+
]
210+
}
211+
],
212+
"metadata": {
213+
"kernelspec": {
214+
"display_name": "interoperability",
215+
"language": "python",
216+
"name": "python3"
217+
},
218+
"language_info": {
219+
"codemirror_mode": {
220+
"name": "ipython",
221+
"version": 3
222+
},
223+
"file_extension": ".py",
224+
"mimetype": "text/x-python",
225+
"name": "python",
226+
"nbconvert_exporter": "python",
227+
"pygments_lexer": "ipython3",
228+
"version": "3.12.9"
229+
}
230+
},
231+
"nbformat": 4,
232+
"nbformat_minor": 2
233+
}

0 commit comments

Comments
 (0)