gao-lab
diff --git a/.gitignore
Lines changed: 1 addition & 0 deletions b/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/benchmark/analysis/aggr_datasets.ipynb
Lines changed: 147 additions & 66 deletions b/benchmark/analysis/aggr_datasets.ipynb
Lines changed: 147 additions & 66 deletions
diff --git a/benchmark/analysis/celltype_region_analysis.ipynb
Lines changed: 83 additions & 90 deletions b/benchmark/analysis/celltype_region_analysis.ipynb
Lines changed: 83 additions & 90 deletions
diff --git a/benchmark/analysis/check_MERFISH.ipynb
Lines changed: 248 additions & 0 deletions b/benchmark/analysis/check_MERFISH.ipynb
Lines changed: 248 additions & 0 deletions
diff --git a/benchmark/analysis/hetero.ipynb
Lines changed: 245 additions & 0 deletions b/benchmark/analysis/hetero.ipynb
Lines changed: 245 additions & 0 deletions
diff --git a/benchmark/analysis/plot_confusion_matrix.ipynb
Lines changed: 466 additions & 0 deletions b/benchmark/analysis/plot_confusion_matrix.ipynb
Lines changed: 466 additions & 0 deletions
diff --git a/benchmark/analysis/plot_keypoints.ipynb
Lines changed: 376 additions & 0 deletions b/benchmark/analysis/plot_keypoints.ipynb
Lines changed: 376 additions & 0 deletions
diff --git a/benchmark/config/config.yaml
Lines changed: 3 additions & 3 deletions b/benchmark/config/config.yaml
Lines changed: 3 additions & 3 deletions
diff --git a/benchmark/profiles/impetus/impetus.yaml
Lines changed: 1 addition & 1 deletion b/benchmark/profiles/impetus/impetus.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/benchmark/workflow/notebooks/data_split.ipynb
Lines changed: 1 addition & 1 deletion b/benchmark/workflow/notebooks/data_split.ipynb
Lines changed: 1 addition & 1 deletion
diff --git a/benchmark/workflow/notebooks/emb2metrics.ipynb
Lines changed: 69 additions & 10 deletions b/benchmark/workflow/notebooks/emb2metrics.ipynb
Lines changed: 69 additions & 10 deletions
diff --git a/benchmark/workflow/notebooks/run_SLAT_dpca.ipynb
Lines changed: 1 addition & 5 deletions b/benchmark/workflow/notebooks/run_SLAT_dpca.ipynb
Lines changed: 1 addition & 5 deletions
diff --git a/benchmark/workflow/notebooks/run_SLAT_dpca_one2many.ipynb
Lines changed: 1 addition & 5 deletions b/benchmark/workflow/notebooks/run_SLAT_dpca_one2many.ipynb
Lines changed: 1 addition & 5 deletions
diff --git a/pyproject.toml
Lines changed: 1 addition & 1 deletion b/pyproject.toml
Lines changed: 1 addition & 1 deletion
diff --git a/scSLAT/viz/multi_dataset.py
Lines changed: 2 additions & 2 deletions b/scSLAT/viz/multi_dataset.py
Lines changed: 2 additions & 2 deletions
@@ -25,6 +25,7 @@
 *.gz
 *.whl
 *.html
+*.DS_Store
 
 # folder
 **/conda/
 
@@ -1,13 +1,13 @@
 #-------------------------------- Global ----------------------------------#
 timeout: 24h
 timehold: 200h
-seed: 8
+seed: 1
 sample: 0
 fix_sample: True
 
 use:
-  # - benchmark
-  - perturb
+  - benchmark
+  # - perturb
   # - split_data
   # - build_3d
 
 
@@ -3,7 +3,7 @@ __default__:
   output: ".slurm/{rule}.out"
   error: ".slurm/{rule}.err"
   account: gglab
-  partition: cpu2
+  partition: fat2
   n_node: 1
   n_task: 1
   n_cpu: "{threads}"
 
@@ -95,7 +95,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.13"
+   "version": "3.8.17"
   },
   "orig_nbformat": 4,
   "vscode": {
 
@@ -15,7 +15,9 @@
     "import pandas as pd\n",
     "import scanpy as sc\n",
     "import scipy.sparse as sp\n",
-    "from sklearn.metrics import f1_score\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.metrics import f1_score, confusion_matrix, ConfusionMatrixDisplay\n",
+    "\n",
     "\n",
     "from scSLAT.model import spatial_match\n",
     "from scSLAT.metrics import global_score, euclidean_dis, rotation_angle\n",
@@ -108,6 +110,16 @@
     "    spot_size = 5"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "out_dir = Path(os.path.dirname(metric_file))\n",
+    "sc.settings.figdir = out_dir"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -190,7 +202,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Ground truth (perturb)"
+    "## Confusion Matrix"
    ]
   },
   {
@@ -199,18 +211,55 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "if 'perturb' in matching_file:\n",
-    "    match_ratio =  (matching[0] == matching[1]).sum() / len(matching[0])\n",
-    "else:\n",
-    "    match_ratio = -1"
+    "celltype_label = adata2.obs[biology_meta].unique().tolist()\n",
+    "region_label = adata2.obs[topology_meta].unique().tolist()\n",
+    "celltype_region_label = adata2.obs['celltype_region'].unique().tolist()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(len(celltype_region_label) / 2, len(celltype_region_label) /2))\n",
+    "cm = confusion_matrix(adata2.obs['celltype_region'], adata2.obs['target_celltype_region'], labels=celltype_region_label)\n",
+    "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=celltype_region_label)\n",
+    "disp.plot(cmap='Reds', xticks_rotation='vertical', ax=plt.gca())\n",
+    "plt.savefig(out_dir / 'joint_confusing_matrix.png', dpi=300, bbox_inches='tight')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(len(celltype_label) / 2, len(celltype_label) /2))\n",
+    "cm = confusion_matrix(adata2.obs[biology_meta], adata2.obs['target_celltype'], labels=celltype_label)\n",
+    "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=celltype_label)\n",
+    "disp.plot(cmap='Reds', xticks_rotation='vertical', ax=plt.gca())\n",
+    "plt.savefig(out_dir / 'celltype_confusing_matrix.png', dpi=300, bbox_inches='tight')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(len(region_label) / 2, len(region_label) /2))\n",
+    "cm = confusion_matrix(adata2.obs[topology_meta], adata2.obs['target_region'], labels=region_label)\n",
+    "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=region_label)\n",
+    "disp.plot(cmap='Reds', xticks_rotation='vertical', ax=plt.gca())\n",
+    "plt.savefig(out_dir / 'region_confusing_matrix.png', dpi=300, bbox_inches='tight')"
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Save"
+    "## Ground truth (perturb)"
    ]
   },
   {
@@ -219,8 +268,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "out_dir = Path(os.path.dirname(metric_file))\n",
-    "sc.settings.figdir = out_dir"
+    "if 'perturb' in matching_file:\n",
+    "    match_ratio =  (matching[0] == matching[1]).sum() / len(matching[0])\n",
+    "else:\n",
+    "    match_ratio = -1"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Save"
    ]
   },
   {
 
@@ -84,25 +84,21 @@
     "    biology_meta = 'cell_type'\n",
     "    topology_meta = 'layer_guess'\n",
     "    alpha = 10\n",
-    "    LGCN_layer = 2\n",
     "    spot_size = 5\n",
     "elif 'merfish' and 'hypothalamic' in adata1_file:\n",
     "    biology_meta = 'Cell_class'\n",
     "    topology_meta = 'region'\n",
     "    alpha = 25\n",
-    "    LGCN_layer = 2\n",
     "    spot_size = 15\n",
     "elif 'stereo' and 'embryo' in adata1_file:\n",
     "    biology_meta = 'annotation'\n",
     "    topology_meta = 'region'\n",
     "    alpha = 3\n",
-    "    LGCN_layer = 1\n",
     "    spot_size = 5\n",
     "elif 'brain' in adata1_file:\n",
     "    biology_meta = 'layer_guess'\n",
     "    topology_meta = 'layer_guess'\n",
     "    alpha = 10\n",
-    "    LGCN_layer = 2\n",
     "    spot_size = 5"
    ]
   },
@@ -154,7 +150,7 @@
     "Cal_Spatial_Net(adata1, k_cutoff=20, model='KNN')\n",
     "Cal_Spatial_Net(adata2, k_cutoff=20, model='KNN')\n",
     "edges, features = load_anndatas([adata1, adata2], feature='dpca', singular=True, dim=30)\n",
-    "embd0, embd1, time1 = run_SLAT(features, edges, 6, LGCN_layer=LGCN_layer)\n",
+    "embd0, embd1, time1 = run_SLAT(features, edges, 6, LGCN_layer=3)\n",
     "run_time = str(time.time() - start)\n",
     "print('Runtime: ' + run_time)"
    ]
 
@@ -82,25 +82,21 @@
     "    biology_meta = 'cell_type'\n",
     "    topology_meta = 'layer_guess'\n",
     "    alpha = 10\n",
-    "    LGCN_layer = 2\n",
     "    spot_size = 5\n",
     "elif 'merfish' and 'hypothalamic' in adata1_file:\n",
     "    biology_meta = 'Cell_class'\n",
     "    topology_meta = 'region'\n",
     "    alpha = 25\n",
-    "    LGCN_layer = 2\n",
     "    spot_size = 15\n",
     "elif 'stereo' and 'embryo' in adata1_file:\n",
     "    biology_meta = 'annotation'\n",
     "    topology_meta = 'region'\n",
     "    alpha = 3\n",
-    "    LGCN_layer = 1\n",
     "    spot_size = 5\n",
     "elif 'brain' in adata1_file:\n",
     "    biology_meta = 'layer_guess'\n",
     "    topology_meta = 'layer_guess'\n",
     "    alpha = 10\n",
-    "    LGCN_layer = 2\n",
     "    spot_size = 5"
    ]
   },
@@ -152,7 +148,7 @@
     "Cal_Spatial_Net(adata1, k_cutoff=20, model='KNN')\n",
     "Cal_Spatial_Net(adata2, k_cutoff=20, model='KNN')\n",
     "edges, features = load_anndatas([adata1, adata2], feature='dpca', singular=True, dim=30)\n",
-    "embd0, embd1, time1 = run_SLAT(features, edges, 6, LGCN_layer=LGCN_layer)\n",
+    "embd0, embd1, time1 = run_SLAT(features, edges, 6, LGCN_layer=3)\n",
     "run_time = str(time.time() - start)\n",
     "print('Runtime: ' + run_time)"
    ]
 
@@ -31,7 +31,7 @@ classifiers = [
 [tool.poetry.dependencies]
 python = "^3.8"
 numpy = ">1.19"
-scipy = ">1.3"
+scipy = ">1.3, <1.9"
 pandas = ">1.1, <2.0"
 matplotlib = ">3.1.2, <3.7"
 seaborn = ">0.9"
 
@@ -462,9 +462,9 @@ class match_3D_celltype(match_3D_multi):
     highlight_cell
         color to highlight the cell
     meta
-        dataframe colname of meta, such as celltype
+        dataframe column name of meta, such as celltype
     expr
-        dataframe colname of gene expr
+        dataframe column name of gene expr
     subsample_size
         subsample size of matches
     reliability