From ad22867418a9d5b6127b6113873c655b8efb4bd0 Mon Sep 17 00:00:00 2001
From: JGarnica22 <j.garnica22@gmail.com>
Date: Wed, 16 Apr 2025 12:17:19 +0200
Subject: [PATCH 1/3] add method STACAS

---
 src/methods/stacas/config.vsh.yaml | 81 ++++++++++++++++++++++++++++++
 src/methods/stacas/script.R        | 56 +++++++++++++++++++++
 2 files changed, 137 insertions(+)
 create mode 100644 src/methods/stacas/config.vsh.yaml
 create mode 100644 src/methods/stacas/script.R

diff --git a/src/methods/stacas/config.vsh.yaml b/src/methods/stacas/config.vsh.yaml
new file mode 100644
index 00000000..ab08bac7
--- /dev/null
+++ b/src/methods/stacas/config.vsh.yaml
@@ -0,0 +1,81 @@
+# The API specifies which type of component this is.
+# It contains specifications for:
+#   - The input/output files
+#   - Common parameters
+#   - A unit test
+__merge__: ../../api/comp_method.yaml
+
+# A unique identifier for your component (required).
+# Can contain only lowercase letters or underscores.
+name: stacas
+# A relatively short label, used when rendering visualisations (required)
+label: STACAS
+# A one sentence summary of how this method works (required). Used when 
+# rendering summary tables.
+summary: Accurate semi-supervised integration of single-cell transcriptomics data
+# A multi-line description of how this component works (required). Used
+# when rendering reference documentation.
+description: |
+  STACAS is a method for scRNA-seq integration,
+  especially suited to accurately integrate datasets with large cell type imbalance
+  (e.g. in terms of proportions of distinct cell populations).
+  Prior cell type knowledge, given as cell type labels, can be provided to the algorithm to perform
+  semi-supervised integration, leading to increased preservation of biological variability
+   in the resulting integrated space.
+  STACAS is robust to incomplete cell type labels and can be applied to large-scale integration tasks.
+references:
+  doi: 10.1038/s41467-024-45240-z
+  # Andreatta M, Hérault L, Gueguen P, Gfeller D, Berenstein AJ, Carmona SJ.
+  # Semi-supervised integration of single-cell transcriptomics data.
+  # Nature Communications. 2024;15(1):1-13. doi:10.1038/s41467-024-45240-z
+links:
+  # URL to the documentation for this method (required).
+  documentation: https://carmonalab.github.io/STACAS.demo/STACAS.demo.html
+  # URL to the code repository for this method (required).
+  repository: https://github.com/carmonalab/STACAS
+# Metadata for your component
+info:
+  # Which normalisation method this component prefers to use (required).
+  preferred_normalization: log_cp10k
+
+# Component-specific parameters (optional)
+# arguments:
+#   - name: "--n_neighbors"
+#     type: "integer"
+#     default: 5
+#     description: Number of neighbors to use.
+
+# Resources required to run the component
+resources:
+  # The script of your component (required)
+  - type: r_script
+    path: script.R
+  # Additional resources your script needs (optional)
+  # - type: file
+  #   path: weights.pt
+
+engines:
+  # Specifications for the Docker image for this component.
+  - type: docker
+    image: openproblems/base_r:1.0.0
+    # Add custom dependencies here (optional). For more information, see
+    # https://viash.io/reference/config/engines/docker/#setup .
+    setup:
+      - type: r
+        #github: https://github.com/carmonalab/STACAS.git@2.2.0
+        cran:
+          - Seurat
+          - SeuratObject
+          - R.utils
+        bioc:
+          - BiocNeighbors
+          - BiocParallel
+        script: remotes::install_github("carmonalab/STACAS@2.2.0", dependencies = FALSE)
+
+runners:
+  # This platform allows running the component natively
+  - type: executable
+  # Allows turning the component into a Nextflow module / pipeline.
+  - type: nextflow
+    directives:
+      label: [midtime,midmem,midcpu]
diff --git a/src/methods/stacas/script.R b/src/methods/stacas/script.R
new file mode 100644
index 00000000..19fcaf31
--- /dev/null
+++ b/src/methods/stacas/script.R
@@ -0,0 +1,56 @@
+requireNamespace("anndata", quietly = TRUE)
+suppressPackageStartupMessages({
+  library(STACAS)
+  library(Matrix)
+  library(SeuratObject)
+  library(Seurat)
+})
+
+## VIASH START
+par <- list(
+  input = "resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad",
+  output = "output.h5ad"
+)
+meta <- list(
+  name = "stacas"
+)
+## VIASH END
+
+cat("Reading input file\n")
+adata <- anndata::read_h5ad(par[["input"]])
+
+cat("Create Seurat object\n")
+# Seurat expects genes in rows and cells in columns as a dgCMatrix, so each
+# layer is transposed and then coerced via CsparseMatrix.
+as_seurat_matrix <- function(layer) {
+  as(as(Matrix::t(layer), "CsparseMatrix"), "dgCMatrix")
+}
+raw_counts <- as_seurat_matrix(adata$layers[["counts"]])
+normalized_data <- as_seurat_matrix(adata$layers[["normalized"]])
+# Build the object from raw counts (needed to compute variable genes), then
+# manually place the pre-normalized values in the "data" slot.
+seurat_obj <-
+  Seurat::CreateSeuratObject(counts = raw_counts, meta.data = adata$obs)
+seurat_obj@assays$RNA$data <- normalized_data
+
+cat("Run STACAS\n")
+# Split into one Seurat object per batch and integrate them with STACAS.
+batch_objects <- Seurat::SplitObject(seurat_obj, split.by = "batch")
+object_integrated <- STACAS::Run.STACAS(batch_objects)
+
+cat("Store outputs\n")
+# The integrated object is rebuilt from per-batch pieces, so its cell order is
+# not guaranteed to match the input; realign embedding rows to adata$obs.
+embedding <- object_integrated@reductions$pca@cell.embeddings
+output <- anndata::AnnData(
+  uns = list(
+    dataset_id = adata$uns[["dataset_id"]],
+    normalization_id = adata$uns[["normalization_id"]],
+    method_id = meta$name
+  ),
+  obs = adata$obs,
+  var = adata$var,
+  obsm = list(X_emb = embedding[rownames(adata$obs), , drop = FALSE])
+)
+cat("Write output AnnData to file\n")
+output$write_h5ad(par[["output"]], compression = "gzip")

From a0e70f17582c85b829ffc1c90e74885f03e2b0c2 Mon Sep 17 00:00:00 2001
From: JGarnica22 <j.garnica22@gmail.com>
Date: Wed, 16 Apr 2025 12:20:08 +0200
Subject: [PATCH 2/3] add method STACAS

---
 src/methods/stacas/config.vsh.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/methods/stacas/config.vsh.yaml b/src/methods/stacas/config.vsh.yaml
index ab08bac7..502f38d2 100644
--- a/src/methods/stacas/config.vsh.yaml
+++ b/src/methods/stacas/config.vsh.yaml
@@ -21,7 +21,7 @@ description: |
   (e.g. in terms of proportions of distinct cell populations).
   Prior cell type knowledge, given as cell type labels, can be provided to the algorithm to perform
   semi-supervised integration, leading to increased preservation of biological variability
-   in the resulting integrated space.
+  in the resulting integrated space.
   STACAS is robust to incomplete cell type labels and can be applied to large-scale integration tasks.
 references:
   doi: 10.1038/s41467-024-45240-z

From 3d8adcb32319b8c4f9e393e5c2d9aa953a0f84f2 Mon Sep 17 00:00:00 2001
From: JGarnica22 <j.garnica22@gmail.com>
Date: Wed, 16 Apr 2025 12:52:12 +0200
Subject: [PATCH 3/3] update changelog

---
 CHANGELOG.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 09d672d0..22b80c7c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # task_batch_integration devel
 
+## New functionality
+* Added `methods/stacas` component.
+  Adds the unsupervised version of the STACAS tool for integration of single-cell transcriptomics data.
+  It corrects batch effects while preserving biological variability without requiring prior cell type annotations.
+
 ## New functionality
 
 * Added `metrics/kbet_pg` and `metrics/kbet_pg_label` components (PR #52).