From ad22867418a9d5b6127b6113873c655b8efb4bd0 Mon Sep 17 00:00:00 2001 From: JGarnica22 Date: Wed, 16 Apr 2025 12:17:19 +0200 Subject: [PATCH 1/3] add method STACAS --- src/methods/stacas/config.vsh.yaml | 81 ++++++++++++++++++++++++++++++ src/methods/stacas/script.R | 56 +++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 src/methods/stacas/config.vsh.yaml create mode 100644 src/methods/stacas/script.R diff --git a/src/methods/stacas/config.vsh.yaml b/src/methods/stacas/config.vsh.yaml new file mode 100644 index 00000000..ab08bac7 --- /dev/null +++ b/src/methods/stacas/config.vsh.yaml @@ -0,0 +1,81 @@ +# The API specifies which type of component this is. +# It contains specifications for: +# - The input/output files +# - Common parameters +# - A unit test +__merge__: ../../api/comp_method.yaml + +# A unique identifier for your component (required). +# Can contain only lowercase letters or underscores. +name: stacas +# A relatively short label, used when rendering visualisations (required) +label: STACAS +# A one sentence summary of how this method works (required). Used when +# rendering summary tables. +summary: Accurate semi-supervised integration of single-cell transcriptomics data +# A multi-line description of how this component works (required). Used +# when rendering reference documentation. +description: | + STACAS is a method for scRNA-seq integration, + especially suited to accurately integrate datasets with large cell type imbalance + (e.g. in terms of proportions of distinct cell populations). + Prior cell type knowledge, given as cell type labels, can be provided to the algorithm to perform + semi-supervised integration, leading to increased preservation of biological variability + in the resulting integrated space. + STACAS is robust to incomplete cell type labels and can be applied to large-scale integration tasks. 
+references: + doi: 10.1038/s41467-024-45240-z + # Andreatta M, Hérault L, Gueguen P, Gfeller D, Berenstein AJ, Carmona SJ. + # Semi-supervised integration of single-cell transcriptomics data. + # Nature Communications. 2024;15(1):1-13. doi:10.1038/s41467-024-45240-z +links: + # URL to the documentation for this method (required). + documentation: https://carmonalab.github.io/STACAS.demo/STACAS.demo.html + # URL to the code repository for this method (required). + repository: https://github.com/carmonalab/STACAS +# Metadata for your component +info: + # Which normalisation method this component prefers to use (required). + preferred_normalization: log_cp10k + +# Component-specific parameters (optional) +# arguments: +# - name: "--n_neighbors" +# type: "integer" +# default: 5 +# description: Number of neighbors to use. + +# Resources required to run the component +resources: + # The script of your component (required) + - type: r_script + path: script.R + # Additional resources your script needs (optional) + # - type: file + # path: weights.pt + +engines: + # Specifications for the Docker image for this component. + - type: docker + image: openproblems/base_r:1.0.0 + # Add custom dependencies here (optional). For more information, see + # https://viash.io/reference/config/engines/docker/#setup . + setup: + - type: r + #github: https://github.com/carmonalab/STACAS.git@2.2.0 + cran: + - Seurat + - SeuratObject + - R.utils + bioc: + - BiocNeighbors + - BiocParallel + script: remotes::install_github("carmonalab/STACAS@2.2.0", dependencies = FALSE) + +runners: + # This platform allows running the component natively + - type: executable + # Allows turning the component into a Nextflow module / pipeline. 
+  - type: nextflow
+    directives:
+      label: [midtime,midmem,midcpu]
diff --git a/src/methods/stacas/script.R b/src/methods/stacas/script.R
new file mode 100644
index 00000000..19fcaf31
--- /dev/null
+++ b/src/methods/stacas/script.R
@@ -0,0 +1,78 @@
+# Unsupervised STACAS integration: read an AnnData file, integrate its batches
+# with STACAS and write the resulting PCA embedding to obsm[["X_emb"]].
+
+# requireNamespace() only returns FALSE when the package is missing, so check
+# the result and fail early instead of erroring later at anndata::read_h5ad().
+if (!requireNamespace("anndata", quietly = TRUE)) {
+  stop("Package 'anndata' is required but not installed.", call. = FALSE)
+}
+suppressPackageStartupMessages({
+  library(STACAS)
+  library(Matrix)
+  library(SeuratObject)
+  library(Seurat)
+})
+
+## VIASH START
+par <- list(
+  input = "resources_test/task_batch_integration/cxg_immune_cell_atlas/dataset.h5ad",
+  output = "output.h5ad"
+)
+meta <- list(
+  name = "stacas"
+)
+## VIASH END
+
+cat("Reading input file\n")
+adata <- anndata::read_h5ad(par[["input"]])
+
+cat("Create Seurat object\n")
+# Transpose because Seurat expects genes in rows, cells in columns
+counts_r <- Matrix::t(adata$layers[["counts"]])
+normalized_r <- Matrix::t(adata$layers[["normalized"]])
+# Convert to a regular sparse matrix first and then to dgCMatrix
+counts_c <- as(as(counts_r, "CsparseMatrix"), "dgCMatrix")
+normalized_c <- as(as(normalized_r, "CsparseMatrix"), "dgCMatrix")
+
+# Create Seurat object with raw counts, these are needed to compute Variable Genes
+seurat_obj <- Seurat::CreateSeuratObject(
+  counts = counts_c,
+  meta.data = adata$obs
+)
+# Manually assign pre-normalized values to the "data" slot
+seurat_obj@assays$RNA$data <- normalized_c
+
+cat("Run STACAS\n")
+# Split into one object per batch and integrate them with STACAS defaults
+object_integrated <- seurat_obj |>
+  Seurat::SplitObject(split.by = "batch") |>
+  STACAS::Run.STACAS()
+
+cat("Store outputs\n")
+# Splitting by batch and re-merging may change the cell order, so realign the
+# embedding rows with adata$obs before storing them next to obs.
+embedding <- object_integrated@reductions$pca@cell.embeddings
+row_idx <- match(rownames(adata$obs), rownames(embedding))
+if (anyNA(row_idx)) {
+  stop(
+    "Integrated embedding is missing cells present in the input.",
+    call. = FALSE
+  )
+}
+embedding <- embedding[row_idx, , drop = FALSE]
+
+output <- anndata::AnnData(
+  uns = list(
+    dataset_id = adata$uns[["dataset_id"]],
+    normalization_id = adata$uns[["normalization_id"]],
+    method_id = meta$name
+  ),
+  obs = adata$obs,
+  var = adata$var,
+  obsm = list(
+    X_emb = embedding
+  )
+)
+
+cat("Write output AnnData to file\n")
+output$write_h5ad(par[["output"]], compression = "gzip")

From a0e70f17582c85b829ffc1c90e74885f03e2b0c2 Mon Sep 17 00:00:00 2001
From: JGarnica22
Date: Wed, 16
Apr 2025 12:20:08 +0200 Subject: [PATCH 2/3] add method STACAS --- src/methods/stacas/config.vsh.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/methods/stacas/config.vsh.yaml b/src/methods/stacas/config.vsh.yaml index ab08bac7..502f38d2 100644 --- a/src/methods/stacas/config.vsh.yaml +++ b/src/methods/stacas/config.vsh.yaml @@ -21,7 +21,7 @@ description: | (e.g. in terms of proportions of distinct cell populations). Prior cell type knowledge, given as cell type labels, can be provided to the algorithm to perform semi-supervised integration, leading to increased preservation of biological variability - in the resulting integrated space. + in the resulting integrated space. STACAS is robust to incomplete cell type labels and can be applied to large-scale integration tasks. references: doi: 10.1038/s41467-024-45240-z From 3d8adcb32319b8c4f9e393e5c2d9aa953a0f84f2 Mon Sep 17 00:00:00 2001 From: JGarnica22 Date: Wed, 16 Apr 2025 12:52:12 +0200 Subject: [PATCH 3/3] update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09d672d0..22b80c7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # task_batch_integration devel +## New functionality +* Added new method `methods/stacas`. +Adds the unsupervised version of the STACAS tool for integration of single-cell transcriptomics data. +This functionality enables correction of batch effects while preserving biological variability without requiring prior cell type annotations. + ## New functionality * Added `metrics/kbet_pg` and `metrics/kbet_pg_label` components (PR #52).