From 196b5a9729fb02c9b8054473c3c4848734ef34f2 Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Wed, 12 Feb 2025 17:55:42 +0200 Subject: [PATCH 1/4] Add SE and MAE support --- DESCRIPTION | 9 +- NAMESPACE | 8 + R/mixOmics.R | 317 ++++++++++++++++++++++++++++++++++- examples/mixOmics-examples.R | 146 ++++++++++++++++ man/mixOmics-package.Rd | 25 +++ man/mixOmics.Rd | 208 ++++++++++++++++++++++- 6 files changed, 698 insertions(+), 15 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index af36e758..a0594153 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,9 @@ Imports: BiocParallel, utils, gsignal, - rgl + rgl, + SummarizedExperiment, + MultiAssayExperiment Suggests: BiocStyle, knitr, @@ -39,17 +41,16 @@ Authors@R: person("Florian", "Rohart", role = "aut"), person("Ignacio", "Gonzalez", role = "aut"), person("Sebastien", "Dejean", role = "aut"), - ## key contributors person("Al J", "Abadi", role = "ctb", email = "al.jal.abadi@gmail.com"), person("Max", "Bladen", role = "ctb", email = "mbladen19@gmail.com"), person("Benoit", "Gautier", role = "ctb"), person("Francois", "Bartolo", role = "ctb"), - ## also contributions from person("Pierre", "Monget", role = "ctb"), person("Jeff", "Coquery", role = "ctb"), person("FangZou", "Yao", role = "ctb"), person("Benoit", "Liquet", role = "ctb"), - person("Eva", "Hamrud", role = c("ctb", "cre"), email = "mixomicsdeveloper@gmail.com")) + person("Eva", "Hamrud", role = c("ctb", "cre"), email = "mixomicsdeveloper@gmail.com"), + person("Tuomas", "Borman", role = "ctb", comment = c(ORCID = "0000-0002-8563-8884"))) Description: Multivariate methods are well suited to large omics data sets where the number of variables (e.g. genes, proteins, metabolites) is much larger than the number of samples (patients, cells, mice). They have the appealing properties of reducing the dimension of the data by using instrumental variables (components), which are defined as combinations of all variables. Those components are then used to produce useful graphical outputs that enable better understanding of the relationships and correlation structures between the different data sets that are integrated. mixOmics offers a wide range of multivariate methods for the exploration and integration of biological datasets with a particular focus on variable selection. The package proposes several sparse multivariate models we have developed to identify the key variables that are highly correlated, and/or explain the biological outcome of interest. The data that can be analysed with mixOmics may come from high throughput sequencing technologies, such as omics data (transcriptomics, metabolomics, proteomics, metagenomics etc) but also beyond the realm of omics (e.g. spectral imaging). The methods implemented in mixOmics can also handle missing values without having to delete entire rows with missing data. A non exhaustive list of methods include variants of generalised Canonical Correlation Analysis, sparse Partial Least Squares and sparse Discriminant Analysis. Recently we implemented integrative methods to combine multiple data sets: N-integration with variants of Generalised Canonical Correlation Analysis and P-integration with variants of multi-group Partial Least Squares. License: GPL (>= 2) URL: http://www.mixOmics.org diff --git a/NAMESPACE b/NAMESPACE index 58e77579..507aac96 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -187,6 +187,7 @@ export(withinVariation) export(wrapper.rgcca) export(wrapper.sgcca) export(wrapper.sgccda) +exportMethods(mixOmics) import(MASS) import(RColorBrewer) import(corpcor) @@ -196,6 +197,13 @@ import(lattice) import(parallel) importFrom(BiocParallel,SerialParam) importFrom(BiocParallel,bplapply) +importFrom(MultiAssayExperiment,experiments) +importFrom(MultiAssayExperiment,getWithColData) +importFrom(MultiAssayExperiment,intersectColumns) +importFrom(MultiAssayExperiment,intersectRows) +importFrom(SummarizedExperiment,assay) +importFrom(SummarizedExperiment,assayNames) +importFrom(SummarizedExperiment,colData) importFrom(dplyr,arrange) importFrom(dplyr,filter) importFrom(dplyr,group_by) diff --git a/R/mixOmics.R b/R/mixOmics.R index e6b0b61a..2006bc74 100644 --- a/R/mixOmics.R +++ b/R/mixOmics.R @@ -30,15 +30,35 @@ #' #' More details about the PLS modes in \code{?pls}. #' -#' @param X Input data. Either a matrix or a list of data sets (called -#' 'blocks') matching on the same samples. Data should be arranged in samples x +#' \code{X} can also be SummarizedExperiment or MultiAssayExperiment object +#' which are the main data containers in Bioconductor. +#' SummarizedExperiment is designed for single omic data while +#' MultiAssayExperiment streamlines the handling of multi-table data. When +#' SummarizedExperiment object is provided single-table PLS methods are +#' applied. Either MINT or DIABLO is applied, when MultiAssayExperiment is +#' fed as an input. +#' +#' @param X Input data. A matrix. a list of data sets (called +#' 'blocks') matching on the same samples, SummarizedExperiment or +#' MultiAssayExperiment object. Data should be arranged in samples x #' variables, with samples order matching in all data sets. +#' @param experiments A character or integer vector specifying experiments from +#' \code{X} when it is MultiAssayExperiment. +#' @param assay.type A character vector specifying assays from \code{X} when it +#' is SummarizedExperiment or MultiAssayExperiment. +#' @param col.var Character vector specifying column variables from +#' \code{colData(X)} when \code{X} is SummarizedExperiment or +#' MultiAssayExperiment. The values are passed as \code{Y}. #' @param Y Outcome. Either a numeric matrix of responses or a factor or a #' class vector for the discrete outcome. +#' @param MINT A single boolean value to specify whether to apply DIABLO or +#' MINT, when \code{X} is MultiAssayExperiment. #' @param indY To supply if Y is missing, indicates the position of the outcome -#' in the list X +#' in the list X. Disabled when \code{X} is SummarizedExperiment or +#' MultiAssayExperiment. #' @param study grouping factor indicating which samples are from the same -#' study +#' study. Disabled when \code{X} is SummarizedExperiment or +#' MultiAssayExperiment. #' @param ncomp If \code{X} is a data matrix, \code{ncomp} is a single value. #' If \code{X} is a list of data sets, \code{ncomp} is a numeric vector of #' length the number of blocks in \code{X}. The number of components to include @@ -154,9 +174,124 @@ #' graphical outputs to explore relationships between two omics data sets. #' BioData Mining 5:19. #' @keywords multivariate hplot dplot +#' @name mixOmics #' @export #' @example ./examples/mixOmics-examples.R -mixOmics <- function(X, +NULL + +#' @rdname mixOmics +#' @export +setGeneric("mixOmics", signature = "X", function(X, ...) + standardGeneric("mixOmics")) + +#' @rdname mixOmics +#' @export +setMethod("mixOmics", signature = c(X = "MultiAssayExperiment"), function( + X, experiments, assay.type, col.var, MINT = FALSE, ...){ + # Check MINT + if( !(is.logical(MINT) && length(MINT) == 1L) ){ + stop("'MINT' must be TRUE or FALSE.", call. = FALSE) + } + # Get arguments from SE + args <- internal_arguments_from_MAE( + mae = X, experiments = experiments, assay.type = assay.type, + col.var = col.var, MINT = MINT, ...) + # Run analysis + res <- do.call(internal_mixOmics, args) + return(res) + } +) + +#' @rdname mixOmics +#' @export +setMethod("mixOmics", signature = c(X = "SummarizedExperiment"), function( + X, assay.type, col.var, ...){ + # Get arguments from SE + args <- internal_arguments_from_SE( + se = X, assay.type = assay.type, col.var = col.var, ...) + # Run analysis + res <- do.call(internal_mixOmics, args) + return(res) + } +) + +#' @rdname mixOmics +#' @export +setMethod("mixOmics", signature = c(X = "matrix"), function( + X, + Y, + indY, + study, + ncomp, + keepX, + keepY, + design, + tau = NULL, + mode = c("regression", "canonical", "invariant", "classic"), + scale, + tol = 1e-06, + max.iter = 100, + near.zero.var = FALSE){ + res <- internal_mixOmics( + X, + Y, + indY, + study, + ncomp, + keepX, + keepY, + design, + tau = NULL, + mode = c("regression", "canonical", "invariant", "classic"), + scale, + tol = 1e-06, + max.iter = 100, + near.zero.var = FALSE + ) + return(res) + } +) + +#' @rdname mixOmics +#' @export +setMethod("mixOmics", signature = c(X = "list"), function( + X, + Y, + indY, + study, + ncomp, + keepX, + keepY, + design, + tau = NULL, + mode = c("regression", "canonical", "invariant", "classic"), + scale, + tol = 1e-06, + max.iter = 100, + near.zero.var = FALSE, + ...){ + res <- internal_mixOmics( + X, + Y, + indY, + study, + ncomp, + keepX, + keepY, + design, + tau = NULL, + mode = c("regression", "canonical", "invariant", "classic"), + scale, + tol = 1e-06, + max.iter = 100, + near.zero.var = FALSE, + ... + ) + return(res) + } +) + +internal_mixOmics <- function(X, Y, indY, #only use if Y not provided study, #mint @@ -169,7 +304,8 @@ mixOmics <- function(X, scale, tol = 1e-06, max.iter = 100, - near.zero.var = FALSE) + near.zero.var = FALSE, + ...) { mode <- match.arg(mode) @@ -474,3 +610,172 @@ mixOmics <- function(X, } + +# Get arguments from SummarizedExperiment object +internal_arguments_from_SE <- function(se, assay.type, col.var, ...){ + if( missing(assay.type) ){ + stop("Please provide 'assay.type'.", call. = FALSE) + } + if( missing(col.var) ){ + stop("Please provide 'col.var'.", call. = FALSE) + } + # Get arguments for the function passed via ... + args <- list(...) + # Remove Y, indY, and study from arguments because the input data is fully + # described by the SE, and otherwise there could be duplicated parameters. + args <- args[ !names(args) %in% c("Y", "indY", "study") ] + # Get assay + args[["X"]] <- internal_X_from_SE(se = se, assay.type = assay.type) + # Get covariates + args[["Y"]] <- internal_Y_from_SE(se, col.var) + return(args) +} + +# Function to retrieve abundance matrix from SummarizedExperiment object +#' @importFrom SummarizedExperiment assayNames assay +internal_X_from_SE <- function(se, assay.type){ + # Check that assay can be found + if( !(is.character(assay.type) && length(assay.type) == 1L && + assay.type %in% assayNames(se)) ){ + stop("assay.type' must be a single character value from", + "'assayNames(se)'.", call. = FALSE) + } + # Get assay + X <- assay(se, assay.type) |> as.matrix() |> t() + return(X) +} + +# Retrieve covariates from colData of SummarizedExperiment object +#' @importFrom SummarizedExperiment colData +internal_Y_from_SE <- function(se, col.var){ + # Check that col.var exists + if( !( is.character(col.var) && all(col.var %in% colnames(colData(se))) ) ){ + stop("col.var' must be a character value from ", + "'colnames(colData((se))'.", call. = FALSE) + } + # Get values + Y <- colData(se)[ , col.var, drop = FALSE] + # If there are more than 2 columns specified, they must be numeric + # variables. + if( !all(vapply(Y, is.numeric, logical(1L))) && length(col.var) > 1L ){ + stop("'col.var' specifies multiple character or factor columns.", + "It can specify either multiple numeric values or single ", + "column specifying outcome.", call. = FALSE) + } + # If Y specifies class, give it as a vector. + # If the values are numeric, it needs to be a matrix. + if( ncol(Y) == 1L && !is.numeric(Y[[1]]) ){ + Y <- Y[, 1] + } else{ + Y <- Y |> as.matrix() + } + return(Y) +} + +# Get arguments from MultiAssayExperiment object +#' @importFrom MultiAssayExperiment experiments intersectRows intersectColumns +#' getWithColData +internal_arguments_from_MAE <- function( + mae, experiments, assay.type, col.var, MINT, ...){ + # experiments, assay.type and col.var must be specified + if( missing(experiments) || length(experiments) == 0L ){ + stop("Please provide 'experiments'.", call. = FALSE) + } + if( missing(assay.type) || !is.character(assay.type) ){ + stop("Please provide 'assay.type' as a character value.", call. = FALSE) + } + if( missing(col.var) || !is.character(col.var) ){ + stop("Please provide 'col.var' as a character value.", call. = FALSE) + } + # Check that experiments is in correct format + is_index <- is.numeric(experiments) && all(experiments%%1==0) && + all(experiments>0&experiments<=length(experiments(mae))) + is_name <- is.character(experiments) && + all(experiments %in% names(experiments(mae))) + if( !( is_index || is_name ) ){ + stop("'experiments' must specify experiments from 'experiments(mae)'.", + call. = FALSE) + } + # Check that the length of assay.type match with the number of experiments. + # the values are checked later. + if( length(experiments) != length(assay.type) ){ + stop("The length of 'experiments' must match with the length of ", + "'assay.types'.", call. = FALSE) + } + # Take certain experiments + mae <- mae[, , experiments] + + # If user applies MINT, get shared features. If user applies DIABLO, get + # shared samples + if( MINT ){ + mae <- intersectRows(mae) + # If there are no rows anymore + if( length(unlist(rownames(mae))) == 0L ){ + stop("The experiments must include shared features.", call. = FALSE) + } + } else{ + mae <- intersectColumns(mae) + # If there are no samples anymore + if( length(unlist(colnames(mae))) == 0L ){ + stop("The experiments must include shared samples.", call. = FALSE) + } + } + + # Get arguments for the function passed via ... + args <- list(...) + # Remove Y, indY, and study from arguments because the input data is fully + # described by the SE, and otherwise there could be duplicated parameters. + args <- args[ !names(args) %in% c("Y", "indY", "study") ] + + + # Loop over experiments and get the abundance matrix from all + args[["X"]] <- lapply(seq_len(length(experiments)), function(i){ + internal_X_from_SE( + mae[[experiments[[i]]]], assay.type = assay.type[[i]]) + }) + + # For DIABLO, the samples are shared so we can get the class from single + # experiment + if( !MINT ){ + se <- getWithColData(mae, i = 1) |> suppressWarnings() + args[["Y"]] <- internal_Y_from_SE(se, col.var) + } else{ + # For MINT, we have to get classes for all the samples + # Get variable for each sample + Y <- lapply(seq_len(length(experiments)), function(i){ + se <- getWithColData(mae, i = i) |> suppressWarnings() + res <- internal_Y_from_SE(se, col.var) + return(res) + }) + # Combine vectors + if( all(vapply(Y, function(x) is.character(x)||is.factor(x), + logical(1L))) ){ + Y <- unlist(Y) + } else{ + # All fdimensions must be the same + if( !all(diff(vapply(Y, dim, numeric(2L))[2, ]) == 0L) ){ + stop("'col.var' specifies the different number of columns ", + "from experiments. Please check that 'col.var' specifies ", + "shared columns.", call. = FALSE) + } + Y <- do.call(rbind, Y) + } + args[["Y"]] <- Y + } + + # For MINT, we merge X to single matrix + if( MINT ){ + args[["X"]] <- do.call(rbind, args[["X"]]) + # Check that sample names are unique + if( anyDuplicated(rownames(args[["X"]])) ){ + stop("Please provide unique sample names.", call = FALSE) + } + # For MINT, provide also study parameter that tells from where which + # sample comes from + args[["study"]] <- rep(names(experiments(mae)), lengths(colnames(mae))) + } else{ + # For DIABLO, give name for the X. It is a list of experiments + names( args[["X"]] ) <- names(experiments(mae)) + } + return(args) +} diff --git a/examples/mixOmics-examples.R b/examples/mixOmics-examples.R index 0450654c..c63adc2e 100644 --- a/examples/mixOmics-examples.R +++ b/examples/mixOmics-examples.R @@ -61,4 +61,150 @@ study = stemcells$study) # directed towards mint.sPLS-DA because Y is a factor and there is a keepX out = mixOmics(X = stemcells$gene, Y = stemcells$celltype, ncomp = 2, study = stemcells$study, keepX = c(10, 5, 15)) + +## -- Use SummarizedExperiment as an input +# ---------------------------------------------------- + +data(liver.toxicity) +X = liver.toxicity[["gene"]] +Y = liver.toxicity[["clinic"]] +Y.factor = as.factor(liver.toxicity[["treatment"]][, 4]) + +# Create SummarizedExperiment object +library(SummarizedExperiment) +assay <- t(X) +coldata <- DataFrame(Y) +coldata[["outcome"]] <- Y.factor +se <- SummarizedExperiment( + assays = SimpleList(abundance = assay), + colData = coldata +) + +# directed towards PLS +columns <- colnames(colData(se)) +columns <- columns[ !columns %in% c("outcome") ] +out = mixOmics(se, assay.type = "abundance", col.var = columns, ncomp = 2) + +# directed towards sPLS because of keepX and/or keepY +out = mixOmics( + se, assay.type = "abundance", col.var = columns, + ncomp = 2, keepX = c(50, 50), keepY = c(10, 10)) + +# directed towards PLS-DA because Y is a factor +out = mixOmics(se, assay.type = "abundance", col.var = "outcome", ncomp = 2) + +# directed towards sPLS-DA because Y is a factor and there is a keepX +out = mixOmics( + se, assay.type = "abundance", col.var = "outcome", + ncomp = 2, keepX = c(20, 20)) + +## -- Use MultiAssayExperiment as an input +# ---------------------------------------------------- + +# Create MultiAssayExperiment +data(nutrimouse) +Y = unmap(nutrimouse[["diet"]]) +se1 <- SummarizedExperiment( + assays = SimpleList(abundance = t(nutrimouse[["gene"]]))) +se2 <- SummarizedExperiment( + assays = SimpleList(abundance = t(nutrimouse[["lipid"]]))) +coldata <- DataFrame(Y) +colnames(coldata) <- levels(Y) +coldata[["diet"]] <- nutrimouse[["diet"]] +rownames(coldata) <- colnames(se1) +mae <- MultiAssayExperiment( + experiments = ExperimentList(gene = se1, lipid = se2), + colData = coldata +) + +## -- directed towards block.pls framework because X is a MultiAssayExperiment +# ---------------------------------------------------- + +# directed towards block PLS +columns <- colnames(colData(mae)) +columns <- columns[ !columns %in% c("diet") ] +out = mixOmics( + mae, experiments = c("gene", "lipid"), + assay.type = c("abundance", "abundance"), + col.var = columns, ncomp = 3) + +# directed towards block sPLS because of keepX and/or keepY +out = mixOmics( + mae, experiments = c("gene", "lipid"), + assay.type = c("abundance", "abundance"), + col.var = columns, ncomp = 3, + keepX = list(gene = c(10,10), lipid = c(15,15)) +) + +# directed towards block PLS-DA because Y is a factor +out <- mixOmics( + mae, experiments = c("gene", "lipid"), + assay.type = c("abundance", "abundance"), + col.var = "diet", ncomp = 3) + +# directed towards block sPLS-DA because Y is a factor and there is a keepX +out <- mixOmics( + mae, experiments = c("gene", "lipid"), + assay.type = c("abundance", "abundance"), + col.var = "diet", ncomp = 3, + keepX = list(gene = c(10,10), lipid = c(15,15)) +) + +# Create MultiAssayExperiment +data(nutrimouse) +assay <- t(stemcells[["gene"]]) +coldata <- DataFrame(unmap(stemcells[["celltype"]])) +colnames(coldata) <- levels(unmap(stemcells[["celltype"]])) +rownames(coldata) <- colnames(assay) +se <- SummarizedExperiment( + assays = SimpleList(abundance = assay), + colData = coldata +) +se[["celltype"]] <- stemcells[["celltype"]] +# Split into MultiAssayExperiment +if( !require("mia") ){ + BiocManager::install("mia") +} +se_list <- splitOn(se, by = "columns", f = stemcells[["study"]]) +names(se_list) <- paste0("study_", names(se_list)) +mae <- MultiAssayExperiment(experiments = se_list) + +## -- directed towards mint.pls framework because of the specified MINT argument +# ---------------------------------------------------- + +# Get columns from the first experiment/study +columns <- colnames(colData(mae[[1]])) +columns <- columns[ !columns %in% c("celltype") ] + +# directed towards PLS (note that we are using SummarizedExperiment) +out = mixOmics(se, assay.type = "abundance", col.var = columns, ncomp = 2) + +# directed towards mint.PLS +out = mixOmics( + mae, experiments = c("study_1", "study_3"), + assay.type = c("abundance", "abundance"), + col.var = columns, ncomp = 2, MINT = TRUE) + +# directed towards mint.sPLS because of keepX and/or keepY +out = mixOmics( + mae, experiments = c("study_1", "study_3"), + assay.type = c("abundance", "abundance"), + col.var = columns, ncomp = 3, MINT = TRUE, + keepX = c(10, 5, 15) +) + +# directed towards mint.PLS-DA because we specify class +out = mixOmics( + mae, experiments = c("study_1", "study_3"), + assay.type = c("abundance", "abundance"), + col.var = "celltype", ncomp = 2, MINT = TRUE +) + +# directed towards mint.sPLS-DA because Y is a factor and there is a keepX +out = mixOmics( + mae, experiments = c("study_1", "study_3"), + assay.type = c("abundance", "abundance"), + col.var = "celltype", ncomp = 3, MINT = TRUE, keepX = c(10, 5, 15) +) + } \ No newline at end of file diff --git a/man/mixOmics-package.Rd b/man/mixOmics-package.Rd index 5ef1bcf1..b0a0edb3 100644 --- a/man/mixOmics-package.Rd +++ b/man/mixOmics-package.Rd @@ -42,3 +42,28 @@ Useful links: } } +\author{ +\strong{Maintainer}: Eva Hamrud \email{mixomicsdeveloper@gmail.com} [contributor] + +Authors: +\itemize{ + \item Kim-Anh Le Cao \email{kimanh.lecao@unimelb.edu.au} + \item Florian Rohart + \item Ignacio Gonzalez + \item Sebastien Dejean +} + +Other contributors: +\itemize{ + \item Al J Abadi \email{al.jal.abadi@gmail.com} [contributor] + \item Max Bladen \email{mbladen19@gmail.com} [contributor] + \item Benoit Gautier [contributor] + \item Francois Bartolo [contributor] + \item Pierre Monget [contributor] + \item Jeff Coquery [contributor] + \item FangZou Yao [contributor] + \item Benoit Liquet [contributor] + \item Tuomas Borman (\href{https://orcid.org/0000-0002-8563-8884}{ORCID}) [contributor] +} + +} diff --git a/man/mixOmics.Rd b/man/mixOmics.Rd index 07be38f3..3ad2294a 100644 --- a/man/mixOmics.Rd +++ b/man/mixOmics.Rd @@ -2,9 +2,19 @@ % Please edit documentation in R/mixOmics.R \name{mixOmics} \alias{mixOmics} +\alias{mixOmics,MultiAssayExperiment-method} +\alias{mixOmics,SummarizedExperiment-method} +\alias{mixOmics,matrix-method} +\alias{mixOmics,list-method} \title{PLS-derived methods: one function to rule them all!} \usage{ -mixOmics( +mixOmics(X, ...) + +\S4method{mixOmics}{MultiAssayExperiment}(X, experiments, assay.type, col.var, MINT = FALSE, ...) + +\S4method{mixOmics}{SummarizedExperiment}(X, assay.type, col.var, ...) + +\S4method{mixOmics}{matrix}( X, Y, indY, @@ -20,20 +30,54 @@ mixOmics( max.iter = 100, near.zero.var = FALSE ) + +\S4method{mixOmics}{list}( + X, + Y, + indY, + study, + ncomp, + keepX, + keepY, + design, + tau = NULL, + mode = c("regression", "canonical", "invariant", "classic"), + scale, + tol = 1e-06, + max.iter = 100, + near.zero.var = FALSE, + ... +) } \arguments{ -\item{X}{Input data. Either a matrix or a list of data sets (called -'blocks') matching on the same samples. Data should be arranged in samples x +\item{X}{Input data. A matrix. a list of data sets (called +'blocks') matching on the same samples, SummarizedExperiment or +MultiAssayExperiment object. Data should be arranged in samples x variables, with samples order matching in all data sets.} +\item{experiments}{A character or integer vector specifying experiments from +\code{X} when it is MultiAssayExperiment.} + +\item{assay.type}{A character vector specifying assays from \code{X} when it +is SummarizedExperiment or MultiAssayExperiment.} + +\item{col.var}{Character vector specifying column variables from +\code{colData(X)} when \code{X} is SummarizedExperiment or +MultiAssayExperiment. The values are passed as \code{Y}.} + +\item{MINT}{A single boolean value to specify whether to apply DIABLO or +MINT, when \code{X} is MultiAssayExperiment.} + \item{Y}{Outcome. Either a numeric matrix of responses or a factor or a class vector for the discrete outcome.} \item{indY}{To supply if Y is missing, indicates the position of the outcome -in the list X} +in the list X. Disabled when \code{X} is SummarizedExperiment or +MultiAssayExperiment.} \item{study}{grouping factor indicating which samples are from the same -study} +study. Disabled when \code{X} is SummarizedExperiment or +MultiAssayExperiment.} \item{ncomp}{If \code{X} is a data matrix, \code{ncomp} is a single value. If \code{X} is a list of data sets, \code{ncomp} is a numeric vector of @@ -103,6 +147,14 @@ analysis) and whether you input a \code{study} parameter (MINT analysis) or a \code{keepX} parameter (sparse analysis). More details about the PLS modes in \code{?pls}. + +\code{X} can also be SummarizedExperiment or MultiAssayExperiment object +which are the main data containers in Bioconductor. +SummarizedExperiment is designed for single omic data while +MultiAssayExperiment streamlines the handling of multi-table data. When +SummarizedExperiment object is provided single-table PLS methods are +applied. Either MINT or DIABLO is applied, when MultiAssayExperiment is +fed as an input. } \examples{ ## -- directed towards PLS framework because X is a matrix and the study argument is missing @@ -168,6 +220,152 @@ study = stemcells$study) # directed towards mint.sPLS-DA because Y is a factor and there is a keepX out = mixOmics(X = stemcells$gene, Y = stemcells$celltype, ncomp = 2, study = stemcells$study, keepX = c(10, 5, 15)) + +## -- Use SummarizedExperiment as an input +# ---------------------------------------------------- + +data(liver.toxicity) +X = liver.toxicity[["gene"]] +Y = liver.toxicity[["clinic"]] +Y.factor = as.factor(liver.toxicity[["treatment"]][, 4]) + +# Create SummarizedExperiment object +library(SummarizedExperiment) +assay <- t(X) +coldata <- DataFrame(Y) +coldata[["outcome"]] <- Y.factor +se <- SummarizedExperiment( + assays = SimpleList(abundance = assay), + colData = coldata +) + +# directed towards PLS +columns <- colnames(colData(se)) +columns <- columns[ !columns \%in\% c("outcome") ] +out = mixOmics(se, assay.type = "abundance", col.var = columns, ncomp = 2) + +# directed towards sPLS because of keepX and/or keepY +out = mixOmics( + se, assay.type = "abundance", col.var = columns, + ncomp = 2, keepX = c(50, 50), keepY = c(10, 10)) + +# directed towards PLS-DA because Y is a factor +out = mixOmics(se, assay.type = "abundance", col.var = "outcome", ncomp = 2) + +# directed towards sPLS-DA because Y is a factor and there is a keepX +out = mixOmics( + se, assay.type = "abundance", col.var = "outcome", + ncomp = 2, keepX = c(20, 20)) + +## -- Use MultiAssayExperiment as an input +# ---------------------------------------------------- + +# Create MultiAssayExperiment +data(nutrimouse) +Y = unmap(nutrimouse[["diet"]]) +se1 <- SummarizedExperiment( + assays = SimpleList(abundance = t(nutrimouse[["gene"]]))) +se2 <- SummarizedExperiment( + assays = SimpleList(abundance = t(nutrimouse[["lipid"]]))) +coldata <- DataFrame(Y) +colnames(coldata) <- levels(Y) +coldata[["diet"]] <- nutrimouse[["diet"]] +rownames(coldata) <- colnames(se1) +mae <- MultiAssayExperiment( + experiments = ExperimentList(gene = se1, lipid = se2), + colData = coldata +) + +## -- directed towards block.pls framework because X is a MultiAssayExperiment +# ---------------------------------------------------- + +# directed towards block PLS +columns <- colnames(colData(mae)) +columns <- columns[ !columns \%in\% c("diet") ] +out = mixOmics( + mae, experiments = c("gene", "lipid"), + assay.type = c("abundance", "abundance"), + col.var = columns, ncomp = 3) + +# directed towards block sPLS because of keepX and/or keepY +out = mixOmics( + mae, experiments = c("gene", "lipid"), + assay.type = c("abundance", "abundance"), + col.var = columns, ncomp = 3, + keepX = list(gene = c(10,10), lipid = c(15,15)) +) + +# directed towards block PLS-DA because Y is a factor +out <- mixOmics( + mae, experiments = c("gene", "lipid"), + assay.type = c("abundance", "abundance"), + col.var = "diet", ncomp = 3) + +# directed towards block sPLS-DA because Y is a factor and there is a keepX +out <- mixOmics( + mae, experiments = c("gene", "lipid"), + assay.type = c("abundance", "abundance"), + col.var = "diet", ncomp = 3, + keepX = list(gene = c(10,10), lipid = c(15,15)) +) + +# Create MultiAssayExperiment +data(nutrimouse) +assay <- t(stemcells[["gene"]]) +coldata <- DataFrame(unmap(stemcells[["celltype"]])) +colnames(coldata) <- levels(unmap(stemcells[["celltype"]])) +rownames(coldata) <- colnames(assay) +se <- SummarizedExperiment( + assays = SimpleList(abundance = assay), + colData = coldata +) +se[["celltype"]] <- stemcells[["celltype"]] +# Split into MultiAssayExperiment +if( !require("mia") ){ + BiocManager::install("mia") +} +se_list <- splitOn(se, by = "columns", f = stemcells[["study"]]) +names(se_list) <- paste0("study_", names(se_list)) +mae <- MultiAssayExperiment(experiments = se_list) + +## -- directed towards mint.pls framework because of the specified MINT argument +# ---------------------------------------------------- + +# Get columns from the first experiment/study +columns <- colnames(colData(mae[[1]])) +columns <- columns[ !columns \%in\% c("celltype") ] + +# directed towards PLS (note that we are using SummarizedExperiment) +out = mixOmics(se, assay.type = "abundance", col.var = columns, ncomp = 2) + +# directed towards mint.PLS +out = mixOmics( + mae, experiments = c("study_1", "study_3"), + assay.type = c("abundance", "abundance"), + col.var = columns, ncomp = 2, MINT = TRUE) + +# directed towards mint.sPLS because of keepX and/or keepY +out = mixOmics( + mae, experiments = c("study_1", "study_3"), + assay.type = c("abundance", "abundance"), + col.var = columns, ncomp = 3, MINT = TRUE, + keepX = c(10, 5, 15) +) + +# directed towards mint.PLS-DA because we specify class +out = mixOmics( + mae, experiments = c("study_1", "study_3"), + assay.type = c("abundance", "abundance"), + col.var = "celltype", ncomp = 2, MINT = TRUE +) + +# directed towards mint.sPLS-DA because Y is a factor and there is a keepX +out = mixOmics( + mae, experiments = c("study_1", "study_3"), + assay.type = c("abundance", "abundance"), + col.var = "celltype", ncomp = 3, MINT = TRUE, keepX = c(10, 5, 15) +) + } } \references{ From 587f63388186217c4a38a1b69927e8901c8caa65 Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Wed, 26 Feb 2025 13:35:04 +0200 Subject: [PATCH 2/4] up --- R/mixOmics.R | 1 + man/mixOmics.Rd | 2 ++ 2 files changed, 3 insertions(+) diff --git a/R/mixOmics.R b/R/mixOmics.R index 2006bc74..23441403 100644 --- a/R/mixOmics.R +++ b/R/mixOmics.R @@ -87,6 +87,7 @@ #' function (should be set to TRUE in particular for data with many zero #' values). Setting this argument to FALSE (when appropriate) will speed up the #' computations. Default value is FALSE +#' @param ... Additional arguments. #' @return none #' @author Florian Rohart, Kim-Anh LĂȘ Cao, Al J Abadi #' @seealso \code{\link{pls}}, \code{\link{spls}}, \code{\link{plsda}}, diff --git a/man/mixOmics.Rd b/man/mixOmics.Rd index 3ad2294a..af0dd86a 100644 --- a/man/mixOmics.Rd +++ b/man/mixOmics.Rd @@ -55,6 +55,8 @@ mixOmics(X, ...) MultiAssayExperiment object. Data should be arranged in samples x variables, with samples order matching in all data sets.} +\item{...}{Additional arguments.} + \item{experiments}{A character or integer vector specifying experiments from \code{X} when it is MultiAssayExperiment.} From 57044189b659dd111bb4c4cb65140e438251c7fc Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Wed, 26 Feb 2025 14:06:26 +0200 Subject: [PATCH 3/4] Add unit tests --- R/mixOmics.R | 2 ++ tests/testthat/test-mixOmics.R | 56 ++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 tests/testthat/test-mixOmics.R diff --git a/R/mixOmics.R b/R/mixOmics.R index 23441403..20c3eac1 100644 --- a/R/mixOmics.R +++ b/R/mixOmics.R @@ -199,6 +199,7 @@ setMethod("mixOmics", signature = c(X = "MultiAssayExperiment"), function( col.var = col.var, MINT = MINT, ...) # Run analysis res <- do.call(internal_mixOmics, args) + res$call <- NULL return(res) } ) @@ -212,6 +213,7 @@ setMethod("mixOmics", signature = c(X = "SummarizedExperiment"), function( se = X, assay.type = assay.type, col.var = col.var, ...) # Run analysis res <- do.call(internal_mixOmics, args) + res$call <- NULL return(res) } ) diff --git a/tests/testthat/test-mixOmics.R b/tests/testthat/test-mixOmics.R new file mode 100644 index 00000000..a49be7f5 --- /dev/null +++ b/tests/testthat/test-mixOmics.R @@ -0,0 +1,56 @@ +context("mixOmics") + +test_that("Test that PLS works with SE", { + data(liver.toxicity) + X <- liver.toxicity$gene + Y <- liver.toxicity$clinic + Y.factor <- as.factor(liver.toxicity$treatment[, 4]) + # + out_ref <- mixOmics(X, Y, ncomp = 2) + # SummarizedExperiment + library(SummarizedExperiment) + assay <- t(X) + coldata <- DataFrame(Y) + coldata[["outcome"]] <- Y.factor + se <- SummarizedExperiment( + assays = SimpleList(abundance = assay), + colData = coldata + ) + columns <- colnames(colData(se)) + columns <- columns[ !columns %in% c("outcome") ] + out = mixOmics(se, assay.type = "abundance", col.var = columns, ncomp = 2) + # + expect_equal(out$Y, out_ref$Y) +}) + +test_that("Test that block sPLS-DA works with MAE", { + data(nutrimouse) + data <- list(gene = nutrimouse$gene, lipid = nutrimouse$lipid) + # + out_ref <- mixOmics(X = data, Y = nutrimouse$diet, ncomp = 3, + keepX = list(gene = c(10,10), lipid = c(15,15))) + # MultiAssayExperiment + library(SummarizedExperiment) + library(MultiAssayExperiment) + data(nutrimouse) + Y = unmap(nutrimouse[["diet"]]) + se1 <- SummarizedExperiment( + assays = SimpleList(abundance = t(nutrimouse[["gene"]]))) + se2 <- SummarizedExperiment( + assays = SimpleList(abundance = t(nutrimouse[["lipid"]]))) + coldata <- DataFrame(diet = nutrimouse[["diet"]]) + rownames(coldata) <- colnames(se1) + mae <- MultiAssayExperiment( + experiments = ExperimentList(gene = se1, lipid = se2), + colData = coldata + ) + columns <- colnames(colData(mae)) + out <- mixOmics( + mae, experiments = c("gene", "lipid"), + assay.type = c("abundance", "abundance"), + col.var = "diet", ncomp = 3, + keepX = list(gene = c(10,10), lipid = c(15,15)) + ) + # + expect_equal(out$loadings, out_ref$loadings) +}) \ No newline at end of file From 116b99b3ac4228a57647be4d660ccd3ba416c3f5 Mon Sep 17 00:00:00 2001 From: TuomasBorman Date: Sat, 8 Mar 2025 19:12:12 +0200 Subject: [PATCH 4/4] Minor fix --- R/mixOmics.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/mixOmics.R b/R/mixOmics.R index 20c3eac1..850960ad 100644 --- a/R/mixOmics.R +++ b/R/mixOmics.R @@ -694,7 +694,7 @@ internal_arguments_from_MAE <- function( is_index <- is.numeric(experiments) && all(experiments%%1==0) && all(experiments>0&experiments<=length(experiments(mae))) is_name <- is.character(experiments) && - all(experiments %in% names(experiments(mae))) + all(experiments %in% names(mae)) if( !( is_index || is_name ) ){ stop("'experiments' must specify experiments from 'experiments(mae)'.", call. = FALSE) @@ -775,10 +775,10 @@ internal_arguments_from_MAE <- function( } # For MINT, provide also study parameter that tells from where which # sample comes from - args[["study"]] <- rep(names(experiments(mae)), lengths(colnames(mae))) + args[["study"]] <- rep(names(mae), lengths(colnames(mae))) } else{ # For DIABLO, give name for the X. It is a list of experiments - names( args[["X"]] ) <- names(experiments(mae)) + names( args[["X"]] ) <- names(mae) } return(args) }