Skip to content

Commit b523694

Browse files
committed
fix normalizers to take gene x sample input
1 parent 61b82f1 commit b523694

File tree

1 file changed

+41
-38
lines changed

1 file changed

+41
-38
lines changed

R/rnaseq.R

Lines changed: 41 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -427,19 +427,18 @@ best.cluster <- function(clusters.df, features = c('Rank1Residuals', 'size'), we
427427
}
428428

429429
#' Normalizing (global-scaling) using references
430-
#'
431-
#' normalize a read count matrix given the set of reference genes identified by id
432-
#' @param X a read-count matrix of the form samples x genes(transcripts)
430+
#'
431+
#' Normalize a read count matrix given the set of reference genes identified by id
432+
#' @param X a read-count matrix of the form genes x samples
433433
#' @param ref.idx an integer vector specifying the column indices of X to be used as reference
434-
#' @param scale (default to TRUE) whether to scale the normalizing factors such that their geometric mean is 1
435434
#' @export
436435
normalize.by.refs <- function(X, ref.idx, scale = TRUE) {
437436
if (length(ref.idx) == 1) { # need to be treated specially since dim(X) reduces to NULL and cause error in apply
438-
Xref = matrix(X[,ref.idx], ncol=1)
437+
Xref = matrix(X[ref.idx,], nrow=1)
439438
} else {
440-
Xref = X[,ref.idx]
439+
Xref = X[ref.idx,]
441440
}
442-
normFactors = apply(Xref,MARGIN = 1,FUN = sum)
441+
normFactors = colSums(Xref) # apply(Xref,MARGIN = 2,FUN = sum)
443442
if (scale) {
444443
normFactors = normFactors / geom.mean(normFactors)
445444
}
@@ -450,32 +449,47 @@ normalize.by.refs <- function(X, ref.idx, scale = TRUE) {
450449
return(NULL)
451450
}
452451

453-
X.norm = t(sapply(1:length(normFactors), FUN = function(i) {
454-
return(X[i,] / normFactors[i])
455-
}))
452+
X.norm = sweep(X, MARGIN = 2, STATS = normFactors, FUN = '/')
456453
colnames(X.norm) = colnames(X)
457454
rownames(X.norm) = rownames(X)
458455
return(X.norm)
459456
}
460457

458+
#' normalize.by.scaleFactors
459+
#'
460+
#' @param X count matrix in the form genes x samples
461+
#' @scaleFactors a vector of scaling factor, must be the same length as the number of samples
462+
#' @export
463+
normalize.by.scaleFactors <- function(X, scaleFactors) {
464+
if (length(scaleFactors) != ncol(X)) {
465+
stop("scaleFactors should have the same length as number of samples in the input matrix.")
466+
}
467+
X.normed = sapply(1:length(scaleFactors), FUN = function(j) {
468+
return(X[,j]/scaleFactors[j])
469+
})
470+
`colnames<-`(X.normed, colnames(X))
471+
return(X.normed)
472+
}
473+
474+
461475
#' Normalize by Upper Quantile
462476
#'
463477
#' @import edgeR
464478
#' @export
465479
normalize.by.uq <- function(X, ...) {
466-
effLibSizes = apply(X, 1, sum) * edgeR::calcNormFactors(t(X), method = 'upperquartile', ...) # effective library sizes
467-
sweep(X, 1, mean(effLibSizes) / effLibSizes, "*") %>%
468-
return()
480+
effLibSizes = colSums(X) * edgeR::calcNormFactors(X, method = 'upperquartile', group = group, ...) # effective library sizes
481+
sweep(X, 2, effLibSizes, "/") %>%
482+
return()
469483
}
470484

471485
#' Call calcNormFactors by edgeR
472486
#'
473487
#' @import edgeR
474488
#' @export
475489
normalize.by.tmm <- function(X,...) {
476-
effLibSizes = apply(X, 1, sum) * edgeR::calcNormFactors(t(X), method = 'TMM', group = group, ...) # effective library sizes
477-
sweep(X, 1, mean(effLibSizes) / effLibSizes, "*") %>%
478-
return()
490+
effLibSizes = colSums(X) * edgeR::calcNormFactors(X, method = 'TMM', group = group, ...) # effective library sizes
491+
sweep(X, 2, effLibSizes, "/") %>%
492+
return()
479493
}
480494

481495
#' Call estimateSizeFactorsForMatrix by DESeq2
@@ -484,10 +498,8 @@ normalize.by.tmm <- function(X,...) {
484498
#' @param X read count matrix with samples in rows and genes in columns
485499
#' @export
486500
normalize.by.deseq <- function(X, ...) {
487-
X %>%
488-
t() %>%
489-
DESeq2::estimateSizeFactorsForMatrix(...) %>%
490-
sweep(X, 1, ., "/") %>%
501+
DESeq2::estimateSizeFactorsForMatrix(X) %>%
502+
sweep(X, 2, ., "/") %>%
491503
return()
492504
}
493505

@@ -496,21 +508,19 @@ normalize.by.deseq <- function(X, ...) {
496508
#' @import PoissonSeq
497509
#' @export
498510
normalize.by.poissonseq <- function(X, ...) {
499-
PoissonSeq::PS.Est.Depth(t(X), ...) %>%
500-
sweep(X, 1, ., "/") %>%
501-
return()
511+
PoissonSeq::PS.Est.Depth(X, ...) %>%
512+
sweep(X, 2, ., "/") %>%
513+
return()
502514
}
503515

504516
#' Normalize by DEGES
505517
#'
506518
#' @import TCC
507519
#' @export
508520
normalize.by.deges <- function(X, group, norm.method = 'tmm', test.method = 'edger', iteration = 1) {
509-
t(X) %>%
510-
TCC::TCC(group) %>%
521+
TCC::TCC(X, group) %>%
511522
TCC::calcNormFactors(norm.method = 'tmm', test.method = 'edger', iteration = iteration) %>%
512523
TCC::getNormalizedData() %>%
513-
t() %>%
514524
return()
515525
}
516526

@@ -519,28 +529,21 @@ normalize.by.deges <- function(X, group, norm.method = 'tmm', test.method = 'edg
519529
#' @import RUVSeq
520530
#' @export
521531
normalize.by.ruv_r <- function(X, group, ...) {
522-
Xt <- t(X)
523532
if (!is.factor(group)) group <- factor(group)
524-
design <- model.matrix(~group, data=as.data.frame(Xt))
533+
design <- model.matrix(~group, data=as.data.frame(X))
525534

526-
y <- edgeR::DGEList(counts=Xt, group=group)
535+
y <- edgeR::DGEList(counts=X, group=group)
527536
y <- edgeR::calcNormFactors(y, method="upperquartile")
528537
y <- edgeR::estimateGLMCommonDisp(y, design)
529538
y <- edgeR::estimateGLMTagwiseDisp(y, design)
530539

531540
fit <- edgeR::glmFit(y, design)
532541
res <- residuals(fit, type="deviance")
533542
if (is.null(res)) {
534-
message(str(y))
543+
message(str(y))
535544
}
536-
537-
Xt.normed <- log(Xt +1) %>%
538-
RUVSeq::RUVr(1:nrow(Xt), k=1, res, round = FALSE, isLog = TRUE) %>%
539-
`$`('normalizedCounts') %>%
540-
exp() %>%
541-
add(-1) %>%
542-
t() %>%
543-
return()
545+
X.normed <- RUVSeq::RUVr(X, 1:nrow(X), k=1, res, round = FALSE)$normalizedCounts
546+
return(X.normed)
544547
}
545548

546549
#' generic function for cross-validation

0 commit comments

Comments
 (0)