@@ -427,19 +427,18 @@ best.cluster <- function(clusters.df, features = c('Rank1Residuals', 'size'), we
427427}
428428
429429# ' Normalizing (global-scaling) using references
430- # '
431- # ' normalize a read count matrix given the set of reference genes identified by id
432- # ' @param X a read-count matrix of the form samples x genes(transcripts)
430+ # '
431+ # ' Normalize a read count matrix given the set of reference genes identified by id
432+ # ' @param X a read-count matrix of the form genes x samples
433433# ' @param ref.idx an integer vector specifying the column indices of X to be used as reference
434- # ' @param scale (default to TRUE) whether to scale the normalizing factors such that their geometric mean is 1
435434# ' @export
436435normalize.by.refs <- function (X , ref.idx , scale = TRUE ) {
437436 if (length(ref.idx ) == 1 ) { # need to be treated specially since dim(X) reduces to NULL and cause error in apply
438- Xref = matrix (X [, ref.idx ], ncol = 1 )
437+ Xref = matrix (X [ref.idx , ], nrow = 1 )
439438 } else {
440- Xref = X [, ref.idx ]
439+ Xref = X [ref.idx , ]
441440 }
442- normFactors = apply(Xref ,MARGIN = 1 ,FUN = sum )
441+ normFactors = colSums( Xref ) # apply(Xref,MARGIN = 2 ,FUN = sum)
443442 if (scale ) {
444443 normFactors = normFactors / geom.mean(normFactors )
445444 }
@@ -450,32 +449,47 @@ normalize.by.refs <- function(X, ref.idx, scale = TRUE) {
450449 return (NULL )
451450 }
452451
453- X.norm = t(sapply(1 : length(normFactors ), FUN = function (i ) {
454- return (X [i ,] / normFactors [i ])
455- }))
452+ X.norm = sweep(X , MARGIN = 2 , STATS = normFactors , FUN = ' /' )
456453 colnames(X.norm ) = colnames(X )
457454 rownames(X.norm ) = rownames(X )
458455 return (X.norm )
459456}
460457
458+ # ' normalize.by.scaleFactors
459+ # '
460+ # ' @param X count matrix in the form genes x samples
461+ # ' @scaleFactors a vector of scaling factor, must be the same length as the number of samples
462+ # ' @export
463+ normalize.by.scaleFactors <- function (X , scaleFactors ) {
464+ if (length(scaleFactors ) != ncol(X )) {
465+ stop(" scaleFactors should have the same length as number of samples in the input matrix." )
466+ }
467+ X.normed = sapply(1 : length(scaleFactors ), FUN = function (j ) {
468+ return (X [,j ]/ scaleFactors [j ])
469+ })
470+ `colnames<-`(X.normed , colnames(X ))
471+ return (X.normed )
472+ }
473+
474+
461475# ' Normalize by Upper Quantile
462476# '
463477# ' @import edgeR
464478# ' @export
465479normalize.by.uq <- function (X , ... ) {
466- effLibSizes = apply( X , 1 , sum ) * edgeR :: calcNormFactors(t( X ) , method = ' upperquartile' , ... ) # effective library sizes
467- sweep(X , 1 , mean( effLibSizes ) / effLibSizes , " * " ) %> %
468- return ()
480+ effLibSizes = colSums( X ) * edgeR :: calcNormFactors(X , method = ' upperquartile' , group = group , ... ) # effective library sizes
481+ sweep(X , 2 , effLibSizes , " / " ) %> %
482+ return ()
469483}
470484
471485# ' Call calcNormFactors by edgeR
472486# '
473487# ' @import edgeR
474488# ' @export
475489normalize.by.tmm <- function (X ,... ) {
476- effLibSizes = apply( X , 1 , sum ) * edgeR :: calcNormFactors(t( X ) , method = ' TMM' , group = group , ... ) # effective library sizes
477- sweep(X , 1 , mean( effLibSizes ) / effLibSizes , " * " ) %> %
478- return ()
490+ effLibSizes = colSums( X ) * edgeR :: calcNormFactors(X , method = ' TMM' , group = group , ... ) # effective library sizes
491+ sweep(X , 2 , effLibSizes , " / " ) %> %
492+ return ()
479493}
480494
481495# ' Call estimateSizeFactorsForMatrix by DESeq2
@@ -484,10 +498,8 @@ normalize.by.tmm <- function(X,...) {
484498# ' @param X read count matrix with samples in rows and genes in columns
485499# ' @export
486500normalize.by.deseq <- function (X , ... ) {
487- X %> %
488- t() %> %
489- DESeq2 :: estimateSizeFactorsForMatrix(... ) %> %
490- sweep(X , 1 , . , " /" ) %> %
501+ DESeq2 :: estimateSizeFactorsForMatrix(X ) %> %
502+ sweep(X , 2 , . , " /" ) %> %
491503 return ()
492504}
493505
@@ -496,21 +508,19 @@ normalize.by.deseq <- function(X, ...) {
496508# ' @import PoissonSeq
497509# ' @export
498510normalize.by.poissonseq <- function (X , ... ) {
499- PoissonSeq :: PS.Est.Depth(t( X ) , ... ) %> %
500- sweep(X , 1 , . , " /" ) %> %
501- return ()
511+ PoissonSeq :: PS.Est.Depth(X , ... ) %> %
512+ sweep(X , 2 , . , " /" ) %> %
513+ return ()
502514}
503515
504516# ' Normalize by DEGES
505517# '
506518# ' @import TCC
507519# ' @export
508520normalize.by.deges <- function (X , group , norm.method = ' tmm' , test.method = ' edger' , iteration = 1 ) {
509- t(X ) %> %
510- TCC :: TCC(group ) %> %
521+ TCC :: TCC(X , group ) %> %
511522 TCC :: calcNormFactors(norm.method = ' tmm' , test.method = ' edger' , iteration = iteration ) %> %
512523 TCC :: getNormalizedData() %> %
513- t() %> %
514524 return ()
515525}
516526
@@ -519,28 +529,21 @@ normalize.by.deges <- function(X, group, norm.method = 'tmm', test.method = 'edg
519529# ' @import RUVSeq
520530# ' @export
521531normalize.by.ruv_r <- function (X , group , ... ) {
522- Xt <- t(X )
523532 if (! is.factor(group )) group <- factor (group )
524- design <- model.matrix(~ group , data = as.data.frame(Xt ))
533+ design <- model.matrix(~ group , data = as.data.frame(X ))
525534
526- y <- edgeR :: DGEList(counts = Xt , group = group )
535+ y <- edgeR :: DGEList(counts = X , group = group )
527536 y <- edgeR :: calcNormFactors(y , method = " upperquartile" )
528537 y <- edgeR :: estimateGLMCommonDisp(y , design )
529538 y <- edgeR :: estimateGLMTagwiseDisp(y , design )
530539
531540 fit <- edgeR :: glmFit(y , design )
532541 res <- residuals(fit , type = " deviance" )
533542 if (is.null(res )) {
534- message(str(y ))
543+ message(str(y ))
535544 }
536-
537- Xt.normed <- log(Xt + 1 ) %> %
538- RUVSeq :: RUVr(1 : nrow(Xt ), k = 1 , res , round = FALSE , isLog = TRUE ) %> %
539- `$`(' normalizedCounts' ) %> %
540- exp() %> %
541- add(- 1 ) %> %
542- t() %> %
543- return ()
545+ X.normed <- RUVSeq :: RUVr(X , 1 : nrow(X ), k = 1 , res , round = FALSE )$ normalizedCounts
546+ return (X.normed )
544547}
545548
546549# ' generic function for cross-validation
0 commit comments