Skip to content

Commit ad9df44

Browse files
updated
1 parent dad5f52 commit ad9df44

File tree

156 files changed

+3079
-352
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

156 files changed

+3079
-352
lines changed

R/.Rbuildignore

Lines changed: 0 additions & 2 deletions
This file was deleted.

R/R/data.R

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#' Test data for scrabble
2+
#'
3+
#' @description "demo_data" is a data list with the length of 3. The first element in
4+
#' the list is generated drop-out scRNAseq data with 800 genes and 1000 cells. The second
5+
#' element in the list is the generated bulk RNAseq data with 800 genes. The third
6+
#' element is the true scRNAseq data without dropouts. The steps of generating the data
7+
#' is shown in Details section.
8+
#'
9+
#' @usage demo_data
10+
#'
11+
#' @author Tao Peng, Kai Tan
12+
#'
13+
#' @details The data set was generated from the well-developed R package Splatter.
14+
#'
15+
"demo_data"

R/R/plotting_functions.R

100755100644
File mode changed.

R/R/scrabble.R

100755100644
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,10 @@
4242
#'
4343
scrabble <- function(data,
4444
parameter,
45-
nIter = 60,
46-
error_out_threshold = 1e-7,
47-
nIter_inner = 100,
48-
error_inner_threshold = 1e-5){
45+
nIter = 20,
46+
error_out_threshold = 1e-4,
47+
nIter_inner = 20,
48+
error_inner_threshold = 1e-4){
4949

5050
# Use the sparse matrix to store the matrix
5151
Y <- as(t(as.matrix(data[[1]])), "dgCMatrix")

R/R/sysdata.rda

-3.17 MB
Binary file not shown.

R/R/test_scrabble.Rmd

100755100644
File mode changed.
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,5 @@ LinkingTo: Rcpp, RcppEigen, gridExtra
2323
RoxygenNote: 6.1.1
2424
Suggests: knitr,rmarkdown,BiocStyle
2525
VignetteBuilder: knitr
26-
Built: R 3.5.1; x86_64-redhat-linux-gnu; 2019-03-04 01:23:28 UTC; unix
26+
NeedsCompilation: yes
27+
Packaged: 2019-03-04 15:22:00 UTC; taopeng
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
\documentclass[a4paper]{book}
2+
\usepackage[times,inconsolata,hyper]{Rd}
3+
\usepackage{makeidx}
4+
\usepackage[latin1]{inputenc} % @SET ENCODING@
5+
% \usepackage{graphicx} % @USE GRAPHICX@
6+
\makeindex{}
7+
\begin{document}
8+
\chapter*{}
9+
\begin{center}
10+
{\textbf{\huge Package}}
11+
\par\bigskip{\large \today}
12+
\end{center}
13+
\Rdcontents{\R{} topics documented:}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
\documentclass[a4paper]{book}
2+
\usepackage[times,inconsolata,hyper]{Rd}
3+
\usepackage{makeidx}
4+
\usepackage[latin1]{inputenc} % @SET ENCODING@
5+
% \usepackage{graphicx} % @USE GRAPHICX@
6+
\makeindex{}
7+
\begin{document}
8+
\chapter*{}
9+
\begin{center}
10+
{\textbf{\huge Package}}
11+
\par\bigskip{\large \today}
12+
\end{center}
13+
\Rdcontents{\R{} topics documented:}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
\documentclass[a4paper]{book}
2+
\usepackage[times,inconsolata,hyper]{Rd}
3+
\usepackage{makeidx}
4+
\usepackage[latin1]{inputenc} % @SET ENCODING@
5+
% \usepackage{graphicx} % @USE GRAPHICX@
6+
\makeindex{}
7+
\begin{document}
8+
\chapter*{}
9+
\begin{center}
10+
{\textbf{\huge Package}}
11+
\par\bigskip{\large \today}
12+
\end{center}
13+
\Rdcontents{\R{} topics documented:}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
2+
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3+
4+
asdgCMatrix_ <- function(XX_) {
5+
.Call(`_SCRABBLE_asdgCMatrix_`, XX_)
6+
}
7+
8+
mSparse <- function(As, bs) {
9+
.Call(`_SCRABBLE_mSparse`, As, bs)
10+
}
11+
12+
mMatrix <- function(As, bs) {
13+
.Call(`_SCRABBLE_mMatrix`, As, bs)
14+
}
15+
16+
mSparseT <- function(As, bs) {
17+
.Call(`_SCRABBLE_mSparseT`, As, bs)
18+
}
19+
20+
mMatrixT <- function(As, bs) {
21+
.Call(`_SCRABBLE_mMatrixT`, As, bs)
22+
}
23+
24+
getZ <- function(As, bs) {
25+
.Call(`_SCRABBLE_getZ`, As, bs)
26+
}
27+
28+
getA <- function(As, betas, gammas, ns) {
29+
.Call(`_SCRABBLE_getA`, As, betas, gammas, ns)
30+
}
31+
32+
getB <- function(Ds, Zs, Ys, betas) {
33+
.Call(`_SCRABBLE_getB`, Ds, Zs, Ys, betas)
34+
}
35+
36+
ToDense <- function(newXs) {
37+
.Call(`_SCRABBLE_ToDense`, newXs)
38+
}
39+
40+
ToSparse <- function(newXs) {
41+
.Call(`_SCRABBLE_ToSparse`, newXs)
42+
}
43+
44+
cDescent <- function(gamma_Y_B_Lambdas, As, zoness, newXs, nIters, error_thresholds) {
45+
.Call(`_SCRABBLE_cDescent`, gamma_Y_B_Lambdas, As, zoness, newXs, nIters, error_thresholds)
46+
}
47+
48+
getS <- function(newXs, Lambdas, gammas) {
49+
.Call(`_SCRABBLE_getS`, newXs, Lambdas, gammas)
50+
}
51+
52+
getY <- function(ss, Us, Vs) {
53+
.Call(`_SCRABBLE_getY`, ss, Us, Vs)
54+
}
55+
56+
calculateError <- function(Xs, newXs, m1s, n1s) {
57+
.Call(`_SCRABBLE_calculateError`, Xs, newXs, m1s, n1s)
58+
}
59+
60+
updateLambda <- function(Lambdas, newXs, newYs, gammas) {
61+
.Call(`_SCRABBLE_updateLambda`, Lambdas, newXs, newYs, gammas)
62+
}
63+
64+
recoverData <- function(newXs) {
65+
.Call(`_SCRABBLE_recoverData`, newXs)
66+
}
67+
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#' Test data for scrabble
2+
#'
3+
#' @description "demo_data" is a data list with the length of 3. The first element in
4+
#' the list is generated drop-out scRNAseq data with 800 genes and 1000 cells. The second
5+
#' element in the list is the generated bulk RNAseq data with 800 genes. The third
6+
#' element is the true scRNAseq data without dropouts. The steps of generating the data
7+
#' is shown in Details section.
8+
#'
9+
#' @usage demo_data
10+
#'
11+
#' @author Tao Peng, Kai Tan
12+
#'
13+
#' @details The data set was generated from the well-developed R package Splatter.
14+
#'
15+
"demo_data"
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# plot the figures of data
2+
plot_data <- function(data,name){
3+
limit <- c(0,5)
4+
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
5+
print(dim(data))
6+
colnames(data) <- NULL
7+
rownames(data) <- NULL
8+
longData<-melt(as.matrix(data))
9+
pl <- ggplot(longData, aes(x = Var2, y = Var1)) +
10+
geom_raster(aes(fill=value)) +
11+
scale_colour_gradient2(limits=c(0, 5)) +
12+
scale_fill_gradientn(colours = c("white", "blue", "red"), values = c(0,0.6,1)) +
13+
theme_bw() +
14+
scale_y_discrete(name ="Genes") +
15+
ggtitle(name) +
16+
scale_x_discrete(name ="Cells") +
17+
theme(panel.grid.major = element_blank(),
18+
legend.position="bottom",
19+
panel.grid.minor = element_blank(),
20+
panel.background = element_blank(),
21+
line = element_blank(),
22+
plot.title = element_text(family = "Helvetica", face = "bold", size = (8)),
23+
axis.title = element_text(family = "Helvetica", size = (6)),
24+
axis.text.x = element_blank(),
25+
axis.text.y = element_blank()) +
26+
theme(legend.text=element_text(size=6),legend.title = element_text(size = 6))
27+
28+
return(pl)
29+
}
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#' Runs SCRABBLE
2+
#'
3+
#' SCRABBLE imputes drop-out data by optimizing an objective function that consists of three terms.
4+
#' The first term ensures that imputed values for genes with nonzero expression remain as close to their
5+
#' original values as possible, thus minimizing unwanted bias towards expressed genes. The second term ensures
6+
#' the rank of the imputed data matrix to be as small as possible. The rationale is that we only expect a
7+
#' limited number of distinct cell types in the samples. The third term operates on the bulk RNA-Seq data.
8+
#' It ensures consistency between the average gene expression of the aggregated imputed data and the
9+
#' average gene expression of the bulk RNA-Seq data. We developed 58 a convex optimization algorithm to minimize
10+
#' the objective function.
11+
#'
12+
#'
13+
#' @param data the input data list. The input
14+
#' data is a list of two datasets, scRNAseq and bulk RNAseq.
15+
#'
16+
#' @param parameter the vector of parameters. The first parameter is the value of alpha in the mathematical model
17+
#' , the second one is the value of beta in the mathematical model.
18+
#'
19+
#' @param nIter the maximum iterations, the default is 60.
20+
#'
21+
#' @param error_out_threshold the threshold of the error between the current imputed matrix and the previous one.
22+
#' Default is 1e-5.
23+
#'
24+
#' @param nIter_inner the maximum interations of calculating the sub-optimization problem. Default is 60.
25+
#'
26+
#' @param error_inner_threshold the threshold of the error between the current updated matrix and the previous one.
27+
#' Default is 1e-5.
28+
#'
29+
#' @examples
30+
#' # Set up the parameter used in SCRABBLE
31+
#' parameter <- c(1, 1e-6, 1e-4)
32+
#'
33+
#' # Run SCRABLE
34+
#' result <- scrabble(data,parameter = parameter)
35+
#'
36+
#' @return A data matrix with the same size of the input scRNAseq data
37+
#'
38+
#' @rdname SCRABBLE
39+
#'
40+
#' @export
41+
#'
42+
#'
43+
scrabble <- function(data,
44+
parameter,
45+
nIter = 20,
46+
error_out_threshold = 1e-4,
47+
nIter_inner = 20,
48+
error_inner_threshold = 1e-4){
49+
50+
# Use the sparse matrix to store the matrix
51+
Y <- as(t(as.matrix(data[[1]])), "dgCMatrix")
52+
53+
# Transpose the data matrix for the optimization
54+
zones <- (Y > 0)*1
55+
n_row <- nrow(Y)
56+
n_col <- ncol(Y)
57+
58+
print(paste0('SCRABBLE begins the imputation of the data with ',n_col,' genes and ', n_row, ' cells'))
59+
# Define the parameters
60+
alpha <- parameter[1]
61+
beta <- parameter[2]
62+
gamma <- parameter[3]
63+
64+
# prepare the bulk data
65+
if (isempty(data[[2]])){
66+
beta <- 0*beta
67+
Z <- matrix(1, nrow = 1, ncol = n_col)
68+
}else{
69+
Z <- getZ(as.matrix(data[[2]]), n_row)
70+
}
71+
72+
# prepare the matrices for the following iteration
73+
D <- matrix(1, nrow = 1, ncol = n_row)
74+
A <- getA(D, beta, gamma, n_row)
75+
B <- getB(D, Z, Y, beta)
76+
77+
# initialize the X,Y,and Lambda for the iteration
78+
X <- Y
79+
newX <- X
80+
newY <- as.matrix(Y)
81+
Lambda <- matrix(0, nrow = n_row, ncol = n_col)
82+
83+
# set up the thresholds for iterations and the errors
84+
k <- 0
85+
error <- 1
86+
87+
print(paste0('Imputation initialization is finished'))
88+
print(paste0('... ....'))
89+
while((k < nIter) & (error > error_out_threshold)){
90+
91+
# update the X
92+
X <- newX
93+
Y <- newY
94+
95+
gamma_Y_B_Lambda <- gamma*Y + B - Lambda
96+
97+
newX <- ToDense(newX)
98+
99+
newX <- cDescent(gamma_Y_B_Lambda,
100+
A,
101+
zones,
102+
newX,
103+
nIter_inner,
104+
error_inner_threshold)
105+
106+
newX <- ToSparse(newX)
107+
108+
# STV
109+
S <- getS(newX, Lambda, gamma)
110+
111+
tau <- alpha/gamma
112+
113+
result <- svt(S, lambda = tau)
114+
115+
s <- result[[1]] - tau
116+
117+
s[s < 0] <- 0
118+
newY <- getY(s, result[[2]], result[[3]])
119+
120+
error <- norm(as.matrix(log10(X + 1) - log10(newX + 1)), type = c("2"))/norm(as.matrix(log10(X + 1)), type = c("2"))
121+
122+
if (k == 0){error = 1}
123+
k <- k + 1
124+
Lambda <- updateLambda(Lambda, newX, newY, gamma)
125+
}
126+
127+
print(paste0('Imputation is finished'))
128+
129+
return(recoverData(newX))
130+
131+
}

0 commit comments

Comments
 (0)