Skip to content

Commit b3d9c3d

Browse files
committed
updated
1 parent ba2b134 commit b3d9c3d

File tree

5 files changed

+22
-116
lines changed

5 files changed

+22
-116
lines changed

R/R/data.R

100644100755
Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#' Test data for scrabble
22
#'
33
#' @description "data" is a data list with the length of 3. The first element in
4-
#' the list is generated drop-out scRNAseq data with 732 genes and 1000 cells. The second
5-
#' element in the list is the generated bulk RNAseq data with 732 genes. The third
4+
#' the list is generated drop-out scRNAseq data with 800 genes and 1000 cells. The second
5+
#' element in the list is the generated bulk RNAseq data with 800 genes. The third
66
#' element is the true scRNAseq data without dropouts. The steps of generating the data
77
#' are shown in the Details section.
88
#'
@@ -12,22 +12,6 @@
1212
#'
1313
#' @author Tao Peng, Kai Tan
1414
#'
15-
#' @details The data set was generated from down sampling from bulk RNAseq data.
16-
#' We used the bulk RNA-Seq data set of mouse hair follicles (GSE85039).
17-
#' In total, the dataset contains 20 different combinations of anatomic
18-
#' sites and developmental time points, thus constituting a high dimensional
19-
#' measurement space. We used the following procedures to generate the
20-
#' drop-out datasets. 1) We selected 732 genes that are
21-
#' differentially expressed in the 20 conditions based on ANOVA analysis.
22-
#' 2) We randomly selected 10 out of the 20 conditions. 3) For each condition,
23-
#' we generated 100 resampled datasets. The means and standard deviations of
24-
#' genes were calculated for each condition based on the 100 resampled datasets.
25-
#' 4) 100 new datasets were generated based on the mean and the standard deviation
26-
#' of each gene. 5) The final data set was obtained by combining 1000 samples
27-
#' representing the 10 conditions. This 1000x732 matrix now represents 1000 cells
28-
#' and 732 genes. 6) we make the drop-out rate of each gene in each cell following
29-
#' a double exponential function . Zero values are introduced into the simulated
30-
#' data for each gene in each cell based on the Bernoulli distribution defined by
31-
#' the corresponding drop-out rate.
15+
#' @details The data set was generated using the well-developed R package Splatter.
3216
#'
3317
"data"

R/R/data_result_4.mat

-5.22 MB
Binary file not shown.

R/R/plotting_functions.R

100644100755
Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
# plot the figures of data
22
plot_data <- function(data,name){
3-
limit <- c(0,4)
3+
limit <- c(0,5)
44
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
5+
print(dim(data))
56
colnames(data) <- NULL
67
rownames(data) <- NULL
78
longData<-melt(as.matrix(data))
89
pl <- ggplot(longData, aes(x = Var2, y = Var1)) +
910
geom_raster(aes(fill=value)) +
10-
scale_colour_gradient2(limits=c(0, 4)) +
11-
scale_fill_gradient2(limits=c(0, 4),low = "blue", mid = "white",high = "red", midpoint = 2) +
11+
scale_colour_gradient2(limits=c(0, 5)) +
12+
scale_fill_gradientn(colours = c("white", "blue", "red"), values = c(0,0.6,1)) +
1213
theme_bw() +
1314
scale_y_discrete(name ="Genes") +
1415
ggtitle(name) +
@@ -18,10 +19,11 @@ plot_data <- function(data,name){
1819
panel.grid.minor = element_blank(),
1920
panel.background = element_blank(),
2021
line = element_blank(),
21-
plot.title = element_text(family = "Helvetica", face = "bold", size = (12)),
22-
axis.title = element_text(family = "Helvetica", size = (10)),
22+
plot.title = element_text(family = "Helvetica", face = "bold", size = (8)),
23+
axis.title = element_text(family = "Helvetica", size = (6)),
2324
axis.text.x = element_blank(),
2425
axis.text.y = element_blank()) +
25-
theme(legend.text=element_text(size=10),legend.title = element_text(size = 10))
26+
theme(legend.text=element_text(size=6),legend.title = element_text(size = 6))
27+
2628
return(pl)
2729
}

R/R/scrabble.R

100644100755
Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,30 +10,28 @@
1010
#' the objective function.
1111
#'
1212
#'
13-
#' @param data the input data list. There are two cases SCRABBLE could handle. The first one is that the input
14-
#' data is a list of two datasets, scRNAseq and bulk RNAseq. The second one is scRNAseq only. The third one is
15-
#' the parameter for penalty term
13+
#' @param data the input data list. The input
14+
#' data is a list of two datasets, scRNAseq and bulk RNAseq.
1615
#'
1716
#' @param parameter the vector of parameters. The first parameter is the value of alpha in the mathematical model
18-
#' and the second one is the value of beta in the mathematical model.
17+
#' , the second one is the value of beta in the mathematical model.
1918
#'
20-
#' @param nIter the maximum iterations.
19+
#' @param nIter the maximum number of iterations. Default is 60.
2120
#'
2221
#' @param error_out_threshold the threshold of the error between the current imputed matrix and the previous one.
2322
#' Default is 1e-7.
2423
#'
25-
#' @param nIter_inner the maximum interations of calculating the sub-optimization problem. Default is 5.
24+
#' @param nIter_inner the maximum number of iterations for solving the sub-optimization problem. Default is 100.
2625
#'
2726
#' @param error_inner_threshold the threshold of the error between the current updated matrix and the previous one.
2827
#' Default is 1e-5.
2928
#'
3029
#' @examples
3130
#' # Set up the parameter used in SCRABBLE
32-
#' parameter <- c(100,2e-7)
33-
#' nIter <- 100
31+
#' parameter <- c(1, 1e-6, 1e-4)
3432
#'
3533
#' # Run SCRABBLE
36-
#' result <- scrabble(data,parameter = parameter, nIter = nIter)
34+
#' result <- scrabble(data,parameter = parameter)
3735
#'
3836
#' @return A data matrix with the same size of the input scRNAseq data
3937
#'
@@ -44,9 +42,9 @@
4442
#'
4543
scrabble <- function(data,
4644
parameter,
47-
nIter,
48-
error_out_threshold = 1e-5,
49-
nIter_inner = 20,
45+
nIter = 60,
46+
error_out_threshold = 1e-7,
47+
nIter_inner = 100,
5048
error_inner_threshold = 1e-5){
5149

5250
# Use the sparse matrix to store the matrix

R/R/test_scrabble.Rmd

100644100755
Lines changed: 1 addition & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -5,90 +5,12 @@ date: "5/18/2018"
55
output: html_document
66
---
77

8-
```{r setup, include=FALSE}
9-
# setwd("/mnt/isilon/tan_lab/pengt/Documents/Chop/Revision_Imputation/SCRABBLE/R_package/code")
10-
# library(rsvd)
11-
# library("corpcor")
12-
# library(rARPACK)
13-
# library(pracma)
14-
# library(rapport)
15-
.libPaths("/mnt/isilon/cbmi/tan_lab/pengt/Documents/R_3_5")
16-
library(SCRABBLE)
17-
library(VennDiagram)
18-
library(Rtsne)
19-
library(dplyr)
20-
library(DT)
21-
library(ggpubr)
22-
library(ggsignif)
23-
library(biomaRt)
24-
library(scatterplot3d)
25-
library(R.matlab)
26-
library(scImpute)
27-
library(ggplot2)
28-
library(reshape2)
29-
library(ggfortify)
30-
library(gridExtra)
31-
library(viridis)
32-
library(pheatmap)
33-
library(RColorBrewer)
34-
library(dendsort)
35-
library(entropy)
36-
library(DrImpute)
37-
library(Rmagic)
38-
```
39-
40-
```{r}
41-
data_sc <- read.csv(file = "/mnt/isilon/tan_lab/pengt/Documents/Chop/Revision_Imputation/SCRABBLE/R_package/code/test_data_sc.csv",header = F)
42-
data_bulk <- read.csv(file = "/mnt/isilon/tan_lab/pengt/Documents/Chop/Revision_Imputation/SCRABBLE/R_package/code/test_data_bulk.csv",header = F)
43-
data_true <- read.csv(file = "/mnt/isilon/tan_lab/pengt/Documents/Chop/Revision_Imputation/SCRABBLE/R_package/code/test_data_true.csv",header = F)
44-
```
45-
46-
```{r}
47-
data1 <- list()
48-
data1[[1]] <- data$data_dropout
49-
data1[[2]] <- data$data_bulk
50-
```
51-
52-
53-
```{r}
54-
parameter <- c(10,1e-5,1e-4)
55-
nIter <- 20
56-
```
57-
58-
```{r}
59-
result <- scrabble(data1,
60-
parameter = parameter,
61-
nIter = 30,
62-
error_out_threshold = 1e-7,
63-
nIter_inner = 100,
64-
error_inner_threshold = 1e-5)
65-
```
66-
67-
```{r}
68-
data_matlab_tmp <- readMat('data_result_4.mat')
69-
data_matlab <- data_matlab_tmp$dataRecovered
70-
71-
data1 <- readMat('demo_data_HF.mat')
72-
73-
```
74-
75-
```{r,fig.height=3,fig.width=10}
76-
pl <- list()
77-
pl[[1]] <- plot_data(log10(data_true + 1), 'True Data')
78-
pl[[2]] <- plot_data(log10(result+1),'Imputed Data')
79-
pl[[3]] <- plot_data(log10(data_sc+1),'Dropout Data')
80-
pl[[4]] <- plot_data(log10(data_matlab + 1), 'Matlab Data')
81-
p <- grid.arrange(grobs = pl,ncol = 4)
82-
```
83-
84-
85-
868
# This is used to generate the help file
879
```{r}
8810
devtools::document(roclets=c('rd', 'vignette'))
8911
# generate the manual
9012
system("R CMD Rd2pdf . --title=Package SCRABBLE --output=man/manual.pdf --force --no-clean --internals")
9113
# generate the file
92-
devtools::use_vignette("my-vignette")
14+
devtools::build_vignettes()
9315
```
9416

0 commit comments

Comments
 (0)