updated

tanlabcode · tanlabcode · commit b3d9c3dfc0da · 2019-01-11T13:05:13.000-05:00
diff --git a/R/R/data.R b/R/R/data.R
@@ -1,8 +1,8 @@
 #' Test data for scrabble
 #'
 #' @description  "data" is a data list with the length of 3. The first element in
-#' the list is generated drop-out scRNAseq data with 732 genes and 1000 cells. The second
-#' element in the list is the generated bulk RNAseq data with 732 genes. The third
+#' the list is generated drop-out scRNAseq data with 800 genes and 1000 cells. The second
+#' element in the list is the generated bulk RNAseq data with 800 genes. The third
 #' element is the true scRNAseq data without dropouts. The steps of generating the data
 #' is shown in Details section.
 #'
@@ -12,22 +12,6 @@
 #'
 #' @author Tao Peng, Kai Tan
 #'
-#' @details The data set was generated from down sampling from bulk RNAseq data.
-#' We used the bulk RNA-Seq data set of mouse hair follicles (GSE85039).
-#' In total, the dataset contains 20 different combinations of anatomic
-#' sites and developmental time points, thus constituting a high dimensional
-#' measurement space. We used the following procedures to generate the
-#' drop-out datasets. 1) We selected 732 genes that are
-#' differentially expressed in the 20 conditions based on ANOVA analysis.
-#' 2) We randomly selected 10 out of the 20 conditions.  3) For each condition,
-#' we generated 100 resampled datasets. The means and standard deviations of
-#' genes were calculated for each condition based on the 100 resampled datasets.
-#' 4) 100 new datasets were generated based on the mean and the standard deviation
-#' of each gene. 5) The final data set was obtained by combining 1000 samples
-#' representing the 10 conditions. This 1000x732 matrix now represents 1000 cells
-#' and 732 genes. 6) we make the drop-out rate of each gene in each cell following
-#' a double exponential function . Zero values are introduced into the simulated
-#' data for each gene in each cell based on the Bernoulli distribution defined by
-#' the corresponding drop-out rate.
+#' @details The data set was simulated using the well-developed R package Splatter.
 #'
 "data"
diff --git a/R/R/data_result_4.mat b/R/R/data_result_4.mat
diff --git a/R/R/plotting_functions.R b/R/R/plotting_functions.R
@@ -1,14 +1,15 @@
 # plot the figures of data
 plot_data <- function(data,name){
-  limit <- c(0,4)
+  limit <- c(0,5)
   myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
+  print(dim(data))
   colnames(data) <- NULL
   rownames(data) <- NULL
   longData<-melt(as.matrix(data))
   pl <- ggplot(longData, aes(x = Var2, y = Var1)) +
     geom_raster(aes(fill=value)) +
-    scale_colour_gradient2(limits=c(0, 4)) +
-    scale_fill_gradient2(limits=c(0, 4),low = "blue", mid = "white",high = "red", midpoint = 2) +
+    scale_colour_gradient2(limits=c(0, 5)) +
+    scale_fill_gradientn(colours = c("white", "blue", "red"), values = c(0,0.6,1)) +
     theme_bw()  +
     scale_y_discrete(name ="Genes") +
     ggtitle(name) +
@@ -18,10 +19,11 @@ plot_data <- function(data,name){
           panel.grid.minor = element_blank(),
           panel.background = element_blank(),
           line = element_blank(),
-          plot.title = element_text(family = "Helvetica", face = "bold", size = (12)),
-          axis.title = element_text(family = "Helvetica", size = (10)),
+          plot.title = element_text(family = "Helvetica", face = "bold", size = (8)),
+          axis.title = element_text(family = "Helvetica", size = (6)),
           axis.text.x = element_blank(),
           axis.text.y = element_blank()) +
-    theme(legend.text=element_text(size=10),legend.title = element_text(size = 10))
+    theme(legend.text=element_text(size=6),legend.title = element_text(size = 6))
+
   return(pl)
 }
diff --git a/R/R/scrabble.R b/R/R/scrabble.R
@@ -10,30 +10,28 @@
 #' the objective function.
 #'
 #'
-#' @param data the input data list. There are two cases SCRABBLE could handle. The first one is that the input
-#' data is a list of two datasets, scRNAseq and bulk RNAseq. The second one is scRNAseq only. The third one is
-#' the parameter for penalty term
+#' @param data the input data list. The input
+#' data is a list of two datasets, scRNAseq and bulk RNAseq.
 #'
 #' @param parameter the vector of parameters. The first parameter is the value of alpha in the mathematical model
-#' and the second one is the value of beta in the mathematical model.
+#' , the second one is the value of beta in the mathematical model.
 #'
-#' @param nIter the maximum iterations.
+#' @param nIter the maximum number of iterations. Default is 60.
 #'
 #' @param error_out_threshold the threshold of the error between the current imputed matrix and the previous one.
 #' Default is 1e-5.
 #'
-#' @param nIter_inner the maximum interations of calculating the sub-optimization problem. Default is 5.
+#' @param nIter_inner the maximum iterations of calculating the sub-optimization problem. Default is 100.
 #'
 #' @param error_inner_threshold the threshold of the error between the current updated matrix and the previous one.
 #' Default is 1e-5.
 #'
 #' @examples
 #' # Set up the parameter used in SCRABBLE
-#' parameter <- c(100,2e-7)
-#' nIter <- 100
+#' parameter <- c(1, 1e-6, 1e-4)
 #'
 #' # Run SCRABLE
-#' result <- scrabble(data,parameter = parameter, nIter = nIter)
+#' result <- scrabble(data,parameter = parameter)
 #'
 #' @return A data matrix with the same size of the input scRNAseq data
 #'
@@ -44,9 +42,9 @@
 #'
 scrabble <- function(data,
                      parameter,
-                     nIter,
-                     error_out_threshold = 1e-5,
-                     nIter_inner = 20,
+                     nIter = 60,
+                     error_out_threshold = 1e-7,
+                     nIter_inner = 100,
                      error_inner_threshold = 1e-5){
 
   # Use the sparse matrix to store the matrix
diff --git a/R/R/test_scrabble.Rmd b/R/R/test_scrabble.Rmd
@@ -5,90 +5,12 @@ date: "5/18/2018"
 output: html_document
 ---
 
-```{r setup, include=FALSE}
-# setwd("/mnt/isilon/tan_lab/pengt/Documents/Chop/Revision_Imputation/SCRABBLE/R_package/code")
-# library(rsvd)
-# library("corpcor")
-# library(rARPACK)
-# library(pracma)
-# library(rapport)
-.libPaths("/mnt/isilon/cbmi/tan_lab/pengt/Documents/R_3_5")
-library(SCRABBLE)
-library(VennDiagram)
-library(Rtsne)
-library(dplyr)
-library(DT)
-library(ggpubr)
-library(ggsignif)
-library(biomaRt)
-library(scatterplot3d)
-library(R.matlab)
-library(scImpute)
-library(ggplot2)
-library(reshape2)
-library(ggfortify)
-library(gridExtra)
-library(viridis)
-library(pheatmap)
-library(RColorBrewer)
-library(dendsort)
-library(entropy)
-library(DrImpute)
-library(Rmagic)
-```
-
-```{r}
-data_sc <- read.csv(file = "/mnt/isilon/tan_lab/pengt/Documents/Chop/Revision_Imputation/SCRABBLE/R_package/code/test_data_sc.csv",header = F)
-data_bulk <- read.csv(file = "/mnt/isilon/tan_lab/pengt/Documents/Chop/Revision_Imputation/SCRABBLE/R_package/code/test_data_bulk.csv",header = F)
-data_true <- read.csv(file = "/mnt/isilon/tan_lab/pengt/Documents/Chop/Revision_Imputation/SCRABBLE/R_package/code/test_data_true.csv",header = F)
-```
-
-```{r}
-data1 <- list()
-data1[[1]] <- data$data_dropout
-data1[[2]] <- data$data_bulk
-```
-
-
-```{r}
-parameter <- c(10,1e-5,1e-4)
-nIter <- 20
-```
-
-```{r}
-result <- scrabble(data1,
-                   parameter = parameter, 
-                   nIter = 30,
-                   error_out_threshold = 1e-7, 
-                   nIter_inner = 100,
-                   error_inner_threshold = 1e-5)
-```
-
-```{r}
-data_matlab_tmp <- readMat('data_result_4.mat')
-data_matlab <- data_matlab_tmp$dataRecovered
-
-data1 <- readMat('demo_data_HF.mat')
-
-```
-
-```{r,fig.height=3,fig.width=10}
-pl <- list()
-pl[[1]] <- plot_data(log10(data_true + 1), 'True Data')
-pl[[2]] <- plot_data(log10(result+1),'Imputed Data')
-pl[[3]] <- plot_data(log10(data_sc+1),'Dropout Data')
-pl[[4]] <- plot_data(log10(data_matlab + 1), 'Matlab Data')
-p <- grid.arrange(grobs = pl,ncol = 4)
-```
-
-
-
 # This is used to generate the help file
 ```{r}
 devtools::document(roclets=c('rd', 'vignette'))
 # generate the manual 
 system("R CMD Rd2pdf . --title=Package SCRABBLE --output=man/manual.pdf --force --no-clean --internals")
 # generate the file
-devtools::use_vignette("my-vignette")
+devtools::build_vignettes()
 ```