Skip to content

Commit fd5090a

Browse files
authored
Add files via upload
1 parent 0009a46 commit fd5090a

File tree

1 file changed

+50
-9
lines changed

1 file changed

+50
-9
lines changed

R/BenchMark/ESC_human/analysis_library.R

Lines changed: 50 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
1+
# calculate the variance across the rows or the columns
12
MatVar <- function(x, dim = 1, ...) {
23

4+
# parameter in the function
5+
# x: the input matrix
6+
# dim: the dimension along which the variance is computed.
7+
# 1 gives the variance of each row and 2 gives the variance of each column
8+
39
if(dim == 1){
410

511
rowSums((x - rowMeans(x, ...))^2, ...)/(dim(x)[2] - 1)
@@ -11,22 +17,34 @@ MatVar <- function(x, dim = 1, ...) {
1117
} else stop("Please enter valid dimension")
1218
}
1319

20+
# plot the pathway ratio
1421
plot_pathway_ratio <- function(dataV3, dataType, pathway_name){
1522

23+
# Parameter in the function
24+
# dataV3: the ratio of the different methods, the columns are
25+
# the different methods
26+
# dataType: the cell types
27+
# pathway_name: the name of the pathway database
28+
1629
if(dataType == "NPC"){
17-
# select the raw data, the imputed data by scImpute, scbMC, MAGIC
30+
31+
# number of columns in dataV3
1832
N = dim(dataV3)[2]
1933

34+
# define the lowest levels of the pvalues
2035
hlim = 190
2136

37+
# prepare the data as a vector for the boxplot
2238
dataV1 = data.frame(y = (as.vector(as.matrix(dataV3))))
2339

2440
dataV1$group = rep(c(1:5),N)
2541

42+
# calculate the pvalues
2643
my_comparisons = list( c("1", "5"), c("2", "5"), c("3", "5"), c("4", "5"))
2744

2845
pval = compare_means(y ~ group,data = dataV1, method = "t.test", ref.group = "5")
2946

47+
# plot the boxplots
3048
pl = ggboxplot(dataV1, x = "group", y = "y", fill = "group",
3149
palette = c("#00AFBB","#0000CD", "#E7B800", "#FC4E07", "#6ebb00"),outlier.shape = NA) +
3250
stat_boxplot(geom = "errorbar", width = 0.3) +
@@ -45,19 +63,23 @@ plot_pathway_ratio <- function(dataV3, dataType, pathway_name){
4563

4664
}else{
4765

48-
# select the raw data, the imputed data by scImpute, scbMC, MAGIC
66+
# number of columns in dataV3
4967
N = dim(dataV3)[2]
5068

69+
# define the lowest levels of the pvalues
5170
hlim = 150
5271

72+
# prepare the data as a vector for the boxplot
5373
dataV1 = data.frame(y = (as.vector(as.matrix(dataV3))))
5474

75+
# assign the group labels (1-5), then calculate the pvalues against group 5
5576
dataV1$group = rep(c(1:5),N)
5677

5778
my_comparisons = list( c("1", "5"), c("2", "5"), c("3", "5"), c("4", "5"))
5879

5980
pval = compare_means(y ~ group,data = dataV1, method = "t.test", ref.group = "5")
6081

82+
# plot the boxplots
6183
pl = ggboxplot(dataV1, x = "group", y = "y", fill = "group",
6284
palette = c("#00AFBB","#0000CD", "#E7B800", "#FC4E07", "#6ebb00"),outlier.shape = NA) +
6385
stat_boxplot(geom = "errorbar", width = 0.3) +
@@ -83,38 +105,48 @@ plot_pathway_ratio <- function(dataV3, dataType, pathway_name){
83105

84106
generate_index <- function(data_set, pathway_name){
85107

108+
# Parameter in the function
109+
# data_set: the name of the data set (cell type), used to build the input file name
110+
# pathway_name: the name of pathway database
111+
86112
# load the gene
87-
data_gene = fread(file = "data_all/gene_ESC.csv",
88-
header = FALSE)
113+
data_gene = fread(file = "data_all/gene_name.csv",
114+
sep=',', header = FALSE)
89115

116+
# load the scRNAseq data
90117
data_sc = as.matrix(fread(file = paste0("data_all/data_sc_",
91118
data_set,
92119
".csv")))
93120

94-
121+
# calculate the variance across the genes
95122
var0 = MatVar(data_sc,1)
96123

97124
index_sc = var0 > 1e-10
98125

99126
data_gene = data_gene[index_sc,]
100127

101-
102128
n_gene = dim(data_gene)[1]
103129

104-
130+
# determine the pathway numbers
105131
if(pathway_name == "IPA"){
132+
106133
N = 186
134+
107135
}
108136

109137
if(pathway_name == "KEGG"){
138+
110139
N = 186
140+
111141
}
112142

113143
if(pathway_name == "REACTOME"){
144+
114145
N = 674
146+
115147
}
116148

117-
149+
# calculate the index
118150
index = list()
119151

120152
for(i in c(1:N)){
@@ -154,9 +186,13 @@ generate_index <- function(data_set, pathway_name){
154186
)
155187
}
156188

157-
189+
# Calculate the ratio
158190
calculate_ratio <- function(data_set, pathway_name, method_name){
159191

192+
# Parameter in the function
193+
# data_set: the data of cell type
194+
# pathway_name: the name of the pathway
195+
# method_name: the name of the method used for the imputation
160196

161197
load(file = paste0("data_all/",
162198
pathway_name,
@@ -203,19 +239,23 @@ calculate_ratio <- function(data_set, pathway_name, method_name){
203239
"_scrabble_imputation.rds"))
204240
}
205241

242+
# determine the pathway numbers
206243
if(pathway_name == "IPA"){
207244

208245
N = 186
246+
209247
}
210248

211249
if(pathway_name == "KEGG"){
212250

213251
N = 186
252+
214253
}
215254

216255
if(pathway_name == "REACTOME"){
217256

218257
N = 674
258+
219259
}
220260

221261
values = c()
@@ -231,6 +271,7 @@ calculate_ratio <- function(data_set, pathway_name, method_name){
231271
mean_value = c()
232272

233273
if (N_index > 10){
274+
234275
for(j in c(1:101)){
235276

236277
tmp_data = data_sc[tmp_index[j,],]

0 commit comments

Comments
 (0)