1
+ # calculate the variance across the rows or the columns
1
2
MatVar <- function (x , dim = 1 , ... ) {
2
3
4
+ # parameter in the function
5
+ # x: the input matrix
6
+ # dim: the dimension along which the variance is computed:
7
+ # 1 returns one variance per row, 2 returns one variance per column
8
+
3
9
if (dim == 1 ){
4
10
5
11
rowSums((x - rowMeans(x , ... ))^ 2 , ... )/ (dim(x )[2 ] - 1 )
@@ -11,22 +17,34 @@ MatVar <- function(x, dim = 1, ...) {
11
17
} else stop(" Please enter valid dimension" )
12
18
}
13
19
20
+ # plot the pathway ratio
14
21
plot_pathway_ratio <- function (dataV3 , dataType , pathway_name ){
15
22
23
+ # Parameter in the function
24
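+ # dataV3: the ratios of the different methods; the columns are
25
+ # the different methods
+ # NOTE(review): the body labels as.vector(as.matrix(dataV3)) with
+ # rep(c(1:5), N), i.e. by row position within each column - confirm
+ # whether the methods are in the rows or the columns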
26
+ # dataType: the cell types
27
+ # pathway_name: the name of the pathway database
28
+
16
29
if (dataType == " NPC" ){
17
- # select the raw data, the imputed data by scImpute, scbMC, MAGIC
30
+
31
+ # define the number of data points
18
32
N = dim(dataV3 )[2 ]
19
33
34
+ # define the lowest levels of the pvalues
20
35
hlim = 190
21
36
37
+ # prepare the data as a vector for the boxplot
22
38
dataV1 = data.frame (y = (as.vector(as.matrix(dataV3 ))))
23
39
24
40
dataV1 $ group = rep(c(1 : 5 ),N )
25
41
42
+ # calculate the pvalues
26
43
my_comparisons = list ( c(" 1" , " 5" ), c(" 2" , " 5" ), c(" 3" , " 5" ), c(" 4" , " 5" ))
27
44
28
45
pval = compare_means(y ~ group ,data = dataV1 , method = " t.test" , ref.group = " 5" )
29
46
47
+ # plot the boxplots
30
48
pl = ggboxplot(dataV1 , x = " group" , y = " y" , fill = " group" ,
31
49
palette = c(" #00AFBB" ," #0000CD" , " #E7B800" , " #FC4E07" , " #6ebb00" ),outlier.shape = NA ) +
32
50
stat_boxplot(geom = " errorbar" , width = 0.3 ) +
@@ -45,19 +63,23 @@ plot_pathway_ratio <- function(dataV3, dataType, pathway_name){
45
63
46
64
}else {
47
65
48
- # select the raw data, the imputed data by scImpute, scbMC, MAGIC
66
+ # define the number of data points
49
67
N = dim(dataV3 )[2 ]
50
68
69
+ # define the lowest levels of the pvalues
51
70
hlim = 150
52
71
72
+ # prepare the data as a vector for the boxplot
53
73
dataV1 = data.frame (y = (as.vector(as.matrix(dataV3 ))))
54
74
75
+ # label each value with its group (1-5), then calculate the pvalues
55
76
dataV1 $ group = rep(c(1 : 5 ),N )
56
77
57
78
my_comparisons = list ( c(" 1" , " 5" ), c(" 2" , " 5" ), c(" 3" , " 5" ), c(" 4" , " 5" ))
58
79
59
80
pval = compare_means(y ~ group ,data = dataV1 , method = " t.test" , ref.group = " 5" )
60
81
82
+ # plot the boxplots
61
83
pl = ggboxplot(dataV1 , x = " group" , y = " y" , fill = " group" ,
62
84
palette = c(" #00AFBB" ," #0000CD" , " #E7B800" , " #FC4E07" , " #6ebb00" ),outlier.shape = NA ) +
63
85
stat_boxplot(geom = " errorbar" , width = 0.3 ) +
@@ -83,38 +105,48 @@ plot_pathway_ratio <- function(dataV3, dataType, pathway_name){
83
105
84
106
generate_index <- function (data_set , pathway_name ){
85
107
108
+ # Parameter in the function
109
+ # data_set: the data of cell types
110
+ # pathway_name: the name of pathway database
111
+
86
112
# load the gene names
87
- data_gene = fread(file = " data_all/gene_ESC .csv" ,
88
- header = FALSE )
113
+ data_gene = fread(file = " data_all/gene_name .csv" ,
114
+ sep = ' , ' , header = FALSE )
89
115
116
+ # load the scRNAseq data
90
117
data_sc = as.matrix(fread(file = paste0(" data_all/data_sc_" ,
91
118
data_set ,
92
119
" .csv" )))
93
120
94
-
121
+ # calculate the variance of each row of data_sc (rows presumably genes - confirm)
95
122
var0 = MatVar(data_sc ,1 )
96
123
97
124
index_sc = var0 > 1e-10
98
125
99
126
data_gene = data_gene [index_sc ,]
100
127
101
-
102
128
n_gene = dim(data_gene )[1 ]
103
129
104
-
130
+ # determine the pathway numbers
105
131
if (pathway_name == " IPA" ){
132
+
106
133
N = 186
134
+
107
135
}
108
136
109
137
if (pathway_name == " KEGG" ){
138
+
110
139
N = 186
140
+
111
141
}
112
142
113
143
if (pathway_name == " REACTOME" ){
144
+
114
145
N = 674
146
+
115
147
}
116
148
117
-
149
+ # calculate the index
118
150
index = list ()
119
151
120
152
for (i in c(1 : N )){
@@ -154,9 +186,13 @@ generate_index <- function(data_set, pathway_name){
154
186
)
155
187
}
156
188
157
-
189
+ # Calculate the ratio
158
190
calculate_ratio <- function (data_set , pathway_name , method_name ){
159
191
192
+ # Parameter in the function
193
+ # data_set: the data of cell type
194
+ # pathway_name: the name of the pathway
195
+ # method_name: the name of the method used for the imputation
160
196
161
197
load(file = paste0(" data_all/" ,
162
198
pathway_name ,
@@ -203,19 +239,23 @@ calculate_ratio <- function(data_set, pathway_name, method_name){
203
239
" _scrabble_imputation.rds" ))
204
240
}
205
241
242
+ # determine the pathway numbers
206
243
if (pathway_name == " IPA" ){
207
244
208
245
N = 186
246
+
209
247
}
210
248
211
249
if (pathway_name == " KEGG" ){
212
250
213
251
N = 186
252
+
214
253
}
215
254
216
255
if (pathway_name == " REACTOME" ){
217
256
218
257
N = 674
258
+
219
259
}
220
260
221
261
values = c()
@@ -231,6 +271,7 @@ calculate_ratio <- function(data_set, pathway_name, method_name){
231
271
mean_value = c()
232
272
233
273
if (N_index > 10 ){
274
+
234
275
for (j in c(1 : 101 )){
235
276
236
277
tmp_data = data_sc [tmp_index [j ,],]
0 commit comments