@@ -49,23 +49,27 @@ get_data <- function(tissue_name,
49
49
# the gene list of the scRNAseq data
50
50
common_gene = rownames(data_select )
51
51
52
+ # get the bulk RNAseq data
52
53
for (i in c(1 : length(bulk_file ))){
53
54
54
55
tmp = get_filtered_mRNA_bulk(bulk_file [i ])
55
56
56
57
common_gene = intersect(common_gene ,tmp $ gene_name )
57
58
58
59
bulk_data_tmp [[i ]] = tmp
60
+
59
61
}
60
62
61
63
# combine bulk RNAseq data
62
64
data_bulk = c()
63
65
66
+ # make the gene list of each data consistent
64
67
for (i in c(1 : length(bulk_file ))){
65
68
66
69
tmp = bulk_data_tmp [[i ]]
67
70
68
71
data_bulk = cbind(data_bulk , tmp $ FPKM [match(common_gene ,tmp $ gene_name )])
72
+
69
73
}
70
74
71
75
# get the bulk RNAseq for the imputation
@@ -121,15 +125,131 @@ get_data <- function(tissue_name,
121
125
122
126
data [[5 ]] = data_bulk_avg
123
127
128
+ # save the data
124
129
saveRDS(data ,file = paste0(" data_sc_bulk/" ,tissue_name ," _imputation.rds" ))
125
130
126
131
}
127
132
133
# plot tsne
#
# Draw a tSNE scatter plot coloured by cluster and write each cluster's
# label at the median position of its cells.
#
# Parameter in the function
# plot_data: the data for visualization; the code reads the columns
#            x, y and ident from it
# name: the name (title) of the plot
# dot.size: the size of the dot
#
# Returns the ggplot object.
plot_tsne <- function(plot_data,
                      name,
                      dot.size = 1){

  # Per-cluster label position: the median of each coordinate.
  # summarize is namespaced like group_by so that a masking
  # plyr::summarize (which ignores grouping) cannot be picked up.
  centers <- dplyr::summarize(
    dplyr::group_by(plot_data, ident),
    x = median(x),
    y = median(y)
  )

  # plot the dots
  p <- ggplot(plot_data, aes(x = x, y = y, color = as.factor(ident))) +
    ggtitle(name) +
    xlab("TSNE_1") +
    ylab("TSNE_2") +
    geom_point(size = dot.size) +
    theme_cowplot() +
    theme(plot.title = element_text(size = 18, hjust = 0.4),
          axis.text = element_text(size = 12),
          legend.title = element_blank())

  # plot the annotation number at the cluster centers
  p <- p + geom_text(data = centers,
                     mapping = aes(x = x, y = y, label = ident),
                     colour = "black")

  return(p)

}
164
+
165
+
166
calculate_cluster <- function(cluster_index,
                              cluster,
                              data_tsne){

  # Compute the Dunn index of one cluster against all other cells
  # (one-vs-rest) in each of the first five tSNE embeddings.
  #
  # Parameter in the function
  # cluster_index: the index of the cluster used for calculating the Dunn index
  # cluster: the clustering information of the cells
  # data_tsne: the tsne data; element k must expose its embedding as $Y
  #
  # Returns a one-element list whose first element is a 5 x 1 matrix of
  # Dunn indices (row k belongs to data_tsne[[k]]). The list wrapper is
  # kept because the callers in this file read the result as tmp_val[[1]].

  # number of embeddings that are evaluated
  n_embedding <- 5

  # relabel every cell outside the cluster of interest with one fresh
  # label, so the statistics describe "this cluster" versus "the rest"
  cluster1 <- cluster
  cluster1[cluster1 != cluster_index] <- max(cluster) + 1

  stats_cluster_dun <- matrix(0, nrow = n_embedding, ncol = 1)

  # calculate the Dunn index on the pairwise distances of each embedding
  for (k in seq_len(n_embedding)) {

    stat_tmp <- cluster.stats(dist(as.matrix(data_tsne[[k]]$Y)), cluster1)

    stats_cluster_dun[k] <- stat_tmp$dunn

  }

  # the original returned the undefined name `stats_cluster`
  return(list(stats_cluster_dun))

}
207
+
208
# plot the Dunn index values
plot_bun_value <- function(bun_value, annotation_info){

  # Draw a grouped bar chart of -log2(Dunn index).
  #
  # Parameter in the function
  # bun_value: the Dunn index values; after melting, the first index (X1)
  #            is used as the bar fill and the second index (X2) as the
  #            x position. Presumably rows are the five data types and
  #            columns are clusters -- confirm against the caller.
  # annotation_info: the cell annotation information; column 2 supplies the
  #                  x-axis breaks and column 1 the labels shown for them
  #
  # Returns the ggplot object.

  # calculate the -log2 values
  index_tmp <- -log2(bun_value)

  # prepare the data for the bar chart (long format: one row per cell
  # of the matrix)
  longData <- melt(as.matrix(index_tmp))

  # define the column name
  colnames(longData) <- c("X1", "X2", "value")

  # plot the bar chart; no colour aesthetic is mapped, so only the fill
  # scale is configured
  pp <- ggplot(data = longData,
               aes(x = as.factor(X2), y = value, fill = as.factor(X1))) +
    geom_bar(stat = "identity", position = position_dodge()) +
    scale_fill_manual(values = c("#00AFBB", "#0000CD", "#E7B800", "#FC4E07", "#6ebb00"),
                      name = "Data Type",
                      breaks = c(1, 2, 3, 4, 5),
                      labels = c("Raw Data", "DrImpute", "scImpute", "MAGIC", "SCRABBLE")) +
    theme_bw() +
    ylab("-log2(Dunn Index)") +
    scale_x_discrete(breaks = annotation_info[, 2], labels = annotation_info[, 1]) +
    theme(axis.title.x = element_blank(),
          axis.text.x = element_text(size = 9, angle = 45, vjust = 0.6),
          axis.title.y = element_text(size = 14),
          axis.text.y = element_text(size = 12),
          legend.position = "bottom") +
    guides(fill = guide_legend(title = "Data Type"))

  return(pp)

}
128
243
129
244
pdf_dun_tsne <- function (tissue_name ){
130
245
246
+ # Parameter in the function
247
+ # tissue_name: the name of the tissue
248
+
249
+ # load the tSNE data
131
250
data_tsne = readRDS(file = paste0(" data_all/" ,tissue_name ," _TSNE.rds" ))
132
251
252
+ # load the scRNAseq and related data
133
253
load(paste0(" data_sc_bulk/sc_" ,tissue_name ," .RData" ))
134
254
135
255
pl = list ()
@@ -157,6 +277,7 @@ pdf_dun_tsne <- function(tissue_name){
157
277
colnames(plot_data ) = c(" x" ," y" ," ident" )
158
278
159
279
pl [[i ]] = plot_tsne(plot_data ,paste0(method_name [i ]," : " ,tissue_name ), dot.size = 1 )
280
+
160
281
}
161
282
162
283
p1 = grid.arrange(grobs = pl ,ncol = 3 )
@@ -184,7 +305,7 @@ pdf_dun_tsne <- function(tissue_name){
184
305
185
306
if (i %in% cluster_n ){
186
307
187
- tmp_val = calcuate_cluster (i ,as.numeric(knn5 ),data_tsne )
308
+ tmp_val = calculate_cluster (i ,as.numeric(knn5 ),data_tsne )
188
309
189
310
bun_value = cbind(bun_value ,tmp_val [[1 ]])
190
311
@@ -209,8 +330,13 @@ pdf_dun_tsne <- function(tissue_name){
209
330
210
331
pdf_dun_tsne1 = function (tissue_name ){
211
332
333
+ # Parameter in the function
334
+ # tissue_name: the name of the tissue
335
+
336
+ # load the tSNE data
212
337
data_tsne = readRDS(file = paste0(" data_all/" ,tissue_name ," _TSNE.rds" ))
213
338
339
+ # load the scRNAseq and related data
214
340
load(paste0(" data_sc_bulk/sc_" ,tissue_name ," .RData" ))
215
341
216
342
pl = list ()
@@ -265,7 +391,7 @@ pdf_dun_tsne1 = function(tissue_name){
265
391
266
392
if (i %in% cluster_n ){
267
393
268
- tmp_val = calcuate_cluster (i ,as.numeric(knn5 ),data_tsne )
394
+ tmp_val = calculate_cluster (i ,as.numeric(knn5 ),data_tsne )
269
395
270
396
bun_value = cbind(bun_value ,tmp_val [[1 ]])
271
397
0 commit comments