Skip to content

Commit 808985f

Browse files
authored
Add files via upload
1 parent fa78727 commit 808985f

File tree

1 file changed

+128
-2
lines changed

1 file changed

+128
-2
lines changed

R/BenchMark/MCA/analysis_library.R

Lines changed: 128 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,23 +49,27 @@ get_data <- function(tissue_name,
4949
# the gene list of the scRNAseq data
5050
common_gene = rownames(data_select)
5151

52+
# get the bulk RNAseq data
5253
for(i in c(1:length(bulk_file))){
5354

5455
tmp = get_filtered_mRNA_bulk(bulk_file[i])
5556

5657
common_gene = intersect(common_gene,tmp$gene_name)
5758

5859
bulk_data_tmp[[i]] = tmp
60+
5961
}
6062

6163
# combine bulk RNAseq data
6264
data_bulk = c()
6365

66+
# make the gene list of each data consistent
6467
for(i in c(1:length(bulk_file))){
6568

6669
tmp = bulk_data_tmp[[i]]
6770

6871
data_bulk = cbind(data_bulk, tmp$FPKM[match(common_gene,tmp$gene_name)])
72+
6973
}
7074

7175
# get the bulk RNAseq for the imputation
@@ -121,15 +125,131 @@ get_data <- function(tissue_name,
121125

122126
data[[5]] = data_bulk_avg
123127

128+
# save the data
124129
saveRDS(data,file = paste0("data_sc_bulk/",tissue_name,"_imputation.rds"))
125130

126131
}
127132

133+
# plot tsne
134+
plot_tsne <- function(plot_data,
135+
name,
136+
dot.size = 1){
137+
# Parameter in the function
138+
# plot_data: the data for visualization
139+
# name: the name of the plot
140+
# do.size: the size of the dot
141+
142+
plot_data %>%
143+
dplyr::group_by(ident) %>%
144+
summarize(x = median(x = x), y = median(x = y)) -> centers
145+
146+
# plot the dots
147+
p = ggplot(plot_data,aes(x = x, y = y, color = as.factor(ident))) +
148+
ggtitle(name) +
149+
xlab("TSNE_1") +
150+
ylab("TSNE_2") +
151+
geom_point(size = dot.size) +
152+
theme_cowplot() +
153+
theme(plot.title = element_text(size = 18, hjust = 0.4),
154+
axis.text = element_text(size = 12),
155+
legend.title=element_blank())
156+
157+
# plot the annotation number
158+
p = p + geom_text(data = centers, mapping = aes(x = x, y = y, label = ident), colour = "black")
159+
160+
161+
return(p)
162+
163+
}
164+
165+
166+
calculate_cluster <- function(cluster_index,
167+
cluster,
168+
data_tsne){
169+
170+
# Parameter in the function
171+
# cluster_index: the index of the cluster used for calculating dun index
172+
# cluster: the clustering informaiton of the cells
173+
# data_tsne: the tsne data
174+
175+
cluster1 = cluster
176+
177+
index = cluster1 != cluster_index
178+
179+
cluster1[index] = max(cluster) + 1
180+
181+
stats_cluster_dun = matrix(0,nrow = 5,ncol = 1)
182+
183+
# calculate the tsne
184+
stat_tmp = cluster.stats(dist(as.matrix(data_tsne[[1]]$Y)),cluster1)
185+
186+
stats_cluster_dun[1] = stat_tmp$dunn
187+
188+
stat_tmp = cluster.stats(dist(as.matrix(data_tsne[[2]]$Y)),cluster1)
189+
190+
stats_cluster_dun[2] = stat_tmp$dunn
191+
192+
stat_tmp = cluster.stats(dist(as.matrix(data_tsne[[3]]$Y)),cluster1)
193+
194+
stats_cluster_dun[3] = stat_tmp$dunn
195+
196+
stat_tmp = cluster.stats(dist(as.matrix(data_tsne[[4]]$Y)),cluster1)
197+
198+
stats_cluster_dun[4] = stat_tmp$dunn
199+
200+
stat_tmp = cluster.stats(dist(as.matrix(data_tsne[[5]]$Y)),cluster1)
201+
202+
stats_cluster_dun[5] = stat_tmp$dunn
203+
204+
return(stats_cluster)
205+
206+
}
207+
208+
# plot the bun values
209+
plot_bun_value <- function(bun_value,annotation_info){
210+
211+
# Parameter in the function
212+
# bun_value: the bun values
213+
# annotation_info: the cell annotation information
214+
215+
# calculate the log2 values
216+
index_tmp = -log2(bun_value)
217+
218+
# prepare the data for the boxplot
219+
longData = melt(as.matrix(index_tmp))
220+
221+
# define the column name
222+
colnames(longData) = c("X1","X2","value")
223+
224+
# plot the boxplot
225+
pp = ggplot(data=longData, aes(x=as.factor(X2), y= value, fill= as.factor(X1))) +
226+
geom_bar(stat="identity", position=position_dodge()) +
227+
scale_color_manual(labels = c("Raw", "DrImpute","scImpute","MAGIC","SCRABBLE")) +
228+
scale_fill_manual(values= c("#00AFBB","#0000CD","#E7B800", "#FC4E07", "#6ebb00"),
229+
name="Data Type",
230+
breaks=c(1,2,3,4,5),
231+
labels=c("Raw Data","DrImpute","scImpute","MAGIC","SCRABBLE")) +
232+
theme_bw() +
233+
ylab("-log2(Dunn Index)") +
234+
scale_x_discrete(breaks= annotation_info[,2],labels=annotation_info[,1]) +
235+
theme(axis.title.x=element_blank(),axis.text.x = element_text(size=9, angle=45 ,vjust=0.6),
236+
axis.title.y=element_text(size=14),axis.text.y = element_text(size=12),
237+
legend.position = "bottom") +
238+
guides(fill = guide_legend(title = "Data Type"))
239+
240+
return(pp)
241+
242+
}
128243

129244
pdf_dun_tsne <- function(tissue_name){
130245

246+
# Parameter in the function
247+
# tissue_name: the name of the tissue
248+
249+
# load the tSNE data
131250
data_tsne = readRDS(file = paste0("data_all/",tissue_name,"_TSNE.rds"))
132251

252+
# load the scRNAseq and related data
133253
load(paste0("data_sc_bulk/sc_",tissue_name,".RData"))
134254

135255
pl = list()
@@ -157,6 +277,7 @@ pdf_dun_tsne <- function(tissue_name){
157277
colnames(plot_data) = c("x","y","ident")
158278

159279
pl[[i]] = plot_tsne(plot_data,paste0(method_name[i],": ",tissue_name), dot.size = 1)
280+
160281
}
161282

162283
p1 = grid.arrange(grobs = pl,ncol = 3)
@@ -184,7 +305,7 @@ pdf_dun_tsne <- function(tissue_name){
184305

185306
if(i %in% cluster_n){
186307

187-
tmp_val = calcuate_cluster(i,as.numeric(knn5),data_tsne)
308+
tmp_val = calculate_cluster(i,as.numeric(knn5),data_tsne)
188309

189310
bun_value = cbind(bun_value,tmp_val[[1]])
190311

@@ -209,8 +330,13 @@ pdf_dun_tsne <- function(tissue_name){
209330

210331
pdf_dun_tsne1 = function(tissue_name){
211332

333+
# Parameter in the function
334+
# tissue_name: the name of the tissue
335+
336+
# load the tSNE data
212337
data_tsne = readRDS(file = paste0("data_all/",tissue_name,"_TSNE.rds"))
213338

339+
# load the scRNAseq and related data
214340
load(paste0("data_sc_bulk/sc_",tissue_name,".RData"))
215341

216342
pl = list()
@@ -265,7 +391,7 @@ pdf_dun_tsne1 = function(tissue_name){
265391

266392
if(i %in% cluster_n){
267393

268-
tmp_val = calcuate_cluster(i,as.numeric(knn5),data_tsne)
394+
tmp_val = calculate_cluster(i,as.numeric(knn5),data_tsne)
269395

270396
bun_value = cbind(bun_value,tmp_val[[1]])
271397

0 commit comments

Comments
 (0)