Skip to content

Commit 4377722

Browse files
authored
Add files via upload
1 parent d9ee2f7 commit 4377722

File tree

2 files changed

+109
-10
lines changed

2 files changed

+109
-10
lines changed

R/BenchMark/SimulationData_Strategy2/analysis_library.R

Lines changed: 72 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -106,16 +106,17 @@ run_drimpute <- function(dropout_index, rand_seed){
106106
# seed_value: the random seed
107107

108108
# create the folder
109-
dir.create(file.path("/imputation_drimpute_data/"))
109+
dir.create(file.path("/imputation_drimpute_data/"),
110+
showWarnings = FALSE)
110111

111112
# load the data
112-
data_sc = as.matrix(fread(paste0(cwd,"/data_all/data_raw_",dropout_index,"_",rand_seed,".csv")))
113+
data_sc = as.matrix(fread(paste0("/data_all/data_raw_",dropout_index,"_",rand_seed,".csv")))
113114

114115
# run the imputation
115116
extdata = DrImpute(data_sc)
116117

117118
# save the data
118-
saveRDS(extdata, file = paste0(cwd1,"data_",dropout_index,"_",rand_seed,"_drimpute_imputation.rds"))
119+
saveRDS(extdata, file = paste0("/imputation_drimpute_data/data_",dropout_index,"_",rand_seed,"_drimpute_imputation.rds"))
119120

120121
}
121122

@@ -127,8 +128,11 @@ run_scimpute <- function(dropout_index, rand_seed){
127128
# seed_value: the random seed
128129

129130
# create the folder
130-
dir.create(file.path("/imputation_scimpute_data/"), showWarnings = FALSE)
131-
dir.create(file.path("/temp_scimpute_data/"), showWarnings = FALSE)
131+
dir.create(file.path("/imputation_scimpute_data/"),
132+
showWarnings = FALSE)
133+
134+
dir.create(file.path("/temp_scimpute_data/"),
135+
showWarnings = FALSE)
132136

133137
# load the data
134138
data_sc = as.matrix(fread(paste0("/data_all/data_raw_",
@@ -179,14 +183,14 @@ run_scimpute <- function(dropout_index, rand_seed){
179183
#
180184
# cwd = os.getcwd()
181185
#
182-
# if not os.path.exists(cwd+"/magic_data"):
183-
# os.makedirs(cwd+"/magic_data")
186+
# if not os.path.exists(cwd+"/imputation_magic_data"):
187+
# os.makedirs(cwd+"/imputation_magic_data")
184188
#
185189
# X =pd.read_csv(cwd + "/data_all/data_raw_"+str(dropout_value)+"_"+str(seed_value)+".csv",sep = ',')
186190
# magic_operator = magic.MAGIC()
187191
# X_magic = magic_operator.fit_transform(X.T)
188192
# out_magic = X_magic.T
189-
# out_magic.to_csv(cwd+"/magic_data/magic_"+str(dropout_value)+"_"+str(seed_value)+".csv", sep = ',', header= None)
193+
# out_magic.to_csv(cwd+"/imputation_magic_data/magic_"+str(dropout_value)+"_"+str(seed_value)+".csv", sep = ',', header= None)
190194
# ----------------------------------------------------------
191195

192196
# run SCRABBLE
@@ -225,7 +229,7 @@ run_scrabble <- function(dropout_index, rand_seed){
225229
error_inner_threshold = 1e-5)
226230

227231
# save the data
228-
saveRDS(result, file = paste0(cwd1,"data_",i,"_",j,"_",k,"_scrabble_imputation.rds"))
232+
saveRDS(result, file = paste0("/imputation_scrabble_data/data_",i,"_",j,"_",k,"_scrabble_imputation.rds"))
229233

230234
}
231235

@@ -257,7 +261,7 @@ get_data_HF <- function(dropout_index, rand_seed){
257261
rand_seed,"_scimpute_imputation.rds"))
258262

259263
# load the imputed data by MAGIC
260-
data_magic = as.matrix(fread(paste0("/magic_data/magic_",
264+
data_magic = as.matrix(fread(paste0("/imputation_magic_data/magic_",
261265
dropout_index,"_",
262266
rand_seed,".csv")))
263267
data_magic = data_magic[,-1]
@@ -307,93 +311,131 @@ calculate_error <- function(dropout_index, rand_seed){
307311
data = get_data_HF(dropout_index, rand_seed)
308312

309313
data_true = data$data_true
314+
310315
data_dropout = data$data_raw
316+
311317
data_drimpute = data$data_drimpute
318+
312319
data_scimpute = data$data_scimpute
320+
313321
data_magic = data$data_magic
322+
314323
data_scrabble = data$data_scrabble
315324

316325
error = matrix(0, nrow = 6, ncol = 1)
326+
317327
error[1] = norm(data_dropout - data_true, type = "2")
328+
318329
error[2] = norm(data_drimpute - data_true, type = "2")
330+
319331
error[3] = norm(data_scimpute - data_true, type = "2")
332+
320333
error[4] = norm(data_magic - data_true, type = "2")
334+
321335
error[5] = norm(data_scrabble - data_true, type = "2")
336+
322337
error[6] = 1 - nnzero(data_dropout)/length(data_dropout)
323338

324339
# gene-gene correlation
325340
# true data
326341
data_true_gene = cor(t(data_true), method = "pearson")
342+
327343
data_true_gene[is.na(data_true_gene)] = 0
328344

329345
# dropout data
330346
data_dropout_gene = cor(t(data_dropout), method = "pearson")
347+
331348
data_dropout_gene[is.na(data_dropout_gene)] = 0
332349

333350
# DrImpute data
334351
data_drimpute_gene = cor(t(data_drimpute), method = "pearson")
352+
335353
data_drimpute_gene[is.na(data_drimpute_gene)] = 0
336354

337355
# scImpute data
338356
data_scimpute_gene = cor(t(data_scimpute), method = "pearson")
357+
339358
data_scimpute_gene[is.na(data_scimpute_gene)] = 0
340359

341360
# MAGIC data
342361
data_magic_gene = cor(t(data_magic), method = "pearson")
362+
343363
data_magic_gene[is.na(data_magic_gene)] = 0
344364

345365
# SCRABBLE data
346366
data_scrabble_gene = cor(t(data_scrabble), method = "pearson")
367+
347368
data_scrabble_gene[is.na(data_scrabble_gene)] = 0
348369

349370

350371
error_gene = matrix(0, nrow = 6, ncol = 1)
372+
351373
error_gene[1] = calculate_similarity(data_true_gene, data_dropout_gene)
374+
352375
error_gene[2] = calculate_similarity(data_true_gene, data_drimpute_gene)
376+
353377
error_gene[3] = calculate_similarity(data_true_gene, data_scimpute_gene)
378+
354379
error_gene[4] = calculate_similarity(data_true_gene, data_magic_gene)
380+
355381
error_gene[5] = calculate_similarity(data_true_gene, data_scrabble_gene)
382+
356383
error_gene[6] = 1 - nnzero(data_dropout)/length(data_dropout)
357384

358385

359386
# cell-cell correlation
360387

361388
# true data
362389
data_true_cell = cor(data_true, method = "pearson")
390+
363391
data_true_cell[is.na(data_true_cell)] = 0
364392

365393
# dropout data
366394
data_dropout_cell = cor(data_dropout, method = "pearson")
395+
367396
data_dropout_cell[is.na(data_dropout_cell)] = 0
368397

369398
# DrImpute data
370399
data_drimpute_cell = cor(data_drimpute, method = "pearson")
400+
371401
data_drimpute_cell[is.na(data_drimpute_cell)] = 0
372402

373403
# scImpute data
374404
data_scimpute_cell = cor(data_scimpute, method = "pearson")
405+
375406
data_scimpute_cell[is.na(data_scimpute_cell)] = 0
376407

377408
# MAGIC data
378409
data_magic_cell = cor(data_magic, method = "pearson")
410+
379411
data_magic_cell[is.na(data_magic_cell)] = 0
380412

381413
# SCRABBLE
382414
data_scrabble_cell = cor(data_scrabble, method = "pearson")
415+
383416
data_scrabble_cell[is.na(data_scrabble_cell)] = 0
384417

385418
error_cell = matrix(0, nrow = 6, ncol = 1)
419+
386420
error_cell[1] = calculate_similarity(data_true_cell, data_dropout_cell)
421+
387422
error_cell[2] = calculate_similarity(data_true_cell, data_drimpute_cell)
423+
388424
error_cell[3] = calculate_similarity(data_true_cell, data_scimpute_cell)
425+
389426
error_cell[4] = calculate_similarity(data_true_cell, data_magic_cell)
427+
390428
error_cell[5] = calculate_similarity(data_true_cell, data_scrabble_cell)
429+
391430
error_cell[6] = 1 - nnzero(data_dropout)/length(data_dropout)
392431

393432
# define the error as a list
394433
result <- list()
434+
395435
result$error <- error
436+
396437
result$error_cell <- error_cell
438+
397439
result$error_gene <- error_gene
398440

399441
return(result)
@@ -924,7 +966,9 @@ plot_cor_p <- function(data, name){
924966
limit = c(min(c(data1)),max(c(data1)))
925967

926968
myPalette = colorRampPalette(rev(brewer.pal(11, "Spectral")))
969+
927970
colnames(data) = NULL
971+
928972
rownames(data) = NULL
929973

930974
# prepare the plot data
@@ -970,38 +1014,50 @@ plot_cor_HF <- function(dropout_index, rand_seed){
9701014

9711015
# set up the data
9721016
data_true = data$data_true
1017+
9731018
data_dropout = data$data_raw
1019+
9741020
data_drimpute = data$data_drimpute
1021+
9751022
data_scimpute = data$data_scimpute
1023+
9761024
data_magic = data$data_magic
1025+
9771026
data_scrabble = data$data_scrabble
9781027

9791028
# gene-gene correlation
9801029
# true data
9811030
data_true_gene = cor(t(data_true), method = "pearson")
1031+
9821032
data_true_gene[is.na(data_true_gene)] = 0
9831033

9841034
# dropout data
9851035
data_dropout_gene = cor(t(data_dropout), method = "pearson")
1036+
9861037
data_dropout_gene[is.na(data_dropout_gene)] = 0
9871038

9881039
# DrImpute data
9891040
data_drimpute_gene = cor(t(data_drimpute), method = "pearson")
1041+
9901042
data_drimpute_gene[is.na(data_drimpute_gene)] = 0
9911043

9921044
# scImpute data
9931045
data_scimpute_gene = cor(t(data_scimpute), method = "pearson")
1046+
9941047
data_scimpute_gene[is.na(data_scimpute_gene)] = 0
9951048

9961049
# MAGIC data
9971050
data_magic_gene = cor(t(data_magic), method = "pearson")
1051+
9981052
data_magic_gene[is.na(data_magic_gene)] = 0
9991053

10001054
# SCRABBLE data
10011055
data_scrabble_gene = cor(t(data_scrabble), method = "pearson")
1056+
10021057
data_scrabble_gene[is.na(data_scrabble_gene)] = 0
10031058

10041059
p <- list()
1060+
10051061
pl <- list()
10061062

10071063
pl[[1]] <- plot_cor_p(data_true_gene,"Gene: True Data")
@@ -1021,26 +1077,32 @@ plot_cor_HF <- function(dropout_index, rand_seed){
10211077
# cell-cell correlation
10221078
# true data
10231079
data_true_cell = cor(data_true, method = "pearson")
1080+
10241081
data_true_cell[is.na(data_true_cell)] = 0
10251082

10261083
# dropout data
10271084
data_dropout_cell = cor(data_dropout, method = "pearson")
1085+
10281086
data_dropout_cell[is.na(data_dropout_cell)] = 0
10291087

10301088
# DrImpute data
10311089
data_drimpute_cell = cor(data_drimpute, method = "pearson")
1090+
10321091
data_drimpute_cell[is.na(data_drimpute_cell)] = 0
10331092

10341093
# scImpute data
10351094
data_scimpute_cell = cor(data_scimpute, method = "pearson")
1095+
10361096
data_scimpute_cell[is.na(data_scimpute_cell)] = 0
10371097

10381098
# MAGIC data
10391099
data_magic_cell = cor(data_magic, method = "pearson")
1100+
10401101
data_magic_cell[is.na(data_magic_cell)] = 0
10411102

10421103
# SCRABBLE
10431104
data_scrabble_cell = cor(data_scrabble, method = "pearson")
1105+
10441106
data_scrabble_cell[is.na(data_scrabble_cell)] = 0
10451107

10461108
pl <- list()

0 commit comments

Comments
 (0)