@@ -106,16 +106,17 @@ run_drimpute <- function(dropout_index, rand_seed){
106
106
# rand_seed: the random seed
107
107
108
108
# create the folder
109
- dir.create(file.path(" /imputation_drimpute_data/" ))
109
+ dir.create(file.path(" /imputation_drimpute_data/" ),
110
+ showWarnings = FALSE ) # dir.create() has no stringsAsFactors argument; showWarnings = FALSE silences the warning it would otherwise emit (e.g. when the folder already exists)
110
111
111
112
# load the data
112
- data_sc = as.matrix(fread(paste0(cwd , " /data_all/data_raw_" ,dropout_index ," _" ,rand_seed ," .csv" )))
113
+ data_sc = as.matrix(fread(paste0(" /data_all/data_raw_" ,dropout_index ," _" ,rand_seed ," .csv" )))
113
114
114
115
# run the imputation
115
116
extdata = DrImpute(data_sc )
116
117
117
118
# save the data
118
- saveRDS(extdata , file = paste0(cwd1 , " data_" ,dropout_index ," _" ,rand_seed ," _drimpute_imputation.rds" ))
119
+ saveRDS(extdata , file = paste0(" /imputation_drimpute_data/ data_" ,dropout_index ," _" ,rand_seed ," _drimpute_imputation.rds" ))
119
120
120
121
}
121
122
@@ -127,8 +128,11 @@ run_scimpute <- function(dropout_index, rand_seed){
127
128
# rand_seed: the random seed
128
129
129
130
# create the folder
130
- dir.create(file.path(" /imputation_scimpute_data/" ), showWarnings = FALSE )
131
- dir.create(file.path(" /temp_scimpute_data/" ), showWarnings = FALSE )
131
+ dir.create(file.path(" /imputation_scimpute_data/" ),
132
+ showWarnings = FALSE )
133
+
134
+ dir.create(file.path(" /temp_scimpute_data/" ),
135
+ showWarnings = FALSE )
132
136
133
137
# load the data
134
138
data_sc = as.matrix(fread(paste0(" /data_all/data_raw_" ,
@@ -179,14 +183,14 @@ run_scimpute <- function(dropout_index, rand_seed){
179
183
#
180
184
# cwd = os.getcwd()
181
185
#
182
- # if not os.path.exists(cwd+"/magic_data "):
183
- # os.makedirs(cwd+"/magic_data ")
186
+ # if not os.path.exists(cwd+"/imputation_magic_data "):
187
+ # os.makedirs(cwd+"/imputation_magic_data ")
184
188
#
185
189
# X =pd.read_csv(cwd + "/data_all/data_raw_"+str(dropout_value)+"_"+str(seed_value)+".csv",sep = ',')
186
190
# magic_operator = magic.MAGIC()
187
191
# X_magic = magic_operator.fit_transform(X.T)
188
192
# out_magic = X_magic.T
189
- # out_magic.to_csv(cwd+"/magic_data /magic_"+str(dropout_value)+"_"+str(seed_value)+".csv", sep = ',', header= None)
193
+ # out_magic.to_csv(cwd+"/imputation_magic_data /magic_"+str(dropout_value)+"_"+str(seed_value)+".csv", sep = ',', header= None)
190
194
# ----------------------------------------------------------
191
195
192
196
# run SCRABBLE
@@ -225,7 +229,7 @@ run_scrabble <- function(dropout_index, rand_seed){
225
229
error_inner_threshold = 1e-5 )
226
230
227
231
# save the data
228
- saveRDS(result , file = paste0(cwd1 , " data_" ,i ," _" ,j ," _" ,k ," _scrabble_imputation.rds" ))
232
+ saveRDS(result , file = paste0(" /imputation_scrabble_data/ data_" ,i ," _" ,j ," _" ,k ," _scrabble_imputation.rds" ))
229
233
230
234
}
231
235
@@ -257,7 +261,7 @@ get_data_HF <- function(dropout_index, rand_seed){
257
261
rand_seed ," _scimpute_imputation.rds" ))
258
262
259
263
# load the imputed data by MAGIC
260
- data_magic = as.matrix(fread(paste0(" /magic_data /magic_" ,
264
+ data_magic = as.matrix(fread(paste0(" /imputation_magic_data /magic_" ,
261
265
dropout_index ," _" ,
262
266
rand_seed ," .csv" )))
263
267
data_magic = data_magic [,- 1 ]
@@ -307,93 +311,131 @@ calculate_error <- function(dropout_index, rand_seed){
307
311
data = get_data_HF(dropout_index , rand_seed )
308
312
309
313
data_true = data $ data_true
314
+
310
315
data_dropout = data $ data_raw
316
+
311
317
data_drimpute = data $ data_drimpute
318
+
312
319
data_scimpute = data $ data_scimpute
320
+
313
321
data_magic = data $ data_magic
322
+
314
323
data_scrabble = data $ data_scrabble
315
324
316
325
error = matrix (0 , nrow = 6 , ncol = 1 )
326
+
317
327
error [1 ] = norm(data_dropout - data_true , type = " 2" )
328
+
318
329
error [2 ] = norm(data_drimpute - data_true , type = " 2" )
330
+
319
331
error [3 ] = norm(data_scimpute - data_true , type = " 2" )
332
+
320
333
error [4 ] = norm(data_magic - data_true , type = " 2" )
334
+
321
335
error [5 ] = norm(data_scrabble - data_true , type = " 2" )
336
+
322
337
error [6 ] = 1 - nnzero(data_dropout )/ length(data_dropout )
323
338
324
339
# gene-gene correlation
325
340
# true data
326
341
data_true_gene = cor(t(data_true ), method = " pearson" )
342
+
327
343
data_true_gene [is.na(data_true_gene )] = 0
328
344
329
345
# dropout data
330
346
data_dropout_gene = cor(t(data_dropout ), method = " pearson" )
347
+
331
348
data_dropout_gene [is.na(data_dropout_gene )] = 0
332
349
333
350
# DrImpute data
334
351
data_drimpute_gene = cor(t(data_drimpute ), method = " pearson" )
352
+
335
353
data_drimpute_gene [is.na(data_drimpute_gene )] = 0
336
354
337
355
# scImpute data
338
356
data_scimpute_gene = cor(t(data_scimpute ), method = " pearson" )
357
+
339
358
data_scimpute_gene [is.na(data_scimpute_gene )] = 0
340
359
341
360
# MAGIC data
342
361
data_magic_gene = cor(t(data_magic ), method = " pearson" )
362
+
343
363
data_magic_gene [is.na(data_magic_gene )] = 0
344
364
345
365
# SCRABBLE data
346
366
data_scrabble_gene = cor(t(data_scrabble ), method = " pearson" )
367
+
347
368
data_scrabble_gene [is.na(data_scrabble_gene )] = 0
348
369
349
370
350
371
error_gene = matrix (0 , nrow = 6 , ncol = 1 )
372
+
351
373
error_gene [1 ] = calculate_similarity(data_true_gene , data_dropout_gene )
374
+
352
375
error_gene [2 ] = calculate_similarity(data_true_gene , data_drimpute_gene )
376
+
353
377
error_gene [3 ] = calculate_similarity(data_true_gene , data_scimpute_gene )
378
+
354
379
error_gene [4 ] = calculate_similarity(data_true_gene , data_magic_gene )
380
+
355
381
error_gene [5 ] = calculate_similarity(data_true_gene , data_scrabble_gene )
382
+
356
383
error_gene [6 ] = 1 - nnzero(data_dropout )/ length(data_dropout )
357
384
358
385
359
386
# cell-cell correlation
360
387
361
388
# true data
362
389
data_true_cell = cor(data_true , method = " pearson" )
390
+
363
391
data_true_cell [is.na(data_true_cell )] = 0
364
392
365
393
# dropout data
366
394
data_dropout_cell = cor(data_dropout , method = " pearson" )
395
+
367
396
data_dropout_cell [is.na(data_dropout_cell )] = 0
368
397
369
398
# DrImpute data
370
399
data_drimpute_cell = cor(data_drimpute , method = " pearson" )
400
+
371
401
data_drimpute_cell [is.na(data_drimpute_cell )] = 0
372
402
373
403
# scImpute data
374
404
data_scimpute_cell = cor(data_scimpute , method = " pearson" )
405
+
375
406
data_scimpute_cell [is.na(data_scimpute_cell )] = 0
376
407
377
408
# MAGIC data
378
409
data_magic_cell = cor(data_magic , method = " pearson" )
410
+
379
411
data_magic_cell [is.na(data_magic_cell )] = 0
380
412
381
413
# SCRABBLE
382
414
data_scrabble_cell = cor(data_scrabble , method = " pearson" )
415
+
383
416
data_scrabble_cell [is.na(data_scrabble_cell )] = 0
384
417
385
418
error_cell = matrix (0 , nrow = 6 , ncol = 1 )
419
+
386
420
error_cell [1 ] = calculate_similarity(data_true_cell , data_dropout_cell )
421
+
387
422
error_cell [2 ] = calculate_similarity(data_true_cell , data_drimpute_cell )
423
+
388
424
error_cell [3 ] = calculate_similarity(data_true_cell , data_scimpute_cell )
425
+
389
426
error_cell [4 ] = calculate_similarity(data_true_cell , data_magic_cell )
427
+
390
428
error_cell [5 ] = calculate_similarity(data_true_cell , data_scrabble_cell )
429
+
391
430
error_cell [6 ] = 1 - nnzero(data_dropout )/ length(data_dropout )
392
431
393
432
# define the error as a list
394
433
result <- list ()
434
+
395
435
result $ error <- error
436
+
396
437
result $ error_cell <- error_cell
438
+
397
439
result $ error_gene <- error_gene
398
440
399
441
return (result )
@@ -924,7 +966,9 @@ plot_cor_p <- function(data, name){
924
966
limit = c(min(c(data1 )),max(c(data1 )))
925
967
926
968
myPalette = colorRampPalette(rev(brewer.pal(11 , " Spectral" )))
969
+
927
970
colnames(data ) = NULL
971
+
928
972
rownames(data ) = NULL
929
973
930
974
# prepare the plot data
@@ -970,38 +1014,50 @@ plot_cor_HF <- function(dropout_index, rand_seed){
970
1014
971
1015
# set up the data
972
1016
data_true = data $ data_true
1017
+
973
1018
data_dropout = data $ data_raw
1019
+
974
1020
data_drimpute = data $ data_drimpute
1021
+
975
1022
data_scimpute = data $ data_scimpute
1023
+
976
1024
data_magic = data $ data_magic
1025
+
977
1026
data_scrabble = data $ data_scrabble
978
1027
979
1028
# gene-gene correlation
980
1029
# true data
981
1030
data_true_gene = cor(t(data_true ), method = " pearson" )
1031
+
982
1032
data_true_gene [is.na(data_true_gene )] = 0
983
1033
984
1034
# dropout data
985
1035
data_dropout_gene = cor(t(data_dropout ), method = " pearson" )
1036
+
986
1037
data_dropout_gene [is.na(data_dropout_gene )] = 0
987
1038
988
1039
# DrImpute data
989
1040
data_drimpute_gene = cor(t(data_drimpute ), method = " pearson" )
1041
+
990
1042
data_drimpute_gene [is.na(data_drimpute_gene )] = 0
991
1043
992
1044
# scImpute data
993
1045
data_scimpute_gene = cor(t(data_scimpute ), method = " pearson" )
1046
+
994
1047
data_scimpute_gene [is.na(data_scimpute_gene )] = 0
995
1048
996
1049
# MAGIC data
997
1050
data_magic_gene = cor(t(data_magic ), method = " pearson" )
1051
+
998
1052
data_magic_gene [is.na(data_magic_gene )] = 0
999
1053
1000
1054
# SCRABBLE data
1001
1055
data_scrabble_gene = cor(t(data_scrabble ), method = " pearson" )
1056
+
1002
1057
data_scrabble_gene [is.na(data_scrabble_gene )] = 0
1003
1058
1004
1059
p <- list ()
1060
+
1005
1061
pl <- list ()
1006
1062
1007
1063
pl [[1 ]] <- plot_cor_p(data_true_gene ," Gene: True Data" )
@@ -1021,26 +1077,32 @@ plot_cor_HF <- function(dropout_index, rand_seed){
1021
1077
# cell-cell correlation
1022
1078
# true data
1023
1079
data_true_cell = cor(data_true , method = " pearson" )
1080
+
1024
1081
data_true_cell [is.na(data_true_cell )] = 0
1025
1082
1026
1083
# dropout data
1027
1084
data_dropout_cell = cor(data_dropout , method = " pearson" )
1085
+
1028
1086
data_dropout_cell [is.na(data_dropout_cell )] = 0
1029
1087
1030
1088
# DrImpute data
1031
1089
data_drimpute_cell = cor(data_drimpute , method = " pearson" )
1090
+
1032
1091
data_drimpute_cell [is.na(data_drimpute_cell )] = 0
1033
1092
1034
1093
# scImpute data
1035
1094
data_scimpute_cell = cor(data_scimpute , method = " pearson" )
1095
+
1036
1096
data_scimpute_cell [is.na(data_scimpute_cell )] = 0
1037
1097
1038
1098
# MAGIC data
1039
1099
data_magic_cell = cor(data_magic , method = " pearson" )
1100
+
1040
1101
data_magic_cell [is.na(data_magic_cell )] = 0
1041
1102
1042
1103
# SCRABBLE
1043
1104
data_scrabble_cell = cor(data_scrabble , method = " pearson" )
1105
+
1044
1106
data_scrabble_cell [is.na(data_scrabble_cell )] = 0
1045
1107
1046
1108
pl <- list ()
0 commit comments