1-
2-
31# Description:
42# integrate clustirr with data from databases: VDJdb, tcr3d, mcpas-tcr
53match_db <- function (cs , control ) {
@@ -200,12 +198,10 @@ match_db <- function(cs, control) {
200198 return (cs )
201199}
202200
203-
204201get_annotation_dbs <- function () {
205202 return (c(" vdjdb" , " mcpas" , " tcr3d" ))
206203}
207204
208-
209205# Description:
210206# integrate nodes with data from databases: VDJdb, tcr3d, mcpas-tcr
211207get_node_ann <- function (ns , ag_key , db ) {
@@ -287,8 +283,6 @@ get_node_ann <- function(ns, ag_key, db) {
287283 return (list (ann = m , vars = vars ))
288284}
289285
290-
291-
292286# Get annotation database matches for visualization utilizies
293287get_db_match <- function (ns , db , db_dist ) {
294288
@@ -480,8 +474,6 @@ get_db_match <- function(ns, db, db_dist) {
480474 return (ns )
481475}
482476
483-
484-
485477# Description:
486478get_beta_violins <- function (beta ,
487479 node_summary ,
@@ -637,12 +629,11 @@ get_beta_violins <- function(beta,
637629 violins = v ))
638630}
639631
640-
641632# Description:
642633get_beta_scatterplot <- function (beta ,
643634 node_summary ,
644- ag_species = NULL ,
645- ag_genes = NULL ,
635+ ag_species ,
636+ ag_genes ,
646637 db = " vdjdb" ,
647638 db_dist = 0 ,
648639 chain = " both" ) {
@@ -818,7 +809,6 @@ get_beta_scatterplot <- function(beta,
818809 scatterplots = v ))
819810}
820811
821-
822812get_honeycombs <- function (com ) {
823813 n <- ncol(com )
824814 gs <- vector(mode = " list" , length = n * (n - 1 )/ 2 )
@@ -862,212 +852,15 @@ get_honeycombs <- function(com) {
862852 return (gs )
863853}
864854
865-
866-
867-
868855# Description:
869- get_ag_summary <- function (communities ,
870- node_summary ,
871- ag_species = NULL ,
872- ag_genes = NULL ,
856+ get_ag_summary <- function (node_summary ,
857+ ag_species ,
858+ ag_genes ,
873859 db = " vdjdb" ,
874860 db_dist = 0 ,
875861 chain = " both" ) {
876862
877- match_db <- function (ns , db , db_dist ) {
878-
879- get_db_info <- function (ns , db , db_type , chain , index ) {
880-
881- get_vdjdb_info <- function (x , ns , db , chain , index ) {
882- if (x == " " ) {
883- return (" " )
884- }
885- xs <- as.numeric(unlist(strsplit(index [x ], split = " \\ |" )))
886-
887- return (paste0(" <db:VDJdb|chain:" , chain , " |" ,
888- " Antigen_species:" ,
889- paste0(db [xs , " Antigen_species" ], collapse = ' ;' )," |" ,
890- " Antigen_gene:" ,
891- paste0(db [xs , " Antigen_gene" ], collapse = ' ;' ), " |" ,
892- " CDR3_species:" ,
893- paste0(db [xs , " CDR3_species" ], collapse = ' ;' ), " |" ,
894- " Reference:" ,
895- paste0(db [xs , " PMID" ], collapse = ' ;' ), " >" ))
896- }
897-
898- get_tcr3d_info <- function (x , ns , db , chain , index ) {
899- if (x == " " ) {
900- return (" " )
901- }
902- xs <- as.numeric(unlist(strsplit(index [x ], split = " \\ |" )))
903-
904- return (paste0(" <db:tcr3d|chain:" , chain , " |" ,
905- " Antigen_species:" ,
906- paste0(db [xs , " Antigen_species" ], collapse = ' ;' )," |" ,
907- " Antigen_gene:" ,
908- paste0(db [xs , " Antigen_gene" ], collapse = ' ;' ), " |" ,
909- " Reference:" ,
910- paste0(db [xs , " Reference" ], collapse = ' ;' ), " >" ))
911- }
912-
913- get_mcpas_info <- function (x , ns , db , chain , index ) {
914- if (x == " " ) {
915- return (" " )
916- }
917- xs <- as.numeric(unlist(strsplit(index [x ], split = " \\ |" )))
918-
919- return (paste0(" <db:mcpas|chain:" , chain , " |" ,
920- " Antigen_species:" ,
921- paste0(db [xs , " Antigen_species" ], collapse = ' ;' )," |" ,
922- " Antigen_gene:" ,
923- paste0(db [xs , " Antigen_gene" ], collapse = ' ;' ), " |" ,
924- " CDR3_species:" ,
925- paste0(db [xs , " CDR3_species" ], collapse = ' ;' ), " |" ,
926- " Reference:" ,
927- paste0(db [xs , " Reference" ], collapse = ' ;' ), " >" ))
928- }
929-
930- if (db_type == " vdjdb" ) {
931- return (vapply(X = 1 : nrow(ns ),
932- FUN = get_vdjdb_info ,
933- ns = ns ,
934- db = db [,c(chain , " CDR3_species" , " Antigen_species" ,
935- " Antigen_gene" , " Reference" )],
936- chain = chain ,
937- index = index ,
938- FUN.VALUE = character (1 )))
939-
940- return (unlist(lapply(X = which(ns [,paste0(" db_vdjdb_" , chain )]== 1 ),
941- ns = ns ,
942- db = db [,c(chain , " CDR3_species" ,
943- " Antigen_species" ,
944- " Antigen_gene" ,
945- " Reference" )],
946- chain = chain ,
947- index = index ,
948- FUN = get_vdjdb_info )))
949- }
950- if (db_type == " tcr3d" ) {
951- return (unlist(lapply(X = 1 : nrow(ns ),
952- ns = ns ,
953- db = db [,c(chain , " Antigen_species" ,
954- " Antigen_gene" ,
955- " Reference" )],
956- chain = chain ,
957- index = index ,
958- FUN = get_tcr3d_info )))
959- }
960- if (db_type == " mcpas" ) {
961- return (unlist(lapply(X = 1 : nrow(ns ),
962- ns = ns ,
963- db = db [,c(chain , " CDR3_species" ,
964- " Antigen_species" ,
965- " Antigen_gene" ,
966- " Reference" )],
967- chain = chain ,
968- index = index ,
969- FUN = get_mcpas_info )))
970- }
971- }
972-
973- get_db_index <- function (x , a , b , d ) {
974- if (d == 0 ) {
975- z <- which(a == b [x ])
976- } else {
977- z <- which(stringdist(a = a ,b = b [x ],method = " lv" )< = d )
978- }
979- if (length(z )!= 0 ) {
980- return (paste0(z , collapse = ' |' ))
981- }
982- return (' ' )
983- }
984-
985- load_data <- function (d ) {
986-
987- e <- new.env()
988- if (d == " vdjdb" ) {
989- name <- data(" vdjdb" , package = " ClustIRR" , envir = e )[1 ]
990- }
991- if (d == " mcpas" ) {
992- name <- data(" mcpas" , package = " ClustIRR" , envir = e )[1 ]
993- }
994- if (d == " tcr3d" ) {
995- name <- data(" tcr3d" , package = " ClustIRR" , envir = e )[1 ]
996- }
997- return (e [[name ]])
998- }
999-
1000- # what type of chaisn are there in the data -> use them to match DBs
1001- chains <- get_chains(x = colnames(ns ))
1002-
1003- # load DBs and pack into one list -> db
1004- db_name <- db
1005- db <- vector(mode = " list" , length = 1 )
1006- names(db ) <- db_name
1007- db [[1 ]] <- load_data(d = names(db )[1 ])
1008-
1009- for (chain in chains ) {
1010- key_db <- paste0(" db_" , db_name , " _" , chain )
1011- key_index <- paste0(" index_" , db_name , " _" , chain )
1012- key_info <- paste0(" info_" , db_name , " _" , chain )
1013-
1014- ns [, key_db ] <- 0
1015- ns [, key_index ] <- ' '
1016- ns [, key_info ] <- ' '
1017-
1018- # insert indices
1019- ns [, key_index ] <- vapply(X = 1 : nrow(ns ),
1020- a = db [[db_name ]][, chain ],
1021- b = ns [, chain ],
1022- d = db_dist ,
1023- FUN.VALUE = character (1 ),
1024- FUN = get_db_index )
1025-
1026- ns [ns [, key_index ]!= ' ' , key_db ] <- 1
1027- ns [, key_info ] <- get_db_info(ns = ns ,
1028- db_type = db_name ,
1029- db = db [[db_name ]],
1030- index = ns [, key_index ],
1031- chain = chain )
1032- ns [, key_index ] <- NULL
1033- }
1034-
1035- # get aggregate infos
1036- x <- ns [, which(regexpr(pattern = " info_" , text = colnames(ns ))!= - 1 ),
1037- drop = FALSE ]
1038- a <- apply(X = x , MARGIN = 1 , FUN = function (x , key ) {
1039- if (all(x == " " )) {
1040- return (list (ag_species = ' ' , ag_gene = ' ' ))
1041- }
1042-
1043- ag_species <- c()
1044- ag_gene <- c()
1045- x <- x [x != " " ]
1046- for (i in 1 : length(x )) {
1047- y <- unlist(strsplit(x = x [i ], split = " \\ |" ))
1048- ag_species <- c(ag_species , unlist(strsplit(x = y [3 ], split = ' \\ ;' )))
1049- ag_gene <- c(ag_gene , unlist(strsplit(x = y [4 ], split = ' \\ ;' )))
1050- }
1051- ag_species <- paste0(unique(gsub(pattern = " Antigen_species\\ :" ,
1052- replacement = ' ' , x = ag_species )),
1053- collapse = ' ,' )
1054- ag_gene <- paste0(unique(gsub(pattern = " Antigen_gene\\ :" ,
1055- replacement = ' ' , x = ag_gene )),
1056- collapse = ' ,' )
1057-
1058- return (list (ag_species = ag_species , ag_gene = ag_gene ))
1059- })
1060-
1061- ns $ Ag_species <- unlist(lapply(X = a , FUN = function (x ){x [[" ag_species" ]]}))
1062- ns $ Ag_gene <- unlist(lapply(X = a , FUN = function (x ) {x [[" ag_gene" ]]}))
1063-
1064- return (ns )
1065- }
1066-
1067- # input checks, TODO: pack
1068- if (missing(communities )) {
1069- stop(" communities is missing" )
1070- }
863+ # input checks
1071864 if (missing(node_summary )) {
1072865 stop(" node_summary is missing" )
1073866 }
@@ -1124,15 +917,19 @@ get_ag_summary <- function(communities,
1124917 n $ repertoire_size <- n $ clone_size
1125918 n $ clone_size <- NULL
1126919
1127- ns <- ns [ns $ community %in% communities ,]
1128-
1129920 # construct antigen key
1130- ag_key <- rbind(data.frame (ag = ag_species , type = " species" ),
1131- data.frame (ag = ag_genes , type = " gene" ))
1132- ag_key <- ag_key [complete.cases(ag_key ),]
1133-
921+ # if not provided -> create only one entry ''
922+ if (all(ag_species == " " )& all(ag_genes == " " )) {
923+ ag_key <- data.frame (ag = unique(ag_species ), type = " species" )
924+ ag_key $ id <- " I1"
925+ } else {
926+ ag_key <- rbind(data.frame (ag = unique(ag_species ), type = " species" ),
927+ data.frame (ag = unique(ag_genes ), type = " gene" ))
928+ ag_key <- ag_key [complete.cases(ag_key ),]
929+ ag_key $ id <- paste0(" I" , 1 : nrow(ag_key ))
930+ }
1134931
1135- ns <- match_db (ns = ns , db = db , db_dist = db_dist )
932+ ns <- get_db_match (ns = ns , db = db , db_dist = db_dist )
1136933 na <- get_node_ann(ns = ns , ag_key = ag_key , db = db )
1137934
1138935 # Create the list to fill d with 0s
@@ -1153,7 +950,3 @@ get_ag_summary <- function(communities,
1153950
1154951 return (d )
1155952}
1156-
1157-
1158-
1159-
0 commit comments