Skip to content

Commit b38cb3e

Browse files
committed
Bugfix and man data fix
1 parent 887e797 commit b38cb3e

File tree

5 files changed

+35
-285
lines changed

5 files changed

+35
-285
lines changed

R/annotation_utils.R

Lines changed: 17 additions & 224 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
2-
31
# Description:
42
# integrate clustirr with data from databases: VDJdb, tcr3d, mcpas-tcr
53
match_db <- function(cs, control) {
@@ -200,12 +198,10 @@ match_db <- function(cs, control) {
200198
return(cs)
201199
}
202200

203-
204201
get_annotation_dbs <- function() {
205202
return(c("vdjdb", "mcpas", "tcr3d"))
206203
}
207204

208-
209205
# Description:
210206
# integrate nodes with data from databases: VDJdb, tcr3d, mcpas-tcr
211207
get_node_ann <- function(ns, ag_key, db) {
@@ -287,8 +283,6 @@ get_node_ann <- function(ns, ag_key, db) {
287283
return(list(ann = m, vars = vars))
288284
}
289285

290-
291-
292286
# Get annotation database matches for visualization utilizies
293287
get_db_match <- function(ns, db, db_dist) {
294288

@@ -480,8 +474,6 @@ get_db_match <- function(ns, db, db_dist) {
480474
return(ns)
481475
}
482476

483-
484-
485477
# Description:
486478
get_beta_violins <- function(beta,
487479
node_summary,
@@ -637,12 +629,11 @@ get_beta_violins <- function(beta,
637629
violins = v))
638630
}
639631

640-
641632
# Description:
642633
get_beta_scatterplot <- function(beta,
643634
node_summary,
644-
ag_species = NULL,
645-
ag_genes = NULL,
635+
ag_species,
636+
ag_genes,
646637
db = "vdjdb",
647638
db_dist = 0,
648639
chain = "both") {
@@ -818,7 +809,6 @@ get_beta_scatterplot <- function(beta,
818809
scatterplots = v))
819810
}
820811

821-
822812
get_honeycombs <- function(com) {
823813
n <- ncol(com)
824814
gs <- vector(mode = "list", length = n*(n-1)/2)
@@ -862,212 +852,15 @@ get_honeycombs <- function(com) {
862852
return(gs)
863853
}
864854

865-
866-
867-
868855
# Description:
869-
get_ag_summary <- function(communities,
870-
node_summary,
871-
ag_species = NULL,
872-
ag_genes = NULL,
856+
get_ag_summary <- function(node_summary,
857+
ag_species,
858+
ag_genes,
873859
db = "vdjdb",
874860
db_dist = 0,
875861
chain = "both") {
876862

877-
match_db <- function(ns, db, db_dist) {
878-
879-
get_db_info <- function(ns, db, db_type, chain, index) {
880-
881-
get_vdjdb_info <- function(x, ns, db, chain, index) {
882-
if(x == "") {
883-
return("")
884-
}
885-
xs <- as.numeric(unlist(strsplit(index[x], split = "\\|")))
886-
887-
return(paste0("<db:VDJdb|chain:", chain, "|",
888-
"Antigen_species:",
889-
paste0(db[xs, "Antigen_species"], collapse = ';'),"|",
890-
"Antigen_gene:",
891-
paste0(db[xs, "Antigen_gene"], collapse = ';'), "|",
892-
"CDR3_species:",
893-
paste0(db[xs, "CDR3_species"], collapse = ';'), "|",
894-
"Reference:",
895-
paste0(db[xs, "PMID"], collapse = ';'), ">"))
896-
}
897-
898-
get_tcr3d_info <- function(x, ns, db, chain, index) {
899-
if(x == "") {
900-
return("")
901-
}
902-
xs <- as.numeric(unlist(strsplit(index[x], split = "\\|")))
903-
904-
return(paste0("<db:tcr3d|chain:", chain, "|",
905-
"Antigen_species:",
906-
paste0(db[xs, "Antigen_species"], collapse = ';'),"|",
907-
"Antigen_gene:",
908-
paste0(db[xs, "Antigen_gene"], collapse = ';'), "|",
909-
"Reference:",
910-
paste0(db[xs, "Reference"], collapse = ';'), ">"))
911-
}
912-
913-
get_mcpas_info <- function(x, ns, db, chain, index) {
914-
if(x == "") {
915-
return("")
916-
}
917-
xs <- as.numeric(unlist(strsplit(index[x], split = "\\|")))
918-
919-
return(paste0("<db:mcpas|chain:", chain, "|",
920-
"Antigen_species:",
921-
paste0(db[xs, "Antigen_species"], collapse = ';'),"|",
922-
"Antigen_gene:",
923-
paste0(db[xs, "Antigen_gene"], collapse = ';'), "|",
924-
"CDR3_species:",
925-
paste0(db[xs, "CDR3_species"], collapse = ';'), "|",
926-
"Reference:",
927-
paste0(db[xs, "Reference"], collapse = ';'), ">"))
928-
}
929-
930-
if(db_type == "vdjdb") {
931-
return(vapply(X = 1:nrow(ns),
932-
FUN = get_vdjdb_info,
933-
ns = ns,
934-
db = db[,c(chain, "CDR3_species", "Antigen_species",
935-
"Antigen_gene", "Reference")],
936-
chain = chain,
937-
index = index,
938-
FUN.VALUE = character(1)))
939-
940-
return(unlist(lapply(X=which(ns[,paste0("db_vdjdb_", chain)]==1),
941-
ns=ns,
942-
db=db[,c(chain, "CDR3_species",
943-
"Antigen_species",
944-
"Antigen_gene",
945-
"Reference")],
946-
chain = chain,
947-
index = index,
948-
FUN=get_vdjdb_info)))
949-
}
950-
if(db_type == "tcr3d") {
951-
return(unlist(lapply(X = 1:nrow(ns),
952-
ns = ns,
953-
db = db[,c(chain, "Antigen_species",
954-
"Antigen_gene",
955-
"Reference")],
956-
chain = chain,
957-
index = index,
958-
FUN = get_tcr3d_info)))
959-
}
960-
if(db_type == "mcpas") {
961-
return(unlist(lapply(X = 1:nrow(ns),
962-
ns = ns,
963-
db = db[,c(chain, "CDR3_species",
964-
"Antigen_species",
965-
"Antigen_gene",
966-
"Reference")],
967-
chain = chain,
968-
index = index,
969-
FUN = get_mcpas_info)))
970-
}
971-
}
972-
973-
get_db_index <- function(x, a, b, d) {
974-
if(d==0) {
975-
z <- which(a==b[x])
976-
} else {
977-
z <- which(stringdist(a=a,b=b[x],method="lv")<=d)
978-
}
979-
if(length(z)!=0) {
980-
return(paste0(z, collapse = '|'))
981-
}
982-
return('')
983-
}
984-
985-
load_data <- function(d) {
986-
987-
e <- new.env()
988-
if(d=="vdjdb") {
989-
name <- data("vdjdb", package = "ClustIRR", envir = e)[1]
990-
}
991-
if(d=="mcpas") {
992-
name <- data("mcpas", package = "ClustIRR", envir = e)[1]
993-
}
994-
if(d=="tcr3d") {
995-
name <- data("tcr3d", package = "ClustIRR", envir = e)[1]
996-
}
997-
return(e[[name]])
998-
}
999-
1000-
# what type of chaisn are there in the data -> use them to match DBs
1001-
chains <- get_chains(x = colnames(ns))
1002-
1003-
# load DBs and pack into one list -> db
1004-
db_name <- db
1005-
db <- vector(mode = "list", length = 1)
1006-
names(db) <- db_name
1007-
db[[1]] <- load_data(d = names(db)[1])
1008-
1009-
for(chain in chains) {
1010-
key_db <- paste0("db_", db_name, "_", chain)
1011-
key_index <- paste0("index_", db_name, "_", chain)
1012-
key_info <- paste0("info_", db_name, "_", chain)
1013-
1014-
ns[, key_db] <- 0
1015-
ns[, key_index] <- ''
1016-
ns[, key_info] <- ''
1017-
1018-
# insert indices
1019-
ns[, key_index] <- vapply(X = 1:nrow(ns),
1020-
a = db[[db_name]][, chain],
1021-
b = ns[, chain],
1022-
d = db_dist,
1023-
FUN.VALUE = character(1),
1024-
FUN = get_db_index)
1025-
1026-
ns[ns[, key_index]!='', key_db] <- 1
1027-
ns[, key_info] <- get_db_info(ns = ns,
1028-
db_type = db_name,
1029-
db = db[[db_name]],
1030-
index = ns[, key_index],
1031-
chain = chain)
1032-
ns[, key_index] <- NULL
1033-
}
1034-
1035-
# get aggregate infos
1036-
x <- ns[, which(regexpr(pattern = "info_", text = colnames(ns))!=-1),
1037-
drop=FALSE]
1038-
a <- apply(X = x, MARGIN = 1, FUN = function(x, key) {
1039-
if(all(x=="")) {
1040-
return(list(ag_species = '', ag_gene = ''))
1041-
}
1042-
1043-
ag_species <- c()
1044-
ag_gene <- c()
1045-
x <- x[x!=""]
1046-
for(i in 1:length(x)) {
1047-
y <- unlist(strsplit(x = x[i], split = "\\|"))
1048-
ag_species <- c(ag_species, unlist(strsplit(x = y[3], split = '\\;')))
1049-
ag_gene <- c(ag_gene, unlist(strsplit(x = y[4], split = '\\;')))
1050-
}
1051-
ag_species <- paste0(unique(gsub(pattern = "Antigen_species\\:",
1052-
replacement = '', x = ag_species)),
1053-
collapse = ',')
1054-
ag_gene <- paste0(unique(gsub(pattern = "Antigen_gene\\:",
1055-
replacement = '', x = ag_gene)),
1056-
collapse = ',')
1057-
1058-
return(list(ag_species = ag_species, ag_gene = ag_gene))
1059-
})
1060-
1061-
ns$Ag_species <- unlist(lapply(X = a, FUN = function(x){x[["ag_species"]]}))
1062-
ns$Ag_gene <- unlist(lapply(X = a, FUN = function(x) {x[["ag_gene"]]}))
1063-
1064-
return(ns)
1065-
}
1066-
1067-
# input checks, TODO: pack
1068-
if(missing(communities)) {
1069-
stop("communities is missing")
1070-
}
863+
# input checks
1071864
if(missing(node_summary)) {
1072865
stop("node_summary is missing")
1073866
}
@@ -1124,15 +917,19 @@ get_ag_summary <- function(communities,
1124917
n$repertoire_size <- n$clone_size
1125918
n$clone_size <- NULL
1126919

1127-
ns <- ns[ns$community %in% communities,]
1128-
1129920
# construct antigen key
1130-
ag_key <- rbind(data.frame(ag = ag_species, type = "species"),
1131-
data.frame(ag = ag_genes, type = "gene"))
1132-
ag_key <- ag_key[complete.cases(ag_key),]
1133-
921+
# if not provided -> create only one entry ''
922+
if(all(ag_species=="")&all(ag_genes=="")) {
923+
ag_key <- data.frame(ag = unique(ag_species), type = "species")
924+
ag_key$id <- "I1"
925+
} else {
926+
ag_key <- rbind(data.frame(ag = unique(ag_species), type = "species"),
927+
data.frame(ag = unique(ag_genes), type = "gene"))
928+
ag_key <- ag_key[complete.cases(ag_key),]
929+
ag_key$id <- paste0("I", 1:nrow(ag_key))
930+
}
1134931

1135-
ns <- match_db(ns = ns, db = db, db_dist = db_dist)
932+
ns <- get_db_match(ns = ns, db = db, db_dist = db_dist)
1136933
na <- get_node_ann(ns = ns, ag_key = ag_key, db = db)
1137934

1138935
# Create the list to fill d with 0s
@@ -1153,7 +950,3 @@ get_ag_summary <- function(communities,
1153950

1154951
return(d)
1155952
}
1156-
1157-
1158-
1159-

man/decode_communities.Rd

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,29 +11,18 @@ within communities and summarizes its features.
1111
\usage{
1212
decode_communities(community_id,
1313
graph,
14-
edge_attr,
15-
node_attr)
14+
edge_at,
15+
node_at)
1616
}
1717
\arguments{
1818
\item{graph}{\code{igraph} object that has been analyzed by graph-based
1919
community detection methods as implemented in \code{detect_communities}}
2020
\item{community_id}{which community should be decoded?}
21-
\item{edge_attr}{which edge attributes should be used to decode the community}
22-
\item{iterations}{which node attributes should be used to decode the community}
21+
\item{edge_at}{which edge attributes should be used to decode the community}
22+
\item{node_at}{which node attributes should be used to decode the community}
2323
}
2424
\value{
25-
The output is a list with the folling elements:
26-
\item{community_occupancy_matrix}{matrix, rows are communities, columns
27-
are repertoires, matrix entries are numbers of cells in each community and
28-
repertoire.}
29-
\item{community_summary}{data.frame, rows are communities and their properties
30-
are provided as columns.}
31-
\item{node_summary}{data.frame, rows are nodes (clones) and their properties
32-
are provided as columnscontains all user provided.}
33-
\item{graph}{igraph object, processed graph object.}
34-
\item{graph_structure_quality}{graph modularity and quality (only for Leiden)
35-
measure of the strength of division of the graph into communities.}
36-
\item{input_config}{list, inputs provided as list.}
25+
3726
}
3827
\examples{
3928

0 commit comments

Comments
 (0)