Skip to content

Commit 6d8fbbf

Browse files
committed
decomposing community done
1 parent 92fa51b commit 6d8fbbf

File tree

4 files changed

+113
-26
lines changed

4 files changed

+113
-26
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: ClustIRR
22
Type: Package
33
Title: Clustering of immune receptor repertoires
4-
Version: 1.5.48
4+
Version: 1.5.50
55
Authors@R: c(
66
person("Simo", "Kitanovski", email = "simokitanovski@gmail.com",
77
role = c("aut", "cre"), comment=c(ORCID="0000-0003-2909-5376")),

R/community.R

Lines changed: 69 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,6 @@ decode_communities <- function(community_id,
438438
V(sg)$components <- components(graph = sg)$membership
439439
V(sg)$component_id <- paste0(V(sg)$key, '|',
440440
V(sg)$components)
441-
V(sg)$component_id <- as.numeric(as.factor(V(sg)$component_id))
442441
return(disjoint_union(lapply(
443442
X = unique(V(sg)$component_id), g = sg,
444443
FUN = function(x, g) {
@@ -447,7 +446,15 @@ decode_communities <- function(community_id,
447446
})))
448447
})
449448
sgs <- disjoint_union(sgs)
450-
return(sgs)
449+
V(sgs)$component_id <- as.numeric(as.factor(V(sgs)$component_id))
450+
451+
sgs_stat <- get_component_stats(x = sgs)
452+
453+
vs <- as_data_frame(x = sgs, what = "vertices")
454+
455+
return(list(community_graph = sgs,
456+
component_stats = sgs_stat,
457+
node_summary = vs))
451458
}
452459
else {
453460
sgs <- lapply(
@@ -469,8 +476,67 @@ decode_communities <- function(community_id,
469476
})))
470477
})
471478
sgs <- disjoint_union(sgs)
472-
return(sgs)
473479

480+
sgs_stat <- get_component_stats(x = sgs)
481+
482+
vs <- as_data_frame(x = sgs, what = "vertices")
483+
484+
return(list(community_graph = sgs,
485+
component_stats = sgs_stat,
486+
node_summary = vs))
474487
}
475488
}
476489

490+
491+
get_component_stats <- function(x) {
492+
493+
# what is he
494+
community_id <- V(x)$community[1]
495+
496+
# number of nodes per component
497+
count_n <- table(V(x)$component_id)
498+
499+
# components has more than one node
500+
i <- which(count_n>1)
501+
stats_components <- c()
502+
if(length(i)>0) {
503+
stats_components <- lapply(X = names(i), g = x, FUN = function(x, g) {
504+
o <- subgraph(g, vids = which(V(g)$component_id==x))
505+
506+
ncweight <- E(o)$ncweight
507+
if(length(ncweight)==0) {
508+
ncweight <- NA
509+
}
510+
nweight <- E(o)$nweight
511+
if(length(nweight)==0) {
512+
nweight <- NA
513+
}
514+
515+
return(data.frame(component_id = x,
516+
community = V(o)$community[1],
517+
mean_ncweight = mean(ncweight),
518+
mean_nweight = mean(nweight),
519+
n_nodes = length(o),
520+
n_edges = length(E(o)),
521+
n_clique_edges = length(o)*(length(o)-1)/2,
522+
diameter = diameter(o)))
523+
})
524+
stats_components <- do.call(rbind, stats_components)
525+
}
526+
527+
i <- which(count_n==1)
528+
stats_singletons <- c()
529+
if(length(i)>0) {
530+
stats_singletons <- data.frame(component_id = names(i),
531+
community = community_id,
532+
mean_ncweight = NA,
533+
mean_nweight = NA,
534+
n_nodes = 1,
535+
n_edges = 0,
536+
n_clique_edges = 0,
537+
diameter = 0)
538+
}
539+
540+
return(rbind(stats_components, stats_singletons))
541+
}
542+

man/decode_communities.Rd

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,12 @@ have the same attribute values among \bold{ALL} provided attributes will
4242
be treated as a subcomponent.}
4343
}
4444
\value{
45-
The output is a "filtered" igraph object.
45+
The output is a list with the following elements
46+
\itemite{
47+
\item community_graph, "filtered" igraph object
48+
\item component_stats, data.frame with summary about each connected component
49+
\item node_summary, data.frame with summary about each node
50+
}
4651
}
4752
\examples{
4853
# load package input data
@@ -76,9 +81,9 @@ library(igraph)
7681
edge_attr_names(graph = gcd$graph)
7782

7883
# For instance, the following edge-filter will instruct ClustIRR to keep
79-
# edges with: edge attributes: nweight>=3 \bold{AND} ncweight>=3
80-
edge_filter <- rbind(data.frame(name = "nweight", value = 3, operation = ">="),
81-
data.frame(name = "ncweight", value = 3, operation = ">="))
84+
# edges with: edge attributes: nweight>=8 \bold{AND} ncweight>=8
85+
edge_filter <- rbind(data.frame(name = "nweight", value = 8, operation = ">="),
86+
data.frame(name = "ncweight", value = 8, operation = ">="))
8287

8388
# In addition, we can construct filters based on the following node attributes:
8489
vertex_attr_names(graph = gcd$graph)
@@ -96,10 +101,14 @@ c1 <- decode_communities(community_id = 1,
96101

97102
# Plot resulting igraph
98103
par(mar = c(0, 0, 0, 0))
99-
plot(c1, vertex.size = 10)
104+
plot(c1$community_graph, vertex.size = 10)
100105

101106
# Now look at node attributes
102-
as_data_frame(x = c1, what = "vertices")[,c("name", "component_id",
103-
"CDR3b", "CDR3a", "Ag_gene")]
107+
as_data_frame(x = c1$community_graph, what = "vertices")
108+
[,c("name", "component_id", "CDR3b", "CDR3a", "Ag_gene")]
109+
110+
str(c1$component_stats)
111+
112+
str(c1$node_summary)
104113
}
105114

vignettes/User_manual.Rmd

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -201,9 +201,9 @@ cl <- clustirr(s = tcr_reps, meta = meta, control = list(gmi = 0.7))
201201

202202
The output **complex** list with three elements:
203203

204-
1. `graph` = the joint graph $J$
204+
1. `graph` = the joint graph $J$
205205

206-
2. `clust_irrs` = list with S4 `clust_irr` objects one for each TCR
206+
2. `clust_irrs` = list with S4 `clust_irr` objects one for each TCR
207207
repertoire
208208

209209
3. `multigraph` = logical value which tell us whether $J$ contains
@@ -389,7 +389,8 @@ wrap_plots(honeycomb, nrow = 1)+
389389
```
390390

391391
Also see `community_summary`. In the data.frame `wide` we provide
392-
community summaries in each community across all TCR repertoires, including:
392+
community summaries in each community across all TCR repertoires,
393+
including:
393394

394395
- `clones_a`, `clone_b`, `clone_c`, `clones_n`: the frequency of
395396
clones in the community coming from repertoire `a`, `b`, `c` and in
@@ -406,8 +407,8 @@ community summaries in each community across all TCR repertoires, including:
406407
kable(head(gcd$community_summary$wide), digits = 1, row.names = FALSE)
407408
```
408409

409-
In the data.frame `tall` we provide community and repertoire
410-
summaries in each row.
410+
In the data.frame `tall` we provide community and repertoire summaries
411+
in each row.
411412

412413
```{r}
413414
kable(head(gcd$community_summary$tall), digits = 1, row.names = FALSE)
@@ -448,11 +449,11 @@ edge_attr_names(graph = gcd$graph)
448449
```
449450

450451
The following edge-filter will instruct ClustIRR to keep edges with with
451-
edge attributes: nweight $>=$ 4 **AND** ncweight $>=$ 4
452+
edge attributes: nweight $>=$ 8 **AND** ncweight $>=$ 8
452453

453454
```{r}
454-
edge_filter <- rbind(data.frame(name = "nweight", value = 4, operation = ">="),
455-
data.frame(name = "ncweight", value = 4, operation = ">="))
455+
edge_filter <- rbind(data.frame(name = "nweight", value = 8, operation = ">="),
456+
data.frame(name = "ncweight", value = 8, operation = ">="))
456457
```
457458

458459
```{r}
@@ -477,17 +478,30 @@ c8 <- decode_communities(community_id = 8,
477478

478479
```{r, fig.width=6, fig.height=4}
479480
par(mar = c(0, 0, 0, 0))
480-
plot(c8, vertex.size = 10)
481+
plot(c8$community, vertex.size = 10)
481482
```
482483

483484
```{r}
484-
kable(as_data_frame(x = c8, what = "vertices")[, c("name", "key",
485-
"CDR3b", "TRBV",
486-
"TRBJ", "CDR3a",
487-
"TRAV", "TRAJ")],
485+
kable(as_data_frame(x = c8$community, what = "vertices")
486+
[, c("name", "component_id", "CDR3b", "TRBV",
487+
"TRBJ", "CDR3a", "TRAV", "TRAJ")],
488488
row.names = FALSE)
489489
```
490490

491+
... or we can summarize the properties of each component in the next
492+
table as rows with:
493+
494+
- component id, community id (=8 in this example because this is what
495+
we selected)
496+
- mean edge weights (for all core and complete CDR3 pairs)
497+
- number of nodes, number of edges and expected number edges if the
498+
component were a clique
499+
- diameter of the component
500+
501+
```{r}
502+
kable(c8$component_stats, row.names = FALSE)
503+
```
504+
491505
## **Step 5.** differential community occupancy (DCO) with `dco`
492506

493507
Do we see **expanding** or **contracting** communities in a given
@@ -604,7 +618,6 @@ community occupancy (DCO):
604618
\delta^{a-b}_i = \beta^a_i - \beta^b_i
605619
\end{align}
606620
```
607-
608621
Importantly, `r Biocpkg("ClustIRR")` always computes both contrasts ($a$
609622
vs. $b$ and $b$ vs. $a$): $\delta^{a-b}_i$ and $\delta^{b-a}_i$.
610623

@@ -629,7 +642,6 @@ quantifies absolute differences in community probabilities:
629642
\mathrm{softmax}(\alpha + \beta^b)
630643
\end{align}
631644
```
632-
633645
Again, both contrasts are computed: $\epsilon^{a-b}_i$ and
634646
$\epsilon^{b-a}_i$.
635647

0 commit comments

Comments
 (0)