Skip to content

Commit 234d42c

Browse files
author
mase5
committed
Add feature to extract the gene expression matrix of a given cluster. Update tutorial. Update to version 0.3.0.
1 parent 34956c5 commit 234d42c

12 files changed

+341
-42
lines changed

DESCRIPTION

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
Package: SCopeLoomR
22
Type: Package
3-
Title: Build .loom files compatible with SCope
4-
Version: 0.2.2
3+
Title: Build .loom files (compatible with SCope) and extract data from .loom files
4+
Version: 0.3.0
55
Author: mase5
66
Maintainer: mase5 <mdewaegeneer@gmail.com>
77
Description: R package to build generic .loom files aligning with the default naming convention of the .loom format and
8-
to integrate other data types e.g.: regulons (SCENIC), clusters from Seurat, ...
9-
Imports: hdf5r, rjson, utils, methods, base, base64enc, igraph
8+
to integrate other data types e.g.: regulons (SCENIC), clusters from Seurat, ... The package can also be used to extract
9+
data from .loom files.
10+
Imports: hdf5r, rjson, utils, methods, base, base64enc, igraph, plyr, rlist
1011
Suggests: seurat, stringr
1112
License: Apache-2
1213
Encoding: UTF-8

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ export(create_trajectory_from_monocle)
2020
export(finalize)
2121
export(flush)
2222
export(get_cell_ids)
23+
export(get_cluster_dgem_by_name)
24+
export(get_cluster_info_by_cluster_name)
25+
export(get_clustering_by_id)
26+
export(get_clusterings)
2327
export(get_col_attr_by_key)
2428
export(get_default_embedding)
2529
export(get_dgem)

R/loom.R

Lines changed: 104 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ add_global_md_clustering<-function(loom
233233
return (list(id = cluster.id
234234
, description = description))
235235
})
236-
clusterings<-get_col_attr_by_key(loom = loom, key = CA_CLUSTERINGS_NAME)
236+
clusterings<-get_clusterings(loom = loom)
237237
clustering<-list(id = id
238238
, group = group
239239
, name = name
@@ -584,7 +584,7 @@ add_seurat_clustering<-function(loom
584584
append_clustering_update_ca<-function(loom
585585
, clustering.id
586586
, clustering) {
587-
ca.clusterings<-get_col_attr_by_key(loom = loom, key = CA_CLUSTERINGS_NAME)
587+
ca.clusterings<-get_clusterings(loom = loom)
588588
colnames(clustering)<-as.character(clustering.id)
589589
# Append this clustering
590590
ca.clusterings<-cbind(ca.clusterings, clustering)
@@ -695,7 +695,7 @@ add_annotated_clustering<-function(loom
695695
# Adding the clustering data
696696
if(col_attrs_exists_by_key(loom = loom, key = CA_CLUSTERINGS_NAME)) {
697697
print(paste(CA_CLUSTERINGS_NAME, "already exists..."))
698-
ca.clusterings<-get_col_attr_by_key(loom = loom, key = CA_CLUSTERINGS_NAME)
698+
ca.clusterings<-get_clusterings(loom = loom)
699699
# Set the clustering id
700700
id<-ncol(ca.clusterings) # n clusterings (start at 0)
701701
clustering<-data.frame("x" = as.integer(as.character(x = clusters)))
@@ -1437,3 +1437,104 @@ get_default_embedding<-function(loom) {
14371437
return (loom[["col_attrs"]][[CA_EMBEDDING_NAME]])
14381438
}
14391439

1440+
#'@title get_clustering_idx_by_cluster_name
#'@description Get the index of the clustering containing the cluster with the given cluster.name.
#'@param loom The loom file handler.
#'@param cluster.name The name (description) of the cluster.
#'@return The index, in the clusterings global meta data attribute, of the first clustering having a cluster whose description equals the given cluster.name. NA if no clustering contains such a cluster.
get_clustering_idx_by_cluster_name<-function(loom
                                             , cluster.name) {
  # Get global meta data
  md<-get_global_meta_data(loom = loom)
  # Visit the clusterings in order and stop at the first one holding the cluster.
  # Only cluster descriptions are compared, so a clustering name or group equal to
  # cluster.name is not mistaken for a cluster.
  for(i in seq_along(md$clusterings)) {
    is.cluster<-vapply(X = md$clusterings[[i]]$clusters
                       , FUN = function(cluster) isTRUE(cluster$description == cluster.name)
                       , FUN.VALUE = logical(1))
    if(any(is.cluster)) {
      return (i)
    }
  }
  # Not found: NA so that the result can be tested with is.na()
  return (NA_integer_)
}
1463+
1464+
#'@title get_cluster_info_by_cluster_name
#'@description Get cluster information (Clustering ID, Clustering Name, Clustering Group, Cluster ID, Cluster Name) of the given cluster.name.
#'@param loom The loom file handler.
#'@param cluster.name The name (description) of the cluster.
#'@return A list with the elements clustering.id, clustering.name, clustering.group, cluster.id and cluster.name describing the cluster with the given cluster.name. An error is raised if the cluster cannot be found.
#'@export
get_cluster_info_by_cluster_name<-function(loom
                                           , cluster.name) {
  # Get global meta data
  md<-get_global_meta_data(loom = loom)
  # Get the index of the clustering in the meta data clusterings
  clustering.idx<-get_clustering_idx_by_cluster_name(loom = loom, cluster.name = cluster.name)
  if(is.na(x = clustering.idx)) {
    stop(paste0("The given cluster ", cluster.name, " does not exist in this .loom."))
  }
  clustering<-md$clusterings[[clustering.idx]]
  # Locate the cluster inside the clustering by its description
  is.cluster<-vapply(X = clustering$clusters
                     , FUN = function(cluster) isTRUE(cluster$description == cluster.name)
                     , FUN.VALUE = logical(1))
  # Guard against a clustering that does not hold the cluster: indexing with an
  # empty position would otherwise fail with an opaque subscript error
  if(!any(is.cluster)) {
    stop(paste0("The given cluster ", cluster.name, " does not exist in this .loom."))
  }
  # Keep the first match only
  cluster<-clustering$clusters[[which(is.cluster)[1]]]
  return (list(clustering.id=clustering$id
               , clustering.name=clustering$name
               , clustering.group=clustering$group
               , cluster.id=cluster$id
               , cluster.name=cluster$description))
}
1488+
1489+
#'@title get_clusterings
#'@description Get the clusterings of the given loom.
#'@param loom The loom file handler.
#'@return A N-by-M data.frame containing the clusterings of the given loom. N represents the cells and M the clusterings.
#'@export
get_clusterings<-function(loom) {
  # The clusterings are stored as the column attribute named by CA_CLUSTERINGS_NAME
  return (get_col_attr_by_key(loom = loom, key = CA_CLUSTERINGS_NAME))
}
1498+
1499+
#'@title get_clustering_by_id
#'@description Get clustering with the given clustering.id of the given loom.
#'@param loom The loom file handler.
#'@param clustering.id The ID of the clustering.
#'@return A N-by-1 vector containing the cell assignments to each of the clusters of the clustering. An error is raised if no clustering has the given clustering.id.
#'@export
get_clustering_by_id<-function(loom
                               , clustering.id) {
  ca.clusterings<-get_clusterings(loom = loom)
  # Select the clustering through the column names of the clusterings
  mask<-colnames(ca.clusterings)%in%clustering.id
  # Fail loudly instead of returning an empty selection that would silently
  # turn into an empty result further downstream
  if(!any(mask)) {
    stop(paste0("The given clustering ID ", paste(clustering.id, collapse = ", "), " does not exist in this .loom."))
  }
  return (ca.clusterings[, mask])
}
1510+
1511+
#'@title get_cell_mask_by_cluster_name
#'@description Build a cell mask selecting the cells assigned to the cluster with the given cluster.name.
#'@param loom The loom file handler.
#'@param cluster.name The name of the cluster.
#'@return A N-by-1 boolean vector specifying which cells belong to the given cluster.name in the given loom.
get_cell_mask_by_cluster_name<-function(loom
                                        , cluster.name) {
  # Resolve the cluster name to its clustering ID and cluster ID
  info<-get_cluster_info_by_cluster_name(loom = loom, cluster.name = cluster.name)
  # Cell assignments of the clustering holding the cluster
  assignments<-get_clustering_by_id(loom = loom, clustering.id = info[["clustering.id"]])
  # TRUE for every cell assigned to the cluster
  is.element(el = assignments, table = info[["cluster.id"]])
}
1525+
1526+
#'@title get_cluster_dgem_by_name
#'@description Get a subset of the digital gene expression matrix containing only the cells in the cluster annotated by the given cluster.name.
#'@param loom The loom file handler.
#'@param cluster.name The name/description of the cluster.
#'@return A N-by-M matrix containing the gene expression levels of the cells in the cluster annotated by the given cluster.name. N represents the genes and M the cells.
#'@export
get_cluster_dgem_by_name<-function(loom
                                   , cluster.name) {
  # Get the cell mask for the given cluster.name
  mask<-get_cell_mask_by_cluster_name(loom = loom, cluster.name = cluster.name)
  dgem<-get_dgem(loom = loom)
  # drop = FALSE keeps the result a matrix even when the cluster holds a single cell
  return (dgem[, mask, drop = FALSE])
}
1539+
1540+

README.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
# SCopeLoomR v0.2.2
2-
An R package (compatible with SCope) to create generic .loom files and extend them with other data e.g.: SCENIC regulons, Seurat clusters and markers, ...
1+
# SCopeLoomR v0.3.0
2+
An R package (compatible with SCope) to create generic .loom files and extend them with other data e.g.: SCENIC regulons, Seurat clusters and markers, ... The package can also be used to extract
3+
data from .loom files.
34

45
## Installation
56

@@ -12,10 +13,16 @@ install_github("aertslab/SCopeLoomR")
1213
```
1314

1415
## Tutorial
15-
You can find a tutorial on how to create .loom files [here](https://github.com/aertslab/SCopeLoomR/blob/master/vignettes/SCopeLoomR_tutorial.Rmd).
16+
You can find a tutorial on how to create .loom files and extract data from them [here](https://github.com/aertslab/SCopeLoomR/blob/master/vignettes/SCopeLoomR_tutorial.Rmd).
1617

1718
## Version History
1819

20+
November 8, 2018
21+
22+
* Version 0.3.0
23+
* Add feature to extract the gene expression matrix of a given cluster.
24+
* Update [tutorial](https://github.com/aertslab/SCopeLoomR/blob/master/vignettes/SCopeLoomR_tutorial.Rmd).
25+
1926
October 31, 2018
2027

2128
* Version 0.2.2

man/get_cell_mask_by_cluster_name.Rd

Lines changed: 19 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_cluster_dgem_by_name.Rd

Lines changed: 19 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_cluster_info_by_cluster_name.Rd

Lines changed: 19 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_clustering_by_id.Rd

Lines changed: 19 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_clustering_idx_by_cluster_name.Rd

Lines changed: 19 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/get_clusterings.Rd

Lines changed: 19 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vignettes/SCopeLoomR_tutorial.Rmd

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
---
2-
title: "SCopeLoomR tutorial"
2+
title: "SCopeLoomR Tutorial - Create a .loom file and extract data from a .loom"
33
package: r pkg_ver('SCopeLoomR')
44
output:
55
html_notebook:
66
toc: yes
77
html_document:
8+
keep_md: true
89
df_print: paged
910
toc: yes
1011
BiocStyle::html_document:
@@ -200,6 +201,24 @@ add_embedding(loom = loom, embedding = monocle.embedding, name = "Monocle (DDRTr
200201
finalize(loom=loom)
201202
```
202203

204+
# Extract data from a loom object
205+
206+
The .loom file related to this part of the tutorial can be downloaded at http://scope.aertslab.org in the left panel under `Drosophila` > `Brain`.
207+
208+
## Get the gene expression matrix
209+
210+
```{r}
211+
loom<-open_loom(loom = "Aerts_Fly_AdultBrain_Filtered_57k.loom")
212+
dgem<-get_dgem(loom = loom)
213+
```
214+
215+
## Get the gene expression matrix of a given cluster
216+
217+
```{r}
218+
loom<-open_loom(loom = "Aerts_Fly_AdultBrain_Filtered_57k.loom")
219+
cluster.10.dgem<-get_cluster_dgem_by_name(loom = loom, cluster.name = "MBON - Cluster 57")
220+
```
221+
203222

204223

205224

vignettes/SCopeLoomR_tutorial.nb.html

Lines changed: 84 additions & 31 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)