Skip to content

Commit a7e4ee9

Browse files
Merge pull request #150 from omnideconv/update_conversion_genes
Update of the conversion function
2 parents ad0597a + 6ca3d92 commit a7e4ee9

File tree

2 files changed

+84
-27
lines changed

2 files changed

+84
-27
lines changed

R/mouse_deconvolution_methods.R

Lines changed: 74 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -244,47 +244,96 @@ deconvolute_mouse <- function(gene_expression_matrix,
244244

245245
#' This function converts the mouse gene symbols into corresponding human ones.
246246
#'
247-
#' This function relies on the `biomaRt`` package.
247+
#' This function relies on the `biomaRt` package and connects to the ENSEMBL repository
248+
#' to retrieve the gene symbols. If ENSEMBL cannot be reached, another solution will be
249+
#' used. Since it is memory intensive, users can choose not to run it.
248250
#'
249251
#' @param gene_expression_matrix a m x n matrix with m genes and n samples.
250252
#' Gene symbols must be the rownames of the matrix.
251253
#' @param mirror the ensembl mirror to use. Possible choices are 'www' (default),
252254
#' 'uswest', 'useast', 'asia'
255+
#' @param other_annot boolean, whether to run the alternative conversion method when ENSEMBL cannot be reached (might be memory intensive)
253256
#' @return the same matrix, with the counts for the corresponding human genes.
254257
#' This matrix can directly be used with the immunedeconv methods. A message
255258
#' will display the ratio of original genes which were converted.
256259
#'
257260
#' @export
258-
mouse_genes_to_human <- function(gene_expression_matrix, mirror = "www") {
261+
mouse_genes_to_human <- function(gene_expression_matrix, mirror = "www", other_annot = TRUE) {
  # Convert the row names of an expression table from mouse gene symbols to
  # the corresponding human gene symbols.
  #
  # Arguments:
  #   gene_expression_matrix  genes x samples matrix or data.frame whose row
  #                           names are mouse gene symbols.
  #   mirror                  ENSEMBL mirror handed to `useEnsembl()`.
  #   other_annot             if TRUE, fall back to the MGI homology report
  #                           when the ENSEMBL query fails.
  # Returns:
  #   a data.frame with the same sample columns, one row per converted human
  #   gene symbol (first occurrence wins when several mouse genes map to the
  #   same human symbol). A message reports the percentage of rows kept.
  # Errors:
  #   stops if ENSEMBL cannot be queried and `other_annot` is not TRUE.

  # Coerce once so that a plain matrix is accepted as well as a data.frame
  # (`$<-` on a matrix would fail).
  expression_df <- as.data.frame(gene_expression_matrix)
  gene.names.mouse <- rownames(expression_df)

  # First attempt: ENSEMBL via biomaRt. The connection set-up is inside the
  # tryCatch because it is the step most likely to fail when the server is
  # unreachable. On failure the condition object itself is returned so the
  # reason can be reported below instead of being swallowed.
  orthologs <- tryCatch(
    {
      human <- useEnsembl("ensembl", dataset = "hsapiens_gene_ensembl", mirror = mirror)
      mouse <- useEnsembl("ensembl", dataset = "mmusculus_gene_ensembl", mirror = mirror)
      genes.retrieved <- getLDS(
        attributes = c("mgi_symbol"),
        filters = "mgi_symbol", values = gene.names.mouse,
        mart = mouse, attributesL = c("hgnc_symbol"), martL = human, uniqueRows = TRUE
      )
      # Treat an unexpected result layout like a failed query.
      stopifnot(all(c("MGI.symbol", "HGNC.symbol") %in% colnames(genes.retrieved)))
      data.frame(
        mouse_gene = as.character(genes.retrieved[["MGI.symbol"]]),
        human_gene = as.character(genes.retrieved[["HGNC.symbol"]]),
        stringsAsFactors = FALSE
      )
    },
    error = function(e) e
  )

  if (inherits(orthologs, "error")) {
    if (!isTRUE(other_annot)) {
      stop(
        "Cannot connect to ENSEMBL (", conditionMessage(orthologs),
        ") and the alternative method is disabled (other_annot = FALSE).",
        call. = FALSE
      )
    }
    message("Cannot connect to ENSEMBL. Using alternative method. This will take some time.")

    # Code adapted from: https://support.bioconductor.org/p/129636/#9144606
    mouse_human_genes <- read.csv(
      "http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt",
      sep = "\t", stringsAsFactors = FALSE
    )

    organism <- mouse_human_genes[["Common.Organism.Name"]]
    is_mouse <- organism == "mouse, laboratory" &
      mouse_human_genes[["Symbol"]] %in% gene.names.mouse
    is_human <- organism == "human"

    # Pair mouse and human entries that share a homology class key. A single
    # keyed merge replaces the per-gene table scan and copes with symbols that
    # belong to more than one class.
    orthologs <- merge(
      data.frame(
        DB.Class.Key = mouse_human_genes[["DB.Class.Key"]][is_mouse],
        mouse_gene = as.character(mouse_human_genes[["Symbol"]][is_mouse]),
        stringsAsFactors = FALSE
      ),
      data.frame(
        DB.Class.Key = mouse_human_genes[["DB.Class.Key"]][is_human],
        human_gene = as.character(mouse_human_genes[["Symbol"]][is_human]),
        stringsAsFactors = FALSE
      ),
      by = "DB.Class.Key"
    )
  }

  # Keep usable pairs only: a non-missing, non-empty human symbol and a mouse
  # symbol that is present in the input.
  usable <- !is.na(orthologs[["human_gene"]]) &
    nzchar(orthologs[["human_gene"]]) &
    orthologs[["mouse_gene"]] %in% gene.names.mouse
  orthologs <- orthologs[usable, , drop = FALSE]

  # Follow the row order of the input, then keep the first occurrence of each
  # human symbol so that the result can carry unique row names.
  input_position <- match(orthologs[["mouse_gene"]], gene.names.mouse)
  orthologs <- orthologs[order(input_position), , drop = FALSE]
  orthologs <- orthologs[!duplicated(orthologs[["human_gene"]]), , drop = FALSE]

  newGenes.counts <- expression_df[
    match(orthologs[["mouse_gene"]], gene.names.mouse), ,
    drop = FALSE
  ]
  rownames(newGenes.counts) <- orthologs[["human_gene"]]

  # Multiply before rounding: `100 * x %>% round(., 1)` rounds the ratio first
  # because `%>%` binds tighter than `*`.
  total_genes <- nrow(expression_df)
  fraction <- if (total_genes > 0) {
    round(100 * nrow(newGenes.counts) / total_genes, 1)
  } else {
    0
  }
  message(paste0("ATTENTION: Only the ", fraction, "% of genes was maintained"))

  newGenes.counts
}

man/mouse_genes_to_human.Rd

Lines changed: 10 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)