@@ -244,47 +244,96 @@ deconvolute_mouse <- function(gene_expression_matrix,
244
244
245
245
#' This function converts the mouse gene symbols into corresponding human ones.
#'
#' This function relies on the `biomaRt` package and connects to the ENSEMBL repository
#' to retrieve the gene symbols. If ENSEMBL cannot be reached, the mouse/human homology
#' report downloaded from informatics.jax.org is used instead. Since this alternative
#' might be memory intensive, users can choose not to run it.
#'
#' @param gene_expression_matrix a m x n matrix (or data frame) with m genes and n samples.
#'   Gene symbols must be the rownames of the matrix.
#' @param mirror the ensembl mirror to use. Possible choices are 'www' (default),
#'   'uswest', 'useast', 'asia'
#' @param other_annot boolean, whether to run the other conversion method
#'   (might be memory intensive) when ENSEMBL cannot be queried. If `FALSE` and the
#'   ENSEMBL query fails, an error is raised.
#' @return a data frame with the same sample columns, holding the counts for the
#'   corresponding human genes (human gene symbols are the rownames). Rows without a
#'   human counterpart are dropped and only the first row is kept for each human symbol.
#'   This object can directly be used with the immunedeconv methods. A message
#'   will display the ratio of original genes which were converted.
#'
#' @export
mouse_genes_to_human <- function(gene_expression_matrix, mirror = "www", other_annot = TRUE) {
  gene.names.mouse <- rownames(gene_expression_matrix)
  if (is.null(gene.names.mouse)) {
    stop("`gene_expression_matrix` must have the mouse gene symbols as rownames", call. = FALSE)
  }

  # A plain matrix does not support `$`/joins; always work on a data frame.
  expression_df <- as.data.frame(gene_expression_matrix)

  # First choice: ENSEMBL through biomaRt. The mart creation is part of the guarded
  # expression because it is the first call that fails when ENSEMBL is unreachable.
  genes.retrieved <- tryCatch(
    {
      human <- useEnsembl("ensembl", dataset = "hsapiens_gene_ensembl", mirror = mirror)
      mouse <- useEnsembl("ensembl", dataset = "mmusculus_gene_ensembl", mirror = mirror)
      linked <- getLDS(
        attributes = c("mgi_symbol"), filters = "mgi_symbol", values = gene.names.mouse,
        mart = mouse, attributesL = c("hgnc_symbol"), martL = human, uniqueRows = TRUE
      )
      data.frame(
        mouse_gene = as.character(linked[["MGI.symbol"]]),
        human_gene = as.character(linked[["HGNC.symbol"]]),
        stringsAsFactors = FALSE
      )
    },
    error = function(e) {
      message("ENSEMBL query failed: ", conditionMessage(e))
      NULL
    }
  )

  # Second choice: the homology report, only if the user allows it.
  if (is.null(genes.retrieved)) {
    if (!isTRUE(other_annot)) {
      stop(
        "Cannot connect to ENSEMBL and the alternative method is disabled ",
        "(other_annot = FALSE).",
        call. = FALSE
      )
    }
    message("Cannot connect to ENSEMBL. Using alternative method. This will take some time.")

    # Code adapted from: https://support.bioconductor.org/p/129636/#9144606
    mouse_human_genes <- read.csv(
      "http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt",
      sep = "\t", stringsAsFactors = FALSE
    )
    genes.retrieved <- .mgi_homologs_from_table(gene.names.mouse, mouse_human_genes)
  }

  newGenes.counts <- .remap_rows_to_human(expression_df, gene.names.mouse, genes.retrieved)

  # Round the percentage itself (not the 0-1 ratio) to one decimal.
  fraction <- round(100 * nrow(newGenes.counts) / nrow(expression_df), 1)
  message(paste0("ATTENTION: Only the ", fraction, "% of genes was maintained"))

  newGenes.counts
}

# Build a mouse -> human symbol table from a homology report data frame.
# The report is expected to carry the columns `DB.Class.Key`, `Common.Organism.Name`
# and `Symbol`; mouse and human rows sharing a class key are treated as homologs.
.mgi_homologs_from_table <- function(gene_names, homology_table) {
  organism <- as.character(homology_table[["Common.Organism.Name"]])
  symbol <- as.character(homology_table[["Symbol"]])
  slim <- data.frame(
    DB.Class.Key = homology_table[["DB.Class.Key"]],
    Symbol = symbol,
    stringsAsFactors = FALSE
  )

  mouse_rows <- slim[organism == "mouse, laboratory" & symbol %in% gene_names, , drop = FALSE]
  human_rows <- slim[organism == "human", , drop = FALSE]

  # One merge replaces the per-gene lookup and copes with symbols that belong
  # to more than one class key.
  paired <- merge(mouse_rows, human_rows, by = "DB.Class.Key", suffixes = c(".mouse", ".human"))

  data.frame(
    mouse_gene = as.character(paired[["Symbol.mouse"]]),
    human_gene = as.character(paired[["Symbol.human"]]),
    stringsAsFactors = FALSE
  )
}

# Re-key the rows of `expression_df` (one row per entry of `gene_names`) by human
# symbol using `mapping` (columns `mouse_gene`, `human_gene`). Row order follows the
# input; rows without a human symbol are dropped and the first row wins for every
# duplicated human symbol.
.remap_rows_to_human <- function(expression_df, gene_names, mapping) {
  # For every expression row, the mapping rows whose mouse symbol matches it.
  hits_per_symbol <- split(seq_len(nrow(mapping)), mapping[["mouse_gene"]])
  hits_per_row <- unname(hits_per_symbol[as.character(gene_names)])

  expression_rows <- rep(seq_along(gene_names), lengths(hits_per_row))
  mapping_rows <- as.integer(unlist(hits_per_row, use.names = FALSE))
  human_symbols <- as.character(mapping[["human_gene"]])[mapping_rows]

  keep <- !is.na(human_symbols) & !duplicated(human_symbols)

  converted <- expression_df[expression_rows[keep], , drop = FALSE]
  rownames(converted) <- human_symbols[keep]
  converted
}
0 commit comments