@@ -339,7 +339,8 @@ def read_genes_gff3(gff3_file,chr=None,start=None,end=None,gene_names=None,colla
339
339
else : infile = open (gff3_file ,"r" )
340
340
for line in infile :
341
341
if line .startswith ("#" ): continue
342
- linesplit = line .split ("\t " )
342
+ if line == "\n " : continue
343
+ linesplit = line .rstrip ("\n " ).split ("\t " )
343
344
if len (linesplit )< 9 : raise KnownException ("Wrong format for the genes file (there should be at least 9 columns per line)." )
344
345
345
346
if chr is not None and linesplit [0 ].lstrip ("chr" ) != chr .lstrip ("chr" ): continue
@@ -427,7 +428,7 @@ def find_genecoord_wrapper(gene_name,reference,genes_file=None):
427
428
if reference in ["hg19" ,"hg38" ,"mm10" ]:
428
429
429
430
with resources .as_file (resources .files (figeno .data ) / (reference + "_genes.txt.gz" )) as infile :
430
- (chr ,min_coord ,max_coord ) = find_genecoord_refseq (gene_name ,infile )
431
+ (chr ,min_coord ,max_coord ) = find_genecoord_refseq (gene_name ,infile , custom_ref = False )
431
432
with resources .as_file (resources .files (figeno .data ) / (reference + "_cytobands.tsv" )) as infile2 : #Ensure the end is smaller than the chr size.
432
433
chr_length = - 1
433
434
with open (infile2 ,"r" ) as f :
@@ -452,7 +453,7 @@ def find_genecoord_wrapper(gene_name,reference,genes_file=None):
452
453
else :
453
454
return find_genecoord_refseq (gene_name ,genes_file )
454
455
455
- def find_genecoord_refseq (gene_name ,file = None ):
456
+ def find_genecoord_refseq (gene_name ,file = None , custom_ref = True ):
456
457
chr = ""
457
458
min_coord = 1e10
458
459
max_coord = 0
@@ -467,9 +468,11 @@ def find_genecoord_refseq(gene_name,file=None):
467
468
if line .startswith ("#" ): continue
468
469
linesplit = line .split ("\t " )
469
470
if linesplit [12 ].upper ()== gene_name .upper ():
470
- chr = linesplit [2 ].lstrip ("chr" )
471
- min_coord = min (min_coord ,int (linesplit [4 ]))
472
- max_coord = max (max_coord ,int (linesplit [5 ]))
471
+ chr_tmp = linesplit [2 ].lstrip ("chr" )
472
+ if (not custom_ref ) and (not "_" in chr_tmp ): # avoid alternative contigs...
473
+ chr = linesplit [2 ].lstrip ("chr" )
474
+ min_coord = min (min_coord ,int (linesplit [4 ]))
475
+ max_coord = max (max_coord ,int (linesplit [5 ]))
473
476
474
477
if chr != "" :
475
478
length = max_coord - min_coord
0 commit comments