Skip to content

Commit 4d36a66

Browse files
committed
improve around gene function
1 parent b3c9045 commit 4d36a66

File tree

1 file changed

+9
-6
lines changed

1 file changed

+9
-6
lines changed

figeno/genes.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,8 @@ def read_genes_gff3(gff3_file,chr=None,start=None,end=None,gene_names=None,colla
339339
else: infile =open(gff3_file,"r")
340340
for line in infile:
341341
if line.startswith("#"): continue
342-
linesplit = line.split("\t")
342+
if line=="\n": continue
343+
linesplit = line.rstrip("\n").split("\t")
343344
if len(linesplit)<9: raise KnownException("Wrong format for the genes file (there should be at least 9 columns per line).")
344345

345346
if chr is not None and linesplit[0].lstrip("chr") != chr.lstrip("chr"): continue
@@ -427,7 +428,7 @@ def find_genecoord_wrapper(gene_name,reference,genes_file=None):
427428
if reference in ["hg19","hg38","mm10"]:
428429

429430
with resources.as_file(resources.files(figeno.data) / (reference+"_genes.txt.gz")) as infile:
430-
(chr,min_coord,max_coord) = find_genecoord_refseq(gene_name,infile)
431+
(chr,min_coord,max_coord) = find_genecoord_refseq(gene_name,infile,custom_ref=False)
431432
with resources.as_file(resources.files(figeno.data) / (reference+"_cytobands.tsv")) as infile2: #Ensure the end is smaller than the chr size.
432433
chr_length=-1
433434
with open(infile2,"r") as f:
@@ -452,7 +453,7 @@ def find_genecoord_wrapper(gene_name,reference,genes_file=None):
452453
else:
453454
return find_genecoord_refseq(gene_name,genes_file)
454455

455-
def find_genecoord_refseq(gene_name,file=None):
456+
def find_genecoord_refseq(gene_name,file=None,custom_ref=True):
456457
chr=""
457458
min_coord=1e10
458459
max_coord=0
@@ -467,9 +468,11 @@ def find_genecoord_refseq(gene_name,file=None):
467468
if line.startswith("#"): continue
468469
linesplit = line.split("\t")
469470
if linesplit[12].upper()==gene_name.upper():
470-
chr=linesplit[2].lstrip("chr")
471-
min_coord=min(min_coord,int(linesplit[4]))
472-
max_coord=max(max_coord,int(linesplit[5]))
471+
chr_tmp=linesplit[2].lstrip("chr")
472+
if (not custom_ref) and (not "_" in chr_tmp): # avoid alternative contigs...
473+
chr=linesplit[2].lstrip("chr")
474+
min_coord=min(min_coord,int(linesplit[4]))
475+
max_coord=max(max_coord,int(linesplit[5]))
473476

474477
if chr!="":
475478
length=max_coord-min_coord

0 commit comments

Comments
 (0)