Skip to content

Commit 7077c8f

Browse files
committed
Merge pull request #39 from hammerlab/search_over_loci
Search over loci
2 parents 3bedf41 + e124676 commit 7077c8f

File tree

6 files changed

+111
-7
lines changed

6 files changed

+111
-7
lines changed

pyensembl/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@
66
from .gtf import GTF
77
from .locus import Locus
88
from .reference_transcripts import ReferenceTranscripts
9-
from .transcript import Transcript
9+
from .search import find_nearest_locus
10+
from .transcript import Transcript

pyensembl/locus.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,27 @@ def can_overlap(self, contig, strand=None):
171171
and
172172
(strand is None or self.on_strand(strand)))
173173

174+
def distance_to_interval(self, start, end):
    """
    Distance from this locus to the interval [start, end].

    Returns the gap between the nearest endpoints of the two ranges,
    or 0 when the interval touches or overlaps this locus.
    """
    if end < self.start:
        # the whole interval lies before this locus begins
        return self.start - end
    if start > self.end:
        # the whole interval lies after this locus ends
        return start - self.end
    # neither range is strictly past the other, so they share a position
    return 0
187+
188+
def distance_to_locus(self, other):
    """
    Distance from this locus to another locus.

    Returns infinity when `can_overlap` rejects the other locus's contig
    and strand, since no meaningful distance exists between such loci;
    otherwise returns the distance to the other locus's [start, end] range.
    """
    if self.can_overlap(other.contig, other.strand):
        return self.distance_to_interval(other.start, other.end)
    return float("inf")
194+
174195
def overlaps(self, contig, start, end, strand=None):
175196
"""
176197
Does this locus overlap with a given range of positions?
@@ -181,9 +202,7 @@ def overlaps(self, contig, start, end, strand=None):
181202
return (
182203
self.can_overlap(contig, strand)
183204
and
184-
end >= self.start
185-
and
186-
start <= self.end)
205+
self.distance_to_interval(start, end) == 0)
187206

188207
def overlaps_locus(self, other_locus):
189208
return self.overlaps(
@@ -205,4 +224,4 @@ def contains_locus(self, other_locus):
205224
other_locus.contig,
206225
other_locus.start,
207226
other_locus.end,
208-
other_locus.strand)
227+
other_locus.strand)

pyensembl/search.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
"""
2+
Helper functions for searching over collections of PyEnsembl objects
3+
"""
4+
5+
def find_nearest_locus(start, end, loci):
    """
    Search `loci` for the object closest to the interval [start, end].

    Each element of `loci` must provide a `distance_to_interval(start, end)`
    method. Returns a `(distance, locus)` pair; when several loci are
    equally close the first one encountered wins, and an empty collection
    yields `(float("inf"), None)`.
    """
    nearest = None
    smallest_gap = float("inf")
    for candidate in loci:
        gap = candidate.distance_to_interval(start, end)
        # strict comparison keeps the earliest candidate on ties
        if gap < smallest_gap:
            nearest, smallest_gap = candidate, gap
    return smallest_gap, nearest

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
if __name__ == '__main__':
3232
setup(
3333
name='pyensembl',
34-
version="0.5.3",
34+
version="0.5.4",
3535
description="Python interface to ensembl reference genome metadata",
3636
author="Alex Rubinsteyn",
3737
author_email="alex {dot} rubinsteyn {at} mssm {dot} edu",

test/test_locus.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
from nose.tools import assert_raises
44

5-
65
def test_normalize_chromosome():
76
assert normalize_chromosome("X") == "X"
87
assert normalize_chromosome("chrX") == "X"
@@ -162,4 +161,14 @@ def test_range_offset():
162161
with assert_raises(ValueError):
163162
negative_locus.offset_range(9, 10)
164163

164+
def test_locus_distance():
    """
    `distance_to_locus` is symmetric for adjacent loci on the same contig
    and strand, and infinite across different contigs or strands.
    """
    unreachable = float("inf")
    chr1_plus_10_20 = Locus("1", 10, 20, "+")
    chr1_plus_21_25 = Locus("1", 21, 25, "+")
    chr2_plus_21_25 = Locus("2", 21, 25, "+")
    chr1_minus_21_25 = Locus("1", 21, 25, "-")

    # adjacent ranges on the same contig and strand, checked in both directions
    assert chr1_plus_10_20.distance_to_locus(chr1_plus_21_25) == 1
    assert chr1_plus_21_25.distance_to_locus(chr1_plus_10_20) == 1

    # a different contig, then a different strand
    assert chr1_plus_10_20.distance_to_locus(chr2_plus_21_25) == unreachable
    assert chr1_plus_10_20.distance_to_locus(chr1_minus_21_25) == unreachable
165174

test/test_search.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from test_common import test_ensembl_releases
2+
from pyensembl import EnsemblRelease, find_nearest_locus
3+
from nose.tools import eq_
4+
5+
@test_ensembl_releases
def test_find_nearest_BRAF_exon(ensembl):
    """
    For every exon of the first BRAF transcript, query `find_nearest_locus`
    with intervals just before, overlapping, and just after the exon, and
    check that the exon itself comes back with the expected distance.
    """
    braf = ensembl.genes_by_name("BRAF")[0]
    braf_transcripts = braf.transcripts
    # the same exon list is both iterated over and used as the search space
    exons = braf_transcripts[0].exons
    for exon in exons:
        # immediately before exon: the query has to stop one position short
        # of exon.start, otherwise it overlaps the exon and the distance is 0
        result = find_nearest_locus(
            start=exon.start-2,
            end=exon.start-1,
            loci=exons)
        eq_(result, (1, exon))

        # overlapping with exon
        result = find_nearest_locus(
            start=exon.start-2,
            end=exon.start+1,
            loci=exons)
        eq_(result, (0, exon))

        # immediately after exon
        result = find_nearest_locus(
            start=exon.end+1,
            end=exon.end+2,
            loci=exons)
        eq_(result, (1, exon))
30+
31+
@test_ensembl_releases
def test_find_nearest_BRAF_transcript(ensembl):
    """
    For every BRAF transcript, query `find_nearest_locus` with intervals
    just before, overlapping, and just after the transcript, and check that
    the transcript itself comes back with the expected distance.
    """
    braf = ensembl.genes_by_name("BRAF")[0]
    braf_transcripts = braf.transcripts
    # NOTE(review): transcripts of one gene presumably overlap each other; if
    # so, a sibling transcript can be at distance 0 from these queries and
    # the exact-match assertions below will fail. Consider searching over
    # transcripts that are known not to overlap -- TODO confirm.
    for transcript in braf_transcripts:
        # immediately before transcript: the query has to stop one position
        # short of transcript.start, otherwise it overlaps the transcript
        result = find_nearest_locus(
            start=transcript.start-2,
            end=transcript.start-1,
            loci=braf_transcripts)
        eq_(result, (1, transcript))

        # overlapping with transcript
        result = find_nearest_locus(
            start=transcript.start-2,
            end=transcript.start+1,
            loci=braf_transcripts)
        eq_(result, (0, transcript))

        # immediately after transcript
        result = find_nearest_locus(
            start=transcript.end+1,
            end=transcript.end+2,
            loci=braf_transcripts)
        eq_(result, (1, transcript))

0 commit comments

Comments
 (0)