Skip to content

Commit 98b61b2

Browse files
authored
Added protein coding biotype tests (#166)
* added unit tests for Transcript.biotype and Gene.biotype
* version bump
1 parent 2b83f78 commit 98b61b2

File tree

3 files changed

+35
-21
lines changed

3 files changed

+35
-21
lines changed

pyensembl/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
)
3636
from .transcript import Transcript
3737

38-
__version__ = '1.0.0'
38+
__version__ = '1.0.1'
3939

4040
def cached_release(release, species="human"):
4141
"""

test/test_gene_objects.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,25 @@
11
from __future__ import absolute_import
22

3+
from nose.tools import eq_
4+
35
from .common import test_ensembl_releases
46
from .data import TP53_gene_id
57

68
@test_ensembl_releases()
7-
def test_TP53_gene_object_by_id(ensembl):
9+
def test_TP53_gene_object_by_id(genome):
810
# when we look up TP53 by its gene ID, we should get the
911
# correct gene back
10-
gene = ensembl.gene_by_id(TP53_gene_id)
12+
gene = genome.gene_by_id(TP53_gene_id)
1113
assert gene.name == "TP53", \
1214
"Incorrect gene name %s for gene ID %s in %s" % (
13-
gene.name, gene.id, ensembl)
15+
gene.name, gene.id, genome)
1416
assert gene.contig == "17", \
1517
"Incorrect gene contig %s for gene ID %s in %s" % (
16-
gene.contig, gene.id, ensembl)
18+
gene.contig, gene.id, genome)
1719

1820
@test_ensembl_releases()
19-
def test_TP53_gene_object_by_name(ensembl):
20-
genes = ensembl.genes_by_name("TP53")
21+
def test_TP53_gene_object_by_name(genome):
22+
genes = genome.genes_by_name("TP53")
2123
# we should only have one TP53 gene (there aren't any copies)
2224
assert len(genes) == 1, \
2325
"Expected only one gene with name TP53, got %s" % (genes,)
@@ -26,17 +28,23 @@ def test_TP53_gene_object_by_name(ensembl):
2628
"Expected gene to have ID %s, got %s" % (TP53_gene_id, genes[0].id)
2729

2830
@test_ensembl_releases()
29-
def test_equal_genes(ensembl):
30-
gene1 = ensembl.genes_by_name("TP53")[0]
31+
def test_equal_genes(genome):
32+
gene1 = genome.genes_by_name("TP53")[0]
3133
# get an identical gene
32-
gene2 = ensembl.gene_by_id(gene1.id)
34+
gene2 = genome.gene_by_id(gene1.id)
3335

3436
assert hash(gene1) == hash(gene2)
3537
assert gene1 == gene2
3638

3739
@test_ensembl_releases()
38-
def test_not_equal_genes(release):
39-
gene1 = release.genes_by_name("MUC1")[0]
40-
gene2 = release.genes_by_name("BRCA1")[0]
40+
def test_not_equal_genes(genome):
41+
gene1 = genome.genes_by_name("MUC1")[0]
42+
gene2 = genome.genes_by_name("BRCA1")[0]
4143
assert hash(gene1) != hash(gene2)
4244
assert gene1 != gene2
45+
46+
@test_ensembl_releases()
47+
def test_BRCA1_protein_coding_biotype(genome):
48+
gene = genome.genes_by_name("BRCA1")[0]
49+
assert gene.is_protein_coding
50+
eq_(gene.biotype, "protein_coding")

test/test_transcript_objects.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,10 @@ def test_transcript_exons():
7777
# TODO: Add gene_id patching to gtf_parsing, add ensembl54 to the list
7878
# below
7979
@test_ensembl_releases(75, 77)
80-
def test_sequence_parts(ensembl):
80+
def test_sequence_parts(genome):
8181
# Ensure that the UTRs and coding sequence can be
8282
# combined to make the full transcript.
83-
transcript = ensembl.transcript_by_id(FOXP3_001_transcript_id)
83+
transcript = genome.transcript_by_id(FOXP3_001_transcript_id)
8484

8585
# The combined lengths of the upstream untranslated region,
8686
# coding sequence, and downstream untranslated region
@@ -149,17 +149,17 @@ def test_transcript_cds_CTNNIP1_004():
149149
eq_(cds, CTNNBIP1_004_CDS)
150150

151151
@test_ensembl_releases()
152-
def test_equal_transcripts(ensembl):
153-
t1 = ensembl.transcripts_by_name("TP53-001")[0]
152+
def test_equal_transcripts(genome):
153+
t1 = genome.transcripts_by_name("TP53-001")[0]
154154
# get an identical transcript
155-
t2 = ensembl.transcript_by_id(t1.id)
155+
t2 = genome.transcript_by_id(t1.id)
156156
eq_(t1, t2)
157157
eq_(hash(t1), hash(t2))
158158

159159
@test_ensembl_releases()
160-
def test_not_equal_transcripts(release):
161-
t1 = release.transcripts_by_name("MUC1-001")[0]
162-
t2 = release.transcripts_by_name("BRCA1-001")[0]
160+
def test_not_equal_transcripts(genome):
161+
t1 = genome.transcripts_by_name("MUC1-001")[0]
162+
t2 = genome.transcripts_by_name("BRCA1-001")[0]
163163
assert_not_equal(t1, t2)
164164

165165
def test_protein_id():
@@ -174,3 +174,9 @@ def test_transcript_gene_should_match_parent_gene():
174174
gene = ensembl77.gene_by_id(TP53_gene_id)
175175
for transcript in gene.transcripts:
176176
eq_(transcript.gene, gene)
177+
178+
@test_ensembl_releases()
179+
def test_BRCA1_001_has_protein_coding_biotype(genome):
180+
transcript = genome.transcripts_by_name("BRCA1-001")[0]
181+
assert transcript.is_protein_coding
182+
eq_(transcript.biotype, "protein_coding")

0 commit comments

Comments
 (0)