16
16
Contains the EnsemblRelease class, which extends the Genome class
17
17
to be specific to (a particular release of) Ensembl.
18
18
"""
19
+ from weakref import WeakValueDictionary
19
20
20
21
from .genome import Genome
21
22
from .ensembl_release_versions import check_release_number , MAX_ENSEMBL_RELEASE
@@ -32,18 +33,53 @@ class EnsemblRelease(Genome):
32
33
Bundles together the genomic annotation and sequence data associated with
33
34
a particular release of the Ensembl database.
34
35
"""
35
- def __init__ (self ,
36
- release = MAX_ENSEMBL_RELEASE ,
37
- species = human ,
38
- server = ENSEMBL_FTP_SERVER ):
39
- self .release = check_release_number (release )
40
- self .species = check_species_object (species )
41
- self .server = server
36
+
37
@classmethod
def normalize_init_values(cls, release, species, server):
    """
    Canonicalize the three arguments that identify an EnsemblRelease.

    The release is passed through `check_release_number` and the species
    through `check_species_object`; the server value is returned as given.

    Returns a tuple of (release, species, server), in that order.
    """
    return (
        check_release_number(release),
        check_species_object(species),
        server,
    )
46
+
47
# The cache holds its values weakly (WeakValueDictionary rather than a plain
# dict) to prevent a memory leak when many different releases are used in
# sequence: once every outside reference to a particular EnsemblRelease is
# gone, its entry drops out of this cache as well.
_genome_cache = WeakValueDictionary()

@classmethod
def cached(
        cls,
        release=MAX_ENSEMBL_RELEASE,
        species=human,
        server=ENSEMBL_FTP_SERVER):
    """
    Return the EnsemblRelease for the given arguments, constructing it only
    if no live instance with the same normalized arguments is cached.

    The arguments are normalized with `normalize_init_values` and the
    resulting (release, species, server) tuple is used as the cache key.
    """
    init_args_tuple = cls.normalize_init_values(release, species, server)
    # Use a single .get() instead of "key in cache" followed by "cache[key]":
    # entries of a WeakValueDictionary can vanish between the membership
    # test and the lookup, which would surface as a spurious KeyError.
    # .get() hands back either a strong reference or None in one step.
    genome = cls._genome_cache.get(init_args_tuple)
    if genome is None:
        genome = cls(*init_args_tuple)
        cls._genome_cache[init_args_tuple] = genome
    return genome
69
+
70
+ def __init__ (
71
+ self ,
72
+ release = MAX_ENSEMBL_RELEASE ,
73
+ species = human ,
74
+ server = ENSEMBL_FTP_SERVER ):
75
+ self .release , self .species , self .server = self .normalize_init_values (
76
+ release = release , species = species , server = server )
42
77
43
78
self .gtf_url = make_gtf_url (
44
79
ensembl_release = self .release ,
45
- species = species ,
46
- server = server )
80
+ species = self .species ,
81
+ server = self .server )
82
+
47
83
self .transcript_fasta_url = make_fasta_url (
48
84
ensembl_release = self .release ,
49
85
species = self .species .latin_name ,
@@ -53,7 +89,7 @@ def __init__(self,
53
89
ensembl_release = self .release ,
54
90
species = self .species .latin_name ,
55
91
sequence_type = "pep" ,
56
- server = server )
92
+ server = self . server )
57
93
58
94
self .reference_name = self .species .which_reference (self .release )
59
95
@@ -92,3 +128,10 @@ def to_dict(self):
92
128
"species" : self .species ,
93
129
"server" : self .server
94
130
}
131
+
132
@classmethod
def from_dict(cls, state_dict):
    """
    Rebuild an EnsemblRelease from a dictionary of constructor arguments.

    The entries of `state_dict` are forwarded as keyword arguments to
    `cached`, so deserializing does not create duplicate instances.
    """
    genome = cls.cached(**state_dict)
    return genome
0 commit comments