4
4
import hail as hl
5
5
6
6
from v03_pipeline .lib .model import (
7
- DatasetType ,
8
- ReferenceDatasetCollection ,
9
7
ReferenceGenome ,
10
8
)
11
9
from v03_pipeline .lib .reference_data .compare_globals import (
12
10
Globals ,
13
11
get_datasets_to_update ,
14
12
)
15
13
16
-
17
- class CompareGlobalsTest (unittest .TestCase ):
18
- @mock .patch .dict (
19
- 'v03_pipeline.lib.reference_data.compare_globals.CONFIG' ,
20
- {
21
- 'a' : {
22
- '38' : {
23
- 'custom_import' : None ,
24
- 'source_path' : 'a_path' , # 'a' has a custom import
25
- 'select' : {
26
- 'test_select' : 'info.test_select' ,
27
- 'test_enum' : 'test_enum' ,
28
- },
29
- 'version' : 'a_version' ,
30
- 'enum_select' : {'test_enum' : ['A' , 'B' ]},
31
- },
14
+ CONFIG = {
15
+ 'a' : {
16
+ '38' : {
17
+ 'custom_import' : None ,
18
+ 'source_path' : 'a_path' , # 'a' has a custom import
19
+ 'select' : {
20
+ 'test_select' : 'info.test_select' ,
21
+ 'test_enum' : 'test_enum' ,
32
22
},
33
- 'b' : { # b is missing version
34
- '38' : {
35
- 'path' : 'b_path' ,
36
- 'select' : {
37
- 'test_select' : 'info.test_select' ,
38
- 'test_enum' : 'test_enum' ,
39
- } ,
40
- 'enum_select' : {'test_enum' : ['C' , 'D' ]},
41
- 'custom_select' : lambda ht : {'field2' : ht .info .test_select_2 },
42
- } ,
23
+ 'version' : 'a_version' ,
24
+ 'enum_select' : {'test_enum' : ['A' , 'B' ]},
25
+ } ,
26
+ },
27
+ 'b' : { # b is missing version
28
+ '38' : {
29
+ 'path' : 'b_path' ,
30
+ 'select' : {
31
+ 'test_select' : 'info.test_select' ,
32
+ 'test_enum' : 'test_enum' ,
43
33
},
34
+ 'enum_select' : {'test_enum' : ['C' , 'D' ]},
35
+ 'custom_select' : lambda ht : {'field2' : ht .info .test_select_2 },
44
36
},
45
- )
37
+ },
38
+ }
39
+
40
+ B_TABLE = hl .Table .parallelize (
41
+ [],
42
+ schema = hl .tstruct (
43
+ locus = hl .tlocus ('GRCh38' ),
44
+ alleles = hl .tarray (hl .tstr ),
45
+ info = hl .tstruct (
46
+ test_select = hl .tint ,
47
+ test_select_2 = hl .tint ,
48
+ ),
49
+ test_enum = hl .tstr ,
50
+ ),
51
+ globals = hl .Struct (
52
+ version = 'b_version' ,
53
+ path = 'b_path' ,
54
+ enums = hl .Struct (test_enum = ['C' , 'D' ]),
55
+ ),
56
+ key = ['locus' , 'alleles' ],
57
+ )
58
+
59
+
60
+ class CompareGlobalsTest (unittest .TestCase ):
61
+ @mock .patch .dict ('v03_pipeline.lib.reference_data.compare_globals.CONFIG' , CONFIG )
46
62
@mock .patch (
47
63
'v03_pipeline.lib.reference_data.compare_globals.import_ht_from_config_path' ,
48
64
)
49
- @mock .patch .object (ReferenceDatasetCollection , 'datasets' )
50
- def test_create_globals_from_dataset_ht_configs (
65
+ def test_create_globals_from_dataset_configs (
51
66
self ,
52
- mock_rdc_datasets ,
53
67
mock_import_dataset_ht ,
54
68
):
55
- mock_rdc_datasets .return_value = ['a' , 'b' ]
56
69
mock_import_dataset_ht .side_effect = [
57
70
hl .Table .parallelize (
58
71
[],
@@ -64,28 +77,18 @@ def test_create_globals_from_dataset_ht_configs(
64
77
),
65
78
test_enum = hl .tstr ,
66
79
),
67
- globals = hl .Struct (version = 'a_version' ),
68
- key = ['locus' , 'alleles' ],
69
- ),
70
- hl .Table .parallelize (
71
- [],
72
- schema = hl .tstruct (
73
- locus = hl .tlocus ('GRCh38' ),
74
- alleles = hl .tarray (hl .tstr ),
75
- info = hl .tstruct (
76
- test_select = hl .tint ,
77
- test_select_2 = hl .tint ,
78
- ),
79
- test_enum = hl .tstr ,
80
+ globals = hl .Struct (
81
+ version = 'a_version' ,
82
+ path = 'a_path' ,
83
+ enums = hl .Struct (test_enum = ['A' , 'B' ]),
80
84
),
81
- globals = hl .Struct (version = 'b_version' ),
82
85
key = ['locus' , 'alleles' ],
83
86
),
87
+ B_TABLE ,
84
88
]
85
89
dataset_config_globals = Globals .from_dataset_configs (
86
- rdc = ReferenceDatasetCollection .INTERVAL ,
87
- dataset_type = DatasetType .SNV_INDEL ,
88
90
reference_genome = ReferenceGenome .GRCh38 ,
91
+ datasets = ['a' , 'b' ],
89
92
)
90
93
self .assertTrue (
91
94
dataset_config_globals .versions == {'a' : 'a_version' , 'b' : 'b_version' },
@@ -105,6 +108,36 @@ def test_create_globals_from_dataset_ht_configs(
105
108
},
106
109
)
107
110
111
+ @mock .patch .dict ('v03_pipeline.lib.reference_data.compare_globals.CONFIG' , CONFIG )
112
+ @mock .patch (
113
+ 'v03_pipeline.lib.reference_data.dataset_table_operations.hl.read_table' ,
114
+ )
115
+ def test_create_globals_from_dataset_configs_single_dataset (self , mock_read_table ):
116
+ # by mocking hl.read_table() (only possible for a dataset without a custom import),
117
+ # we can test the code inside import_ht_from_config_path()
118
+ mock_read_table .return_value = B_TABLE
119
+
120
+ dataset_config_globals = Globals .from_dataset_configs (
121
+ reference_genome = ReferenceGenome .GRCh38 ,
122
+ datasets = ['b' ],
123
+ )
124
+
125
+ self .assertTrue (
126
+ dataset_config_globals .versions == {'b' : 'b_version' },
127
+ )
128
+ self .assertTrue (
129
+ dataset_config_globals .paths == {'b' : 'b_path' },
130
+ )
131
+ self .assertTrue (
132
+ dataset_config_globals .enums == {'b' : {'test_enum' : ['C' , 'D' ]}},
133
+ )
134
+ self .assertTrue (
135
+ dataset_config_globals .selects
136
+ == {
137
+ 'b' : {'test_select' , 'field2' , 'test_enum_id' },
138
+ },
139
+ )
140
+
108
141
def test_from_rdc_or_annotations_ht (self ):
109
142
rdc_ht = hl .Table .parallelize (
110
143
[],
@@ -134,8 +167,7 @@ def test_from_rdc_or_annotations_ht(self):
134
167
)
135
168
rdc_globals = Globals .from_ht (
136
169
rdc_ht ,
137
- rdc = ReferenceDatasetCollection .INTERVAL ,
138
- dataset_type = DatasetType .SNV_INDEL ,
170
+ ['gnomad_non_coding_constraint' , 'screen' ],
139
171
)
140
172
self .assertTrue (
141
173
rdc_globals .versions
@@ -159,11 +191,8 @@ def test_from_rdc_or_annotations_ht(self):
159
191
},
160
192
)
161
193
162
- @mock .patch .object (ReferenceDatasetCollection , 'datasets' )
163
- def test_get_datasets_to_update_version_different (self , mock_rdc_datasets ):
164
- mock_rdc_datasets .return_value = ['a' , 'b' , 'c' ]
194
+ def test_get_datasets_to_update_version_different (self ):
165
195
result = get_datasets_to_update (
166
- rdc = ReferenceDatasetCollection .INTERVAL ,
167
196
ht1_globals = Globals (
168
197
paths = {'a' : 'a_path' , 'b' : 'b_path' },
169
198
# 'a' has a different version, 'c' is missing version in ht2_globals
@@ -177,15 +206,11 @@ def test_get_datasets_to_update_version_different(self, mock_rdc_datasets):
177
206
enums = {'a' : {}, 'b' : {}},
178
207
selects = {'a' : set (), 'b' : set ()},
179
208
),
180
- dataset_type = DatasetType .SNV_INDEL ,
181
209
)
182
210
self .assertTrue (result == ['a' , 'c' ])
183
211
184
- @mock .patch .object (ReferenceDatasetCollection , 'datasets' )
185
- def test_get_datasets_to_update_path_different (self , mock_rdc_datasets ):
186
- mock_rdc_datasets .return_value = ['a' , 'b' , 'c' ]
212
+ def test_get_datasets_to_update_path_different (self ):
187
213
result = get_datasets_to_update (
188
- rdc = ReferenceDatasetCollection .INTERVAL ,
189
214
ht1_globals = Globals (
190
215
# 'b' has a different path, 'c' is missing path in ht2_globals
191
216
paths = {'a' : 'a_path' , 'b' : 'old_b_path' , 'c' : 'extra_c_path' },
@@ -199,15 +224,11 @@ def test_get_datasets_to_update_path_different(self, mock_rdc_datasets):
199
224
enums = {'a' : {}, 'b' : {}},
200
225
selects = {'a' : set (), 'b' : set ()},
201
226
),
202
- dataset_type = DatasetType .SNV_INDEL ,
203
227
)
204
228
self .assertTrue (result == ['b' , 'c' ])
205
229
206
- @mock .patch .object (ReferenceDatasetCollection , 'datasets' )
207
- def test_get_datasets_to_update_enum_different (self , mock_rdc_datasets ):
208
- mock_rdc_datasets .return_value = ['a' , 'b' , 'c' ]
230
+ def test_get_datasets_to_update_enum_different (self ):
209
231
result = get_datasets_to_update (
210
- rdc = ReferenceDatasetCollection .INTERVAL ,
211
232
ht1_globals = Globals (
212
233
paths = {'a' : 'a_path' , 'b' : 'b_path' },
213
234
versions = {'a' : 'v1' , 'b' : 'v2' },
@@ -225,15 +246,11 @@ def test_get_datasets_to_update_enum_different(self, mock_rdc_datasets):
225
246
enums = {'a' : {'test_enum' : ['C' , 'D' ]}, 'b' : {'enum_key_2' : []}},
226
247
selects = {'a' : set (), 'b' : set ()},
227
248
),
228
- dataset_type = DatasetType .SNV_INDEL ,
229
249
)
230
250
self .assertTrue (result == ['a' , 'b' , 'c' ])
231
251
232
- @mock .patch .object (ReferenceDatasetCollection , 'datasets' )
233
- def test_get_datasets_to_update_select_different (self , mock_rdc_datasets ):
234
- mock_rdc_datasets .return_value = ['a' , 'b' , 'c' ]
252
+ def test_get_datasets_to_update_select_different (self ):
235
253
result = get_datasets_to_update (
236
- rdc = ReferenceDatasetCollection .INTERVAL ,
237
254
ht1_globals = Globals (
238
255
paths = {'a' : 'a_path' , 'b' : 'b_path' },
239
256
versions = {'a' : 'v1' , 'b' : 'v2' },
@@ -251,6 +268,5 @@ def test_get_datasets_to_update_select_different(self, mock_rdc_datasets):
251
268
enums = {'a' : {}, 'b' : {}},
252
269
selects = {'a' : {'field1' }, 'b' : {'test_select_2' }},
253
270
),
254
- dataset_type = DatasetType .SNV_INDEL ,
255
271
)
256
272
self .assertTrue (result == ['a' , 'b' , 'c' ])
0 commit comments