-
Notifications
You must be signed in to change notification settings - Fork 20
Reference Data Update Type Equality Check #789
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
0973dc9
27ac8d4
f227a05
fa6db6d
15ea14c
b5b7e47
314484b
a2dd695
3573cb7
6a6373d
d04325e
d3ee4fa
9edfc4f
093890e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -103,8 +103,15 @@ def test_create_globals_from_dataset_configs( | |
self.assertTrue( | ||
dataset_config_globals.selects | ||
== { | ||
'a': {'test_select', 'test_enum_id'}, | ||
'b': {'test_select', 'field2', 'test_enum_id'}, | ||
'a': { | ||
'test_select': hl.tint32, | ||
'test_enum_id': hl.tint32, | ||
}, | ||
'b': { | ||
'test_select': hl.tint32, | ||
'field2': hl.tint32, | ||
'test_enum_id': hl.tint32, | ||
}, | ||
}, | ||
) | ||
|
||
|
@@ -134,7 +141,11 @@ def test_create_globals_from_dataset_configs_single_dataset(self, mock_read_tabl | |
self.assertTrue( | ||
dataset_config_globals.selects | ||
== { | ||
'b': {'test_select', 'field2', 'test_enum_id'}, | ||
'b': { | ||
'test_select': hl.tint32, | ||
'field2': hl.tint32, | ||
'test_enum_id': hl.tint32, | ||
}, | ||
}, | ||
) | ||
|
||
|
@@ -183,11 +194,12 @@ def test_from_rdc_or_annotations_ht(self): | |
self.assertTrue( | ||
rdc_globals.enums == {'screen': {'region_type': ['C', 'D']}}, | ||
) | ||
print(rdc_globals.selects) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I assume this is left in by mistake? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep 🤦 I'm going to add a lint rule for this... |
||
self.assertTrue( | ||
rdc_globals.selects | ||
== { | ||
'gnomad_non_coding_constraint': {'z_score'}, | ||
'screen': {'region_type_ids'}, | ||
'gnomad_non_coding_constraint': {'z_score': hl.tfloat32}, | ||
'screen': {'region_type_ids': hl.tarray(hl.tint32)}, | ||
}, | ||
) | ||
|
||
|
@@ -198,13 +210,13 @@ def test_get_datasets_to_update_version_different(self): | |
# 'a' has a different version, 'c' is missing version in ht2_globals | ||
versions={'a': 'v2', 'b': 'v2', 'c': 'v1'}, | ||
enums={'a': {}, 'b': {}, 'c': {}}, | ||
selects={'a': set(), 'b': set()}, | ||
selects={'a': {}, 'b': {}}, | ||
), | ||
ht2_globals=Globals( | ||
paths={'a': 'a_path', 'b': 'b_path'}, | ||
versions={'a': 'v1', 'b': 'v2'}, | ||
enums={'a': {}, 'b': {}}, | ||
selects={'a': set(), 'b': set()}, | ||
selects={'a': {}, 'b': {}}, | ||
), | ||
) | ||
self.assertTrue(result == ['a', 'c']) | ||
|
@@ -216,13 +228,13 @@ def test_get_datasets_to_update_path_different(self): | |
paths={'a': 'a_path', 'b': 'old_b_path', 'c': 'extra_c_path'}, | ||
versions={'a': 'v1', 'b': 'v2'}, | ||
enums={'a': {}, 'b': {}}, | ||
selects={'a': set(), 'b': set()}, | ||
selects={'a': {}, 'b': {}}, | ||
), | ||
ht2_globals=Globals( | ||
paths={'a': 'a_path', 'b': 'b_path'}, | ||
versions={'a': 'v1', 'b': 'v2'}, | ||
enums={'a': {}, 'b': {}}, | ||
selects={'a': set(), 'b': set()}, | ||
selects={'a': {}, 'b': {}}, | ||
), | ||
) | ||
self.assertTrue(result == ['b', 'c']) | ||
|
@@ -238,13 +250,13 @@ def test_get_datasets_to_update_enum_different(self): | |
'b': {'enum_key_1': []}, | ||
'c': {}, | ||
}, | ||
selects={'a': set(), 'b': set()}, | ||
selects={'a': {}, 'b': {}}, | ||
), | ||
ht2_globals=Globals( | ||
paths={'a': 'a_path', 'b': 'b_path'}, | ||
versions={'a': 'v1', 'b': 'v2'}, | ||
enums={'a': {'test_enum': ['C', 'D']}, 'b': {'enum_key_2': []}}, | ||
selects={'a': set(), 'b': set()}, | ||
selects={'a': {}, 'b': {}}, | ||
), | ||
) | ||
self.assertTrue(result == ['a', 'b', 'c']) | ||
|
@@ -257,16 +269,54 @@ def test_get_datasets_to_update_select_different(self): | |
enums={'a': {}, 'b': {}}, | ||
# 'a' has extra select, 'b' has different select, 'c' is missing select in ht2_globals | ||
selects={ | ||
'a': {'field1', 'field2'}, | ||
'b': {'test_select'}, | ||
'c': set('test_select'), | ||
'a': {'field1': hl.tint32, 'field2': hl.tint32}, | ||
'b': {'test_select': hl.tint32}, | ||
'c': {'test_select': hl.tint32}, | ||
}, | ||
), | ||
ht2_globals=Globals( | ||
paths={'a': 'a_path', 'b': 'b_path'}, | ||
versions={'a': 'v1', 'b': 'v2'}, | ||
enums={'a': {}, 'b': {}}, | ||
selects={'a': {'field1'}, 'b': {'test_select_2'}}, | ||
selects={'a': {'field1': hl.tint32}, 'b': {'test_select_2': hl.tint32}}, | ||
), | ||
) | ||
self.assertTrue(result == ['a', 'b', 'c']) | ||
|
||
def test_get_datasets_to_update_select_type_validation(self): | ||
self.assertRaisesRegex( | ||
ValueError, | ||
"Unexpected field types detected in a: \\[\\('field1', dtype\\('int32'\\)\\)\\]", | ||
get_datasets_to_update, | ||
ht1_globals=Globals( | ||
paths={'a': 'a_path'}, | ||
versions={'a': 'v1'}, | ||
enums={'a': {}}, | ||
selects={ | ||
'a': {'field1': hl.tarray(hl.tint32)}, | ||
}, | ||
), | ||
ht2_globals=Globals( | ||
paths={'a': 'a_path'}, | ||
versions={'a': 'v1'}, | ||
enums={'a': {}}, | ||
selects={'a': {'field1': hl.tint32, 'field2': hl.tint32}}, | ||
), | ||
) | ||
result = get_datasets_to_update( | ||
ht1_globals=Globals( | ||
paths={'a': 'a_path'}, | ||
versions={'a': 'v1'}, | ||
enums={'a': {}}, | ||
selects={ | ||
'a': {'field1': hl.tarray(hl.tint32)}, | ||
}, | ||
), | ||
ht2_globals=Globals( | ||
paths={'a': 'a_path'}, | ||
versions={'a': 'v1'}, | ||
enums={'a': {}}, | ||
selects={'a': {'field1': hl.tarray(hl.tint32), 'field2': hl.tint32}}, | ||
), | ||
) | ||
self.assertTrue(result == ['a']) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,7 +45,7 @@ | |
hl.tstruct( | ||
locus=hl.tlocus('GRCh38'), | ||
alleles=hl.tarray(hl.tstr), | ||
PHRED=hl.tint32, | ||
PHRED=hl.tfloat32, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We had a few types in our tests/mock data that weren't right, and doing this work caught them 🤩! |
||
), | ||
key=['locus', 'alleles'], | ||
globals=hl.Struct( | ||
|
@@ -760,7 +760,7 @@ def test_update_vat_with_updated_rdc_snv_indel_38( | |
conditions=None, | ||
), | ||
dbnsfp=hl.Struct( | ||
REVEL_score=0.043, | ||
REVEL_score=0.0430000014603138, | ||
SIFT_score=None, | ||
Polyphen2_HVAR_score=None, | ||
MutationTaster_pred_id=0, | ||
|
@@ -1168,7 +1168,7 @@ def test_update_vat_with_updated_rdc_snv_indel_37( | |
conditions=None, | ||
), | ||
dbnsfp=hl.Struct( | ||
REVEL_score=0.043, | ||
REVEL_score=0.0430000014603138, | ||
SIFT_score=None, | ||
Polyphen2_HVAR_score=None, | ||
MutationTaster_pred_id=0, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
This folder comprises a Hail (www.hail.is) native Table or MatrixTable. | ||
Written with version 0.2.128-eead8100a1c1 | ||
Created at 2024/05/09 20:02:21 | ||
Written with version 0.2.130-bea04d9c79b5 | ||
Created at 2024/05/20 13:48:16 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
This folder comprises a Hail (www.hail.is) native Table or MatrixTable. | ||
Written with version 0.2.128-eead8100a1c1 | ||
Created at 2024/03/21 11:28:13 | ||
Written with version 0.2.130-bea04d9c79b5 | ||
Created at 2024/05/20 15:38:26 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
This folder comprises a Hail (www.hail.is) native Table or MatrixTable. | ||
Written with version 0.2.128-eead8100a1c1 | ||
Created at 2024/03/21 11:35:30 | ||
Written with version 0.2.130-bea04d9c79b5 | ||
Created at 2024/05/20 14:08:17 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
This folder comprises a Hail (www.hail.is) native Table or MatrixTable. | ||
Written with version 0.2.114-cc8d36408b36 | ||
Created at 2023/07/13 19:51:12 | ||
Written with version 0.2.130-bea04d9c79b5 | ||
Created at 2024/05/20 13:22:32 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The idea here is simply to ensure that any fields shared between the existing table and an "update" are identically typed.
IMO we're getting very close to needing a statically defined schema for these, but trying to squeeze that into this change was much messier.