Skip to content

Commit b98fe66

Browse files
authored
feat: Handle parsing empty predicted sex into Unknown (#1000)
1 parent 533d1d4 commit b98fe66

File tree

4 files changed

+12
-6
lines changed

v03_pipeline/lib/misc/io.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,11 @@ def select_relevant_fields(
219219
def import_imputed_sex(imputed_sex_path: str) -> hl.Table:
220220
ht = hl.import_table(imputed_sex_path)
221221
imputed_sex_lookup = hl.dict(
222-
{s.imputed_sex_value: s.value for s in Sex},
222+
{
223+
imputed_sex_value: s.value
224+
for s in Sex
225+
for imputed_sex_value in s.imputed_sex_values
226+
},
223227
)
224228
ht = ht.select(
225229
s=ht.collaborator_sample_id,

v03_pipeline/lib/misc/io_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def test_import_imputed_sex(self) -> None:
4747
hl.Struct(s='abc_2', predicted_sex='F'),
4848
hl.Struct(s='abc_3', predicted_sex='U'),
4949
hl.Struct(s='abc_4', predicted_sex='XYY'),
50+
hl.Struct(s='abc_5', predicted_sex='U'),
5051
],
5152
)
5253

v03_pipeline/lib/model/definitions.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@ class Sex(str, Enum):
1818
XYY = 'XYY'
1919

2020
@property
21-
def imputed_sex_value(self):
21+
def imputed_sex_values(self) -> list[str]:
2222
return {
23-
Sex.MALE: 'Male',
24-
Sex.FEMALE: 'Female',
25-
Sex.UNKNOWN: 'Unknown',
26-
}.get(self, self.name)
23+
Sex.MALE: ['Male'],
24+
Sex.FEMALE: ['Female'],
25+
Sex.UNKNOWN: ['', 'Unknown'],
26+
}.get(self, [self.name])
2727

2828

2929
class PipelineVersion(str, Enum):

v03_pipeline/var/test/sex_check/test_imputed_sex.tsv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ SM-DM66X abc_1 abc_1 0E+00 gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0
33
SM-DM69X abc_2 abc_2 0E+00 gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c4c07edf-7735-4aa7-9283-7cb2607b60a2/GLE-5774-3-3.qc-coverage-region-1_coverage_metrics.csv gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/dcd4c271-0249-47f1-8e91-81f74735c5a1/GLE-5774-3-3.cram.crai gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/ec41ec06-673f-4fe2-a063-23dc5fe1dcce/GLE-5774-3-3.cram.md5sum gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/aad0e270-2ad5-4f39-b968-9b4beafeb5cc/GLE-5774-3-3.cram a4b04a39-9234-4028-a155-442c4acf12a0 07.021.604.3.7.8 ce74d94c-c33d-49d7-85c9-5f3cbd08aff7 2024-04-17T15:02:46 99.800000000 gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c3a9e6f2-4c68-410b-823d-46ca406e5061/GLE-5774-3-3.mapping_metrics.csv DNA:DNA Genomic 35.300000000 Whole Blood:Whole Blood PT-24OHM Pass PDO-32755 96.320000000 97.340000000 Female P-WG-0139 2017-04-12 04:00:00 Female RP-3061 gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c71cd2a1-c789-4715-9ebc-dbfc40d9f2e2/GLE-5774-3-3.vcf.gz.tbi gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/957a99cb-c9a9-4fc5-a0ec-53f9e461469e/GLE-5774-3-3.vcf.gz.md5sum gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/df520949-5f2b-4976-9d46-80d1cc299813/GLE-5774-3-3.vcf.gz 133253714921.000000000 gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/2e98e51b-9394-4e64-977f-e9010a4e16dc/GLE-5774-3-3.vc_metrics.csv
44
SM-DPB5G abc_3 abc_3 0E+00 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/432f8354-77e0-4381-9bb5-dfdc0633b5b2/PIE_OGI1433_002628_1.qc-coverage-region-1_coverage_metrics.csv gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/3dc623fa-2a45-4b3d-a0f8-fcdec09f9418/PIE_OGI1433_002628_1.cram.crai gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/895966ef-c705-4c18-952d-03863243a184/PIE_OGI1433_002628_1.cram.md5sum gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/96ca6d5f-fb23-4102-bb5e-c7bbfd194e1c/PIE_OGI1433_002628_1.cram ffb50687-165e-425a-a545-c3797d3a28d4 07.021.604.3.7.8 55729ba9-3ce4-47b3-9c3b-1148737ae40f 2024-04-17T15:07:57 99.670000000 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/30f8e208-5d2d-4ce8-b835-695b5ed673f4/PIE_OGI1433_002628_1.mapping_metrics.csv DNA:DNA Genomic 41.910000000 Whole Blood:Whole Blood PT-25BR5 Pass PDO-32756 92.920000000 97.990000000 Unknown P-WG-0139 2017-05-19 04:00:00 Unknown RP-3062 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/1641d1b2-1035-4cc3-9c8b-0c8cb430f56b/PIE_OGI1433_002628_1.vcf.gz.tbi gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/f5ba2708-899e-42e8-b287-fdf72c2e404d/PIE_OGI1433_002628_1.vcf.gz.md5sum gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/e925ee5d-a75e-471f-adfd-2756c8690069/PIE_OGI1433_002628_1.vcf.gz 156149580126.000000000 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/df076bc5-9db8-44f0-a3fe-f693370634cc/PIE_OGI1433_002628_1.vc_metrics.csv
55
SM-DPB5G abc_4 abc_4 0E+00 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/432f8354-77e0-4381-9bb5-dfdc0633b5b2/PIE_OGI1433_002628_1.qc-coverage-region-1_coverage_metrics.csv gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/3dc623fa-2a45-4b3d-a0f8-fcdec09f9418/PIE_OGI1433_002628_1.cram.crai gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/895966ef-c705-4c18-952d-03863243a184/PIE_OGI1433_002628_1.cram.md5sum gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/96ca6d5f-fb23-4102-bb5e-c7bbfd194e1c/PIE_OGI1433_002628_1.cram ffb50687-165e-425a-a545-c3797d3a28d4 07.021.604.3.7.8 55729ba9-3ce4-47b3-9c3b-1148737ae40f 2024-04-17T15:07:57 99.670000000 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/30f8e208-5d2d-4ce8-b835-695b5ed673f4/PIE_OGI1433_002628_1.mapping_metrics.csv DNA:DNA Genomic 41.910000000 Whole Blood:Whole Blood PT-25BR5 Pass PDO-32756 92.920000000 97.990000000 XYY P-WG-0139 2017-05-19 04:00:00 XYY RP-3062 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/1641d1b2-1035-4cc3-9c8b-0c8cb430f56b/PIE_OGI1433_002628_1.vcf.gz.tbi gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/f5ba2708-899e-42e8-b287-fdf72c2e404d/PIE_OGI1433_002628_1.vcf.gz.md5sum gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/e925ee5d-a75e-471f-adfd-2756c8690069/PIE_OGI1433_002628_1.vcf.gz 156149580126.000000000 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/df076bc5-9db8-44f0-a3fe-f693370634cc/PIE_OGI1433_002628_1.vc_metrics.csv
6+
SM-DPB5G abc_5 abc_5 0E+00 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/432f8354-77e0-4381-9bb5-dfdc0633b5b2/PIE_OGI1433_002628_1.qc-coverage-region-1_coverage_metrics.csv gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/3dc623fa-2a45-4b3d-a0f8-fcdec09f9418/PIE_OGI1433_002628_1.cram.crai gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/895966ef-c705-4c18-952d-03863243a184/PIE_OGI1433_002628_1.cram.md5sum gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/96ca6d5f-fb23-4102-bb5e-c7bbfd194e1c/PIE_OGI1433_002628_1.cram ffb50687-165e-425a-a545-c3797d3a28d4 07.021.604.3.7.8 55729ba9-3ce4-47b3-9c3b-1148737ae40f 2024-04-17T15:07:57 99.670000000 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/30f8e208-5d2d-4ce8-b835-695b5ed673f4/PIE_OGI1433_002628_1.mapping_metrics.csv DNA:DNA Genomic 41.910000000 Whole Blood:Whole Blood PT-25BR5 Pass PDO-32756 92.920000000 97.990000000 P-WG-0139 2017-05-19 04:00:00 RP-3062 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/1641d1b2-1035-4cc3-9c8b-0c8cb430f56b/PIE_OGI1433_002628_1.vcf.gz.tbi gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/f5ba2708-899e-42e8-b287-fdf72c2e404d/PIE_OGI1433_002628_1.vcf.gz.md5sum gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/e925ee5d-a75e-471f-adfd-2756c8690069/PIE_OGI1433_002628_1.vcf.gz 156149580126.000000000 gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/df076bc5-9db8-44f0-a3fe-f693370634cc/PIE_OGI1433_002628_1.vc_metrics.csv

0 commit comments

Comments
 (0)