Skip to content

Commit 36b8f5e

Browse files
authored
bugfix: handle missing metrics in tdr (#1060)
* handle missing metrics in tdr
* ruff
1 parent 48537ac commit 36b8f5e

File tree

2 files changed

+32
-5
lines changed

2 files changed

+32
-5
lines changed

v03_pipeline/lib/misc/terra_data_repository.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010
from v03_pipeline.lib.misc.requests import requests_retry_session
1111

1212
BIGQUERY_METRICS = [
13-
'collaborator_sample_id',
1413
'predicted_sex',
1514
'contamination_rate',
1615
'percent_bases_at_20x',
16+
'collaborator_sample_id',
1717
'mean_coverage',
1818
]
1919
BIGQUERY_RESOURCE = 'bigquery'
@@ -63,9 +63,18 @@ def bq_metrics_query(bq_table_name: str) -> google.cloud.bigquery.table.RowItera
6363
msg = f'{bq_table_name} does not match expected pattern'
6464
raise ValueError(msg)
6565
client = bigquery.Client()
66+
67+
# not all metric columns are guaranteed to be present in the table; select NULL for any that are missing
68+
table_ddl = next(
69+
client.query_and_wait(
70+
f"""
71+
SELECT ddl FROM `{bq_table_name}`.INFORMATION_SCHEMA.TABLES where table_name='sample';
72+
""", # noqa: S608
73+
),
74+
)[0]
75+
metrics = [(m if m in table_ddl else f'NULL AS {m}') for m in BIGQUERY_METRICS]
6676
return client.query_and_wait(
6777
f"""
68-
SELECT {','.join(BIGQUERY_METRICS)}
69-
FROM `{bq_table_name}.sample`
70-
""", # noqa: S608
78+
SELECT {','.join(metrics)} FROM `{bq_table_name}.sample`;
79+
""", # noqa: S608
7180
)

v03_pipeline/lib/misc/terra_data_repository_test.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@
22
import os
33
import unittest
44
from types import SimpleNamespace
5-
from unittest.mock import Mock, patch
5+
from unittest.mock import Mock, call, patch
66

77
import responses
88

99
from v03_pipeline.lib.misc.terra_data_repository import (
1010
TDR_ROOT_URL,
1111
_get_dataset_ids,
12+
bq_metrics_query,
1213
gen_bq_table_names,
1314
)
1415

@@ -301,3 +302,20 @@ def test_gen_bq_table_names(self, _: Mock) -> None:
301302
'datarepo-aada2e3b.datarepo_RP_3059',
302303
],
303304
)
305+
306+
@patch('v03_pipeline.lib.misc.terra_data_repository.bigquery.Client')
307+
def test_bq_metrics_query_missing_metrics(
308+
self,
309+
mock_bq_client: Mock,
310+
_: Mock,
311+
) -> None:
312+
mock_bq_client.return_value.query_and_wait.return_value = iter(
313+
[['predicted_sex,contamination_rate,percent_bases_at_20x']],
314+
)
315+
bq_metrics_query('datarepo-7242affb.datarepo_RP_3053')
316+
self.assertEqual(
317+
mock_bq_client.return_value.query_and_wait.mock_calls[1],
318+
call(
319+
'\n SELECT predicted_sex,contamination_rate,percent_bases_at_20x,NULL AS collaborator_sample_id,NULL AS mean_coverage FROM `datarepo-7242affb.datarepo_RP_3053.sample`;\n ',
320+
),
321+
)

0 commit comments

Comments
 (0)