Skip to content

Commit 497d36f

Browse files
authored
Caid refactor (#927) (#928)
* CAID refactor * lint * lint * remove mock * string defaults * format * ruff * add mock values * missed a test * ruff
1 parent 6d08183 commit 497d36f

File tree

7 files changed

+23
-66
lines changed

7 files changed

+23
-66
lines changed

requirements.in

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,5 @@ hail==0.2.132
44
luigi>=3.4.0
55
gnomad==0.6.4
66
google-cloud-storage>=2.14.0
7-
google-cloud-secret-manager>=2.20.0
87
aiofiles==24.1.0
98
pydantic==2.8.2

requirements.txt

Lines changed: 3 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -103,11 +103,10 @@ frozenlist==1.4.0
103103
# hail
104104
gnomad==0.6.4
105105
# via -r requirements.in
106-
google-api-core[grpc]==2.14.0
106+
google-api-core==2.14.0
107107
# via
108108
# google-api-python-client
109109
# google-cloud-core
110-
# google-cloud-secret-manager
111110
# google-cloud-storage
112111
google-api-python-client==2.108.0
113112
# via -r requirements.in
@@ -118,7 +117,6 @@ google-auth==2.23.4
118117
# google-auth-httplib2
119118
# google-auth-oauthlib
120119
# google-cloud-core
121-
# google-cloud-secret-manager
122120
# google-cloud-storage
123121
# hail
124122
google-auth-httplib2==0.1.1
@@ -127,8 +125,6 @@ google-auth-oauthlib==0.8.0
127125
# via hail
128126
google-cloud-core==2.4.1
129127
# via google-cloud-storage
130-
google-cloud-secret-manager==2.20.0
131-
# via -r requirements.in
132128
google-cloud-storage==2.14.0
133129
# via -r requirements.in
134130
google-crc32c==1.5.0
@@ -137,20 +133,7 @@ google-crc32c==1.5.0
137133
# google-resumable-media
138134
google-resumable-media==2.7.0
139135
# via google-cloud-storage
140-
googleapis-common-protos[grpc]==1.61.0
141-
# via
142-
# google-api-core
143-
# grpc-google-iam-v1
144-
# grpcio-status
145-
grpc-google-iam-v1==0.13.0
146-
# via google-cloud-secret-manager
147-
grpcio==1.63.0
148-
# via
149-
# google-api-core
150-
# googleapis-common-protos
151-
# grpc-google-iam-v1
152-
# grpcio-status
153-
grpcio-status==1.48.2
136+
googleapis-common-protos==1.61.0
154137
# via google-api-core
155138
hail==0.2.132
156139
# via -r requirements.in
@@ -249,17 +232,11 @@ portalocker==2.8.2
249232
# via msal-extensions
250233
prompt-toolkit==3.0.41
251234
# via ipython
252-
proto-plus==1.23.0
253-
# via google-cloud-secret-manager
254235
protobuf==3.20.2
255236
# via
256237
# google-api-core
257-
# google-cloud-secret-manager
258238
# googleapis-common-protos
259-
# grpc-google-iam-v1
260-
# grpcio-status
261239
# hail
262-
# proto-plus
263240
ptyprocess==0.7.0
264241
# via pexpect
265242
pure-eval==0.2.2
@@ -285,9 +262,7 @@ pygments==2.17.2
285262
# ipython
286263
# rich
287264
pyjwt[crypto]==2.8.0
288-
# via
289-
# msal
290-
# pyjwt
265+
# via msal
291266
pyparsing==3.1.1
292267
# via httplib2
293268
pyspark==3.5.1

v03_pipeline/lib/misc/allele_registry.py

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
import dataclasses
22
import hashlib
3-
import json
43
import math
54
import time
65
import uuid
76

87
import hail as hl
98
import hailtop.fs as hfs
109
import requests
11-
from google.cloud import secretmanager
1210
from requests import HTTPError
1311

1412
from v03_pipeline.lib.logger import get_logger
@@ -106,7 +104,10 @@ def register_alleles(
106104

107105

108106
def build_url(base_url: str, reference_genome: ReferenceGenome) -> str:
109-
login, password = get_ar_credentials_from_secret_manager()
107+
login, password = (
108+
Env.CLINGEN_ALLELE_REGISTRY_LOGIN,
109+
Env.CLINGEN_ALLELE_REGISTRY_PASSWORD,
110+
)
110111

111112
# Request a gnomad ID for the correct reference genome
112113
base_url = base_url.format(reference_genome.allele_registry_gnomad_id)
@@ -118,25 +119,6 @@ def build_url(base_url: str, reference_genome: ReferenceGenome) -> str:
118119
return base_url + '&gbLogin=' + login + '&gbTime=' + gb_time + '&gbToken=' + token
119120

120121

121-
def get_ar_credentials_from_secret_manager() -> tuple[str, str]:
122-
if Env.ALLELE_REGISTRY_SECRET_NAME is None:
123-
msg = (
124-
'SHOULD_REGISTER_ALLELES is True but cannot get allele registry credentials '
125-
'because ALLELE_REGISTRY_SECRET_NAME is not set'
126-
)
127-
raise ValueError(msg)
128-
129-
client = secretmanager.SecretManagerServiceClient()
130-
name = client.secret_version_path(
131-
Env.PROJECT_ID,
132-
Env.ALLELE_REGISTRY_SECRET_NAME,
133-
'latest',
134-
)
135-
response = client.access_secret_version(request={'name': name})
136-
payload_dict = json.loads(response.payload.data.decode('UTF-8'))
137-
return payload_dict['login'], payload_dict['password']
138-
139-
140122
def handle_api_response( # noqa: C901
141123
res: requests.Response,
142124
base_url: str,

v03_pipeline/lib/misc/allele_registry_test.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,17 @@ def tearDown(self):
2626
shutil.rmtree(self.temp_dir.name)
2727

2828
@patch.object(requests, 'put')
29-
@patch(
30-
'v03_pipeline.lib.misc.allele_registry.get_ar_credentials_from_secret_manager',
31-
)
3229
@patch('v03_pipeline.lib.misc.allele_registry.Env')
3330
@patch('v03_pipeline.lib.misc.allele_registry.logger')
3431
def test_register_alleles_38(
3532
self,
3633
mock_logger: Mock,
3734
mock_env: Mock,
38-
mock_get_credentials: Mock,
3935
mock_put_request: Mock,
4036
):
41-
mock_get_credentials.return_value = ('', '')
4237
mock_env.HAIL_TMP_DIR = self.temp_dir.name
38+
mock_env.CLINGEN_ALLELE_REGISTRY_LOGIN = ''
39+
mock_env.CLINGEN_ALLELE_REGISTRY_PASSWORD = ''
4340

4441
new_variants_ht = hl.Table.parallelize(
4542
[

v03_pipeline/lib/model/environment.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,11 @@
3636
'hail-search',
3737
)
3838
HAIL_BACKEND_SERVICE_PORT = int(os.environ.get('HAIL_BACKEND_SERVICE_PORT', '5000'))
39-
40-
# Allele registry secrets :/
41-
ALLELE_REGISTRY_SECRET_NAME = os.environ.get('ALLELE_REGISTRY_SECRET_NAME', None)
42-
PROJECT_ID = os.environ.get('PROJECT_ID', None)
39+
CLINGEN_ALLELE_REGISTRY_LOGIN = os.environ.get('CLINGEN_ALLELE_REGISTRY_LOGIN', '')
40+
CLINGEN_ALLELE_REGISTRY_PASSWORD = os.environ.get(
41+
'CLINGEN_ALLELE_REGISTRY_PASSWORD',
42+
'',
43+
)
4344

4445
# Feature Flags
4546
ACCESS_PRIVATE_REFERENCE_DATASETS = (
@@ -50,7 +51,6 @@
5051
INCLUDE_PIPELINE_VERSION_IN_PREFIX = (
5152
os.environ.get('INCLUDE_PIPELINE_VERSION_IN_PREFIX') == '1'
5253
)
53-
SHOULD_REGISTER_ALLELES = os.environ.get('SHOULD_REGISTER_ALLELES') == '1'
5454
SHOULD_TRIGGER_HAIL_BACKEND_RELOAD = (
5555
os.environ.get('SHOULD_TRIGGER_HAIL_BACKEND_RELOAD') == '1'
5656
)
@@ -59,8 +59,9 @@
5959
@dataclass
6060
class Env:
6161
ACCESS_PRIVATE_REFERENCE_DATASETS: bool = ACCESS_PRIVATE_REFERENCE_DATASETS
62-
ALLELE_REGISTRY_SECRET_NAME: str | None = ALLELE_REGISTRY_SECRET_NAME
6362
CHECK_SEX_AND_RELATEDNESS: bool = CHECK_SEX_AND_RELATEDNESS
63+
CLINGEN_ALLELE_REGISTRY_LOGIN: str | None = CLINGEN_ALLELE_REGISTRY_LOGIN
64+
CLINGEN_ALLELE_REGISTRY_PASSWORD: str | None = CLINGEN_ALLELE_REGISTRY_PASSWORD
6465
EXPECT_WES_FILTERS: bool = EXPECT_WES_FILTERS
6566
GRCH37_TO_GRCH38_LIFTOVER_REF_PATH: str = GRCH37_TO_GRCH38_LIFTOVER_REF_PATH
6667
GRCH38_TO_GRCH37_LIFTOVER_REF_PATH: str = GRCH38_TO_GRCH37_LIFTOVER_REF_PATH
@@ -71,8 +72,6 @@ class Env:
7172
INCLUDE_PIPELINE_VERSION_IN_PREFIX: bool = INCLUDE_PIPELINE_VERSION_IN_PREFIX
7273
LOADING_DATASETS_DIR: str = LOADING_DATASETS_DIR
7374
PRIVATE_REFERENCE_DATASETS_DIR: str = PRIVATE_REFERENCE_DATASETS_DIR
74-
PROJECT_ID: str | None = PROJECT_ID
7575
REFERENCE_DATASETS_DIR: str = REFERENCE_DATASETS_DIR
76-
SHOULD_REGISTER_ALLELES: bool = SHOULD_REGISTER_ALLELES
7776
SHOULD_TRIGGER_HAIL_BACKEND_RELOAD: bool = SHOULD_TRIGGER_HAIL_BACKEND_RELOAD
7877
VEP_REFERENCE_DATASETS_DIR: str = VEP_REFERENCE_DATASETS_DIR

v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,8 @@ def test_multiple_update_vat(
258258
)
259259
# make register_alleles return CAIDs for 4 of 30 variants
260260
mock_env.GRCH38_TO_GRCH37_LIFTOVER_REF_PATH = GRCH38_TO_GRCH37_LIFTOVER_REF_PATH
261-
mock_env.SHOULD_REGISTER_ALLELES = True
261+
mock_env.CLINGEN_ALLELE_REGISTRY_LOGIN = 'login'
262+
mock_env.CLINGEN_ALLELE_REGISTRY_PASSWORD = 'password1' # noqa: S105
262263
mock_register_alleles.side_effect = [
263264
iter(
264265
[
@@ -837,6 +838,7 @@ def test_update_vat_grch37(
837838
),
838839
hgmd=None,
839840
gt_stats=hl.Struct(AC=0, AN=6, AF=0.0, hom=0),
841+
CAID=None,
840842
),
841843
)
842844

v03_pipeline/lib/tasks/write_new_variants_table.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,8 @@ def create_table(self) -> hl.Table:
179179
# Register the new variant alleles to the Clingen Allele Registry
180180
# and annotate new_variants table with CAID.
181181
if (
182-
Env.SHOULD_REGISTER_ALLELES
182+
Env.CLINGEN_ALLELE_REGISTRY_LOGIN
183+
and Env.CLINGEN_ALLELE_REGISTRY_PASSWORD
183184
and self.dataset_type.should_send_to_allele_registry
184185
):
185186
ar_ht = hl.Table.parallelize(
@@ -197,6 +198,8 @@ def create_table(self) -> hl.Table:
197198
):
198199
ar_ht = ar_ht.union(ar_ht_chunk)
199200
new_variants_ht = new_variants_ht.join(ar_ht, 'left')
201+
elif self.dataset_type.should_send_to_allele_registry:
202+
new_variants_ht = new_variants_ht.annotate(CAID=hl.missing(hl.tstr))
200203
return new_variants_ht.select_globals(
201204
updates={
202205
hl.Struct(

0 commit comments

Comments
 (0)