Skip to content

Caid refactor (#927) #928

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,5 @@ hail==0.2.132
luigi>=3.4.0
gnomad==0.6.4
google-cloud-storage>=2.14.0
google-cloud-secret-manager>=2.20.0
aiofiles==24.1.0
pydantic==2.8.2
31 changes: 3 additions & 28 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,10 @@ frozenlist==1.4.0
# hail
gnomad==0.6.4
# via -r requirements.in
google-api-core[grpc]==2.14.0
google-api-core==2.14.0
# via
# google-api-python-client
# google-cloud-core
# google-cloud-secret-manager
# google-cloud-storage
google-api-python-client==2.108.0
# via -r requirements.in
Expand All @@ -118,7 +117,6 @@ google-auth==2.23.4
# google-auth-httplib2
# google-auth-oauthlib
# google-cloud-core
# google-cloud-secret-manager
# google-cloud-storage
# hail
google-auth-httplib2==0.1.1
Expand All @@ -127,8 +125,6 @@ google-auth-oauthlib==0.8.0
# via hail
google-cloud-core==2.4.1
# via google-cloud-storage
google-cloud-secret-manager==2.20.0
# via -r requirements.in
google-cloud-storage==2.14.0
# via -r requirements.in
google-crc32c==1.5.0
Expand All @@ -137,20 +133,7 @@ google-crc32c==1.5.0
# google-resumable-media
google-resumable-media==2.7.0
# via google-cloud-storage
googleapis-common-protos[grpc]==1.61.0
# via
# google-api-core
# grpc-google-iam-v1
# grpcio-status
grpc-google-iam-v1==0.13.0
# via google-cloud-secret-manager
grpcio==1.63.0
# via
# google-api-core
# googleapis-common-protos
# grpc-google-iam-v1
# grpcio-status
grpcio-status==1.48.2
googleapis-common-protos==1.61.0
# via google-api-core
hail==0.2.132
# via -r requirements.in
Expand Down Expand Up @@ -249,17 +232,11 @@ portalocker==2.8.2
# via msal-extensions
prompt-toolkit==3.0.41
# via ipython
proto-plus==1.23.0
# via google-cloud-secret-manager
protobuf==3.20.2
# via
# google-api-core
# google-cloud-secret-manager
# googleapis-common-protos
# grpc-google-iam-v1
# grpcio-status
# hail
# proto-plus
ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.2
Expand All @@ -285,9 +262,7 @@ pygments==2.17.2
# ipython
# rich
pyjwt[crypto]==2.8.0
# via
# msal
# pyjwt
# via msal
pyparsing==3.1.1
# via httplib2
pyspark==3.5.1
Expand Down
26 changes: 4 additions & 22 deletions v03_pipeline/lib/misc/allele_registry.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
import dataclasses
import hashlib
import json
import math
import time
import uuid

import hail as hl
import hailtop.fs as hfs
import requests
from google.cloud import secretmanager
from requests import HTTPError

from v03_pipeline.lib.logger import get_logger
Expand Down Expand Up @@ -106,7 +104,10 @@ def register_alleles(


def build_url(base_url: str, reference_genome: ReferenceGenome) -> str:
login, password = get_ar_credentials_from_secret_manager()
login, password = (
Env.CLINGEN_ALLELE_REGISTRY_LOGIN,
Env.CLINGEN_ALLELE_REGISTRY_PASSWORD,
)

# Request a gnomad ID for the correct reference genome
base_url = base_url.format(reference_genome.allele_registry_gnomad_id)
Expand All @@ -118,25 +119,6 @@ def build_url(base_url: str, reference_genome: ReferenceGenome) -> str:
return base_url + '&gbLogin=' + login + '&gbTime=' + gb_time + '&gbToken=' + token


def get_ar_credentials_from_secret_manager() -> tuple[str, str]:
if Env.ALLELE_REGISTRY_SECRET_NAME is None:
msg = (
'SHOULD_REGISTER_ALLELES is True but cannot get allele registry credentials '
'because ALLELE_REGISTRY_SECRET_NAME is not set'
)
raise ValueError(msg)

client = secretmanager.SecretManagerServiceClient()
name = client.secret_version_path(
Env.PROJECT_ID,
Env.ALLELE_REGISTRY_SECRET_NAME,
'latest',
)
response = client.access_secret_version(request={'name': name})
payload_dict = json.loads(response.payload.data.decode('UTF-8'))
return payload_dict['login'], payload_dict['password']


def handle_api_response( # noqa: C901
res: requests.Response,
base_url: str,
Expand Down
7 changes: 2 additions & 5 deletions v03_pipeline/lib/misc/allele_registry_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,17 @@ def tearDown(self):
shutil.rmtree(self.temp_dir.name)

@patch.object(requests, 'put')
@patch(
'v03_pipeline.lib.misc.allele_registry.get_ar_credentials_from_secret_manager',
)
@patch('v03_pipeline.lib.misc.allele_registry.Env')
@patch('v03_pipeline.lib.misc.allele_registry.logger')
def test_register_alleles_38(
self,
mock_logger: Mock,
mock_env: Mock,
mock_get_credentials: Mock,
mock_put_request: Mock,
):
mock_get_credentials.return_value = ('', '')
mock_env.HAIL_TMP_DIR = self.temp_dir.name
mock_env.CLINGEN_ALLELE_REGISTRY_LOGIN = ''
mock_env.CLINGEN_ALLELE_REGISTRY_PASSWORD = ''

new_variants_ht = hl.Table.parallelize(
[
Expand Down
15 changes: 7 additions & 8 deletions v03_pipeline/lib/model/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,11 @@
'hail-search',
)
HAIL_BACKEND_SERVICE_PORT = int(os.environ.get('HAIL_BACKEND_SERVICE_PORT', '5000'))

# Allele registry secrets :/
ALLELE_REGISTRY_SECRET_NAME = os.environ.get('ALLELE_REGISTRY_SECRET_NAME', None)
PROJECT_ID = os.environ.get('PROJECT_ID', None)
CLINGEN_ALLELE_REGISTRY_LOGIN = os.environ.get('CLINGEN_ALLELE_REGISTRY_LOGIN', '')
CLINGEN_ALLELE_REGISTRY_PASSWORD = os.environ.get(
'CLINGEN_ALLELE_REGISTRY_PASSWORD',
'',
)

# Feature Flags
ACCESS_PRIVATE_REFERENCE_DATASETS = (
Expand All @@ -50,7 +51,6 @@
INCLUDE_PIPELINE_VERSION_IN_PREFIX = (
os.environ.get('INCLUDE_PIPELINE_VERSION_IN_PREFIX') == '1'
)
SHOULD_REGISTER_ALLELES = os.environ.get('SHOULD_REGISTER_ALLELES') == '1'
SHOULD_TRIGGER_HAIL_BACKEND_RELOAD = (
os.environ.get('SHOULD_TRIGGER_HAIL_BACKEND_RELOAD') == '1'
)
Expand All @@ -59,8 +59,9 @@
@dataclass
class Env:
ACCESS_PRIVATE_REFERENCE_DATASETS: bool = ACCESS_PRIVATE_REFERENCE_DATASETS
ALLELE_REGISTRY_SECRET_NAME: str | None = ALLELE_REGISTRY_SECRET_NAME
CHECK_SEX_AND_RELATEDNESS: bool = CHECK_SEX_AND_RELATEDNESS
CLINGEN_ALLELE_REGISTRY_LOGIN: str | None = CLINGEN_ALLELE_REGISTRY_LOGIN
CLINGEN_ALLELE_REGISTRY_PASSWORD: str | None = CLINGEN_ALLELE_REGISTRY_PASSWORD
EXPECT_WES_FILTERS: bool = EXPECT_WES_FILTERS
GRCH37_TO_GRCH38_LIFTOVER_REF_PATH: str = GRCH37_TO_GRCH38_LIFTOVER_REF_PATH
GRCH38_TO_GRCH37_LIFTOVER_REF_PATH: str = GRCH38_TO_GRCH37_LIFTOVER_REF_PATH
Expand All @@ -71,8 +72,6 @@ class Env:
INCLUDE_PIPELINE_VERSION_IN_PREFIX: bool = INCLUDE_PIPELINE_VERSION_IN_PREFIX
LOADING_DATASETS_DIR: str = LOADING_DATASETS_DIR
PRIVATE_REFERENCE_DATASETS_DIR: str = PRIVATE_REFERENCE_DATASETS_DIR
PROJECT_ID: str | None = PROJECT_ID
REFERENCE_DATASETS_DIR: str = REFERENCE_DATASETS_DIR
SHOULD_REGISTER_ALLELES: bool = SHOULD_REGISTER_ALLELES
SHOULD_TRIGGER_HAIL_BACKEND_RELOAD: bool = SHOULD_TRIGGER_HAIL_BACKEND_RELOAD
VEP_REFERENCE_DATASETS_DIR: str = VEP_REFERENCE_DATASETS_DIR
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,8 @@ def test_multiple_update_vat(
)
# make register_alleles return CAIDs for 4 of 30 variants
mock_env.GRCH38_TO_GRCH37_LIFTOVER_REF_PATH = GRCH38_TO_GRCH37_LIFTOVER_REF_PATH
mock_env.SHOULD_REGISTER_ALLELES = True
mock_env.CLINGEN_ALLELE_REGISTRY_LOGIN = 'login'
mock_env.CLINGEN_ALLELE_REGISTRY_PASSWORD = 'password1' # noqa: S105
mock_register_alleles.side_effect = [
iter(
[
Expand Down Expand Up @@ -837,6 +838,7 @@ def test_update_vat_grch37(
),
hgmd=None,
gt_stats=hl.Struct(AC=0, AN=6, AF=0.0, hom=0),
CAID=None,
),
)

Expand Down
5 changes: 4 additions & 1 deletion v03_pipeline/lib/tasks/write_new_variants_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ def create_table(self) -> hl.Table:
# Register the new variant alleles to the Clingen Allele Registry
# and annotate new_variants table with CAID.
if (
Env.SHOULD_REGISTER_ALLELES
Env.CLINGEN_ALLELE_REGISTRY_LOGIN
and Env.CLINGEN_ALLELE_REGISTRY_PASSWORD
and self.dataset_type.should_send_to_allele_registry
):
ar_ht = hl.Table.parallelize(
Expand All @@ -197,6 +198,8 @@ def create_table(self) -> hl.Table:
):
ar_ht = ar_ht.union(ar_ht_chunk)
new_variants_ht = new_variants_ht.join(ar_ht, 'left')
elif self.dataset_type.should_send_to_allele_registry:
new_variants_ht = new_variants_ht.annotate(CAID=hl.missing(hl.tstr))
return new_variants_ht.select_globals(
updates={
hl.Struct(
Expand Down
Loading