Skip to content

Commit 1bb1d80

Browse files
committed
merge
2 parents 20da979 + ec656fb commit 1bb1d80

File tree

120 files changed

+1875
-707
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

120 files changed

+1875
-707
lines changed

.github/workflows/unit-tests.yml

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -30,15 +30,11 @@ jobs:
3030
python -m pip install --upgrade pip
3131
pip install -r requirements.txt
3232
pip install -r requirements-dev.txt
33-
- name: Check Import Sort
34-
run: isort . --check --diff
35-
- name: Check Black
36-
run: black . --check --diff
3733
- name: Check Ruff
3834
run: ruff . --output-format github
3935
- name: Unit Tests
4036
run: |
41-
export ACCESS_PRIVATE_DATASETS=1
37+
export ACCESS_PRIVATE_REFERENCE_DATASETS=1
4238
export PYSPARK_SUBMIT_ARGS='--driver-memory 8G pyspark-shell'
4339
nosetests --with-coverage --cover-package v03_pipeline/lib v03_pipeline/lib
4440
coverage report --omit '*test*' --fail-under=75

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,6 @@ downloads/
3131
eggs/
3232
.eggs/
3333
lib64/
34-
parts/
3534
sdist/
3635
wheels/
3736
share/python-wheels/
Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,20 @@
11
import tempfile
22

33
import hail as hl
4+
5+
from v03_pipeline.lib.model import ReferenceGenome
46
from v03_pipeline.lib.reference_data.clinvar import (
57
download_and_import_latest_clinvar_vcf,
68
CLINVAR_GOLD_STARS_LOOKUP,
79
)
810
from hail_scripts.utils.hail_utils import write_ht
911

10-
CLINVAR_PATH = 'ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh{genome_version}/clinvar.vcf.gz'
11-
CLINVAR_HT_PATH = 'gs://seqr-reference-data/GRCh{genome_version}/clinvar/clinvar.GRCh{genome_version}.ht'
12+
CLINVAR_PATH = 'ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_{reference_genome}/clinvar.vcf.gz'
13+
CLINVAR_HT_PATH = 'gs://seqr-reference-data/{reference_genome}/clinvar/clinvar.{reference_genome}.ht'
1214

13-
for genome_version in ["37", "38"]:
14-
clinvar_url = CLINVAR_PATH.format(genome_version=genome_version)
15-
ht = download_and_import_latest_clinvar_vcf(clinvar_url, genome_version)
15+
for reference_genome in ReferenceGenome:
16+
clinvar_url = CLINVAR_PATH.format(reference_genome=reference_genome.value)
17+
ht = download_and_import_latest_clinvar_vcf(clinvar_url, reference_genome)
1618
timestamp = hl.eval(ht.version)
1719
ht = ht.annotate(
1820
gold_stars=CLINVAR_GOLD_STARS_LOOKUP.get(hl.delimit(ht.info.CLNREVSTAT))
@@ -21,7 +23,7 @@
2123
ht = ht.repartition(100)
2224
write_ht(
2325
ht,
24-
CLINVAR_HT_PATH.format(genome_version=genome_version).replace(".ht", ".")
26+
CLINVAR_HT_PATH.format(reference_genome=reference_genome.value).replace(".ht", ".")
2527
+ timestamp
2628
+ ".ht",
2729
)

download_and_create_reference_datasets/v02/hail_scripts/write_combined_interval_ref_data.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
import hail as hl
55

6-
from v03_pipeline.lib.reference_data.combine import join_hts
6+
from v03_pipeline.lib.reference_data.dataset_table_operations import join_hts
77

88
VERSION = '2.0.5'
99
OUTPUT_PATH = "gs://seqr-reference-data/GRCh38/combined_interval_reference_data/combined_interval_reference_data.ht"

download_and_create_reference_datasets/v02/hail_scripts/write_combined_reference_data_ht.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
import hail as hl
55

6-
from v03_pipeline.lib.reference_data.combine import join_hts
6+
from v03_pipeline.lib.reference_data.dataset_table_operations import join_hts
77
from v03_pipeline.lib.reference_data.config import CONFIG
88

99
VERSION = '2.0.4'

download_and_create_reference_datasets/v02/hail_scripts/write_dbnsfp_ht.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -5,12 +5,12 @@
55
'2.9.3': {
66
'reference_genome': '37',
77
'source_path': 'gs://seqr-reference-data/GRCh37/dbNSFP/v2.9.3/dbNSFP2.9.3_variant.chr*.gz',
8-
'output_path': 'gs://seqr-reference-data/GRCh37/dbNSFP/v2.9.3/dbNSFP2.9.3_variant.ht',
8+
'output_path': 'gs://seqr-reference-data/GRCh37/dbNSFP/v2.9.3/dbNSFP2.9.3_variant.with_new_scores.ht',
99
},
1010
'4.2': {
1111
'reference_genome': '38',
1212
'source_path': 'gs://seqr-reference-data/GRCh38/dbNSFP/v4.2/dbNSFP4.2a_variant.chr*.gz',
13-
'output_path': 'gs://seqr-reference-data/GRCh38/dbNSFP/v4.2/dbNSFP4.2a_variant.ht',
13+
'output_path': 'gs://seqr-reference-data/GRCh38/dbNSFP/v4.2/dbNSFP4.2a_variant.with_new_scores.ht',
1414
},
1515
}
1616

@@ -21,9 +21,9 @@
2121
'pos(1-coor)': tint,
2222
'ref': tstr,
2323
'alt': tstr,
24-
'SIFT_pred': tstr,
24+
'SIFT_score': tstr,
2525
'Polyphen2_HDIV_pred': tstr,
26-
'Polyphen2_HVAR_pred': tstr,
26+
'Polyphen2_HVAR_score': tstr,
2727
'LRT_pred': tstr,
2828
'MutationTaster_pred': tstr,
2929
'MutationAssessor_pred': tstr,
@@ -68,16 +68,16 @@
6868
'pos(1-based)': tint,
6969
'ref': tstr,
7070
'alt': tstr,
71-
'SIFT_pred': tstr,
72-
'Polyphen2_HVAR_pred': tstr,
71+
'SIFT_score': tstr,
72+
'Polyphen2_HVAR_score': tstr,
7373
'MutationTaster_pred': tstr,
7474
'FATHMM_pred': tstr,
7575
'VEST4_score': tstr,
7676
'MetaSVM_pred': tstr,
7777
'REVEL_score': tstr,
7878
'GERP++_RS': tstr,
7979
'phastCons100way_vertebrate': tstr,
80-
'fathmm-MKL_coding_pred': tstr,
80+
'fathmm-MKL_coding_score': tfloat,
8181
'MutPred_score': tstr,
8282
}
8383
}

download_and_create_reference_datasets/v02/mito/write_combined_mito_reference_data_hts.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
import hail as hl
66

7-
from v03_pipeline.lib.reference_data.combine import join_hts
7+
from v03_pipeline.lib.reference_data.dataset_table_operations import join_hts
88

99
VERSION = '2.0.4'
1010
OUTPUT_PATH = 'gs://seqr-reference-data/GRCh38/mitochondrial/all_mito_reference_data/combined_reference_data_chrM.ht'

luigi_pipeline/tests/test_hail_tasks.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -40,12 +40,10 @@ def _hail_matrix_table_task(self):
4040

4141
def _set_validation_configs(self):
4242
global_config = GlobalConfig()
43-
global_config.param_kwargs[
44-
'validation_37_coding_ht'
45-
] = global_config.validation_37_coding_ht = 'tests/data/validation_37_coding.ht'
46-
global_config.param_kwargs[
47-
'validation_37_noncoding_ht'
48-
] = (
43+
global_config.param_kwargs['validation_37_coding_ht'] = (
44+
global_config.validation_37_coding_ht
45+
) = 'tests/data/validation_37_coding.ht'
46+
global_config.param_kwargs['validation_37_noncoding_ht'] = (
4947
global_config.validation_37_noncoding_ht
5048
) = 'tests/data/validation_37_noncoding.ht'
5149

pyproject.toml

Lines changed: 3 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -14,40 +14,6 @@ include = ["v03_pipeline*"]
1414
exclude = ["v03_pipeline.bin", "v03_pipeline*test*"]
1515
namespaces = false
1616

17-
[tool.black]
18-
line-length = 88
19-
skip-string-normalization = true
20-
extend-exclude = '''
21-
(
22-
download_and_create_reference_datasets/v02
23-
| gcloud_dataproc
24-
| hail_scripts/computed_fields
25-
| hail_scripts/elasticsearch
26-
| hail_scripts/update_models
27-
| hail_scripts/utils
28-
| kubernetes
29-
| luigi_pipeline/lib
30-
| luigi_pipeline/seqr.*\.py$
31-
| luigi_pipeline/tests/data
32-
)
33-
'''
34-
35-
[tool.isort]
36-
include_trailing_comma = 'True'
37-
known_external_package = ['hail_scripts']
38-
line_length = 88
39-
profile = 'black'
40-
sections = ['FUTURE', 'STDLIB', 'THIRDPARTY', 'EXTERNAL_PACKAGE', 'FIRSTPARTY', 'LOCALFOLDER']
41-
skip_glob = [
42-
'download_and_create_reference_datasets/v02/*',
43-
'gcloud_dataproc/*',
44-
'hail_scripts/computed_fields/*',
45-
'hail_scripts/elasticsearch/*',
46-
'hail_scripts/update_models/*',
47-
'hail_scripts/utils/*',
48-
'kubernetes/*'
49-
]
50-
5117
[tool.mypy]
5218
packages = 'v03_pipeline'
5319
python_version = "3.10"
@@ -71,7 +37,6 @@ ignore = [
7137
"G004", # logging-f-string, these are fine for now
7238

7339
# Rule Groupings
74-
"I", # isort is enabled so this is unnecessary (for now, maybe we want to disable isort)
7540
"D", # pydocstyle is for docs... we have none
7641
"FBT", # flake-boolean-trap... disallows boolean args to functions... fixing this code will require refactors.
7742
"ANN", # flake8-annotations is for typed code
@@ -102,3 +67,6 @@ inline-quotes = "single"
10267

10368
[tool.ruff.pylint]
10469
max-args = 6
70+
71+
[tool.ruff.format]
72+
quote-style = "single"

requirements-dev.in

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,7 @@
11
-c requirements.txt
2-
black>=23.7.0
32
coverage>=7.2.2
4-
isort>=5.12.0
53
mypy>=1.2.0
64
nose-py3>=1.6.3 # bug in regular nose here: https://github.com/nose-devs/nose/issues/1099#issuecomment-577647469
75
pip-tools>=6.12.3
86
responses>=0.23.1
9-
ruff>=0.0.259
7+
ruff>=0.1.8

0 commit comments

Comments
 (0)