Skip to content

Commit 94c983c

Browse files
bpblankenjklugherz
andauthored
Dev (#923)
* add task to write success file for run (#920) * add task to write success file for run * remove WriteProjectFamilyTablesTask from pipeline_worker * no hfs * add to init * var seqr (#922) * var seqr * ruff * comma * remove luigi config in order to move it to helm --------- Co-authored-by: Julia Klugherz <juliaklugherz@gmail.com>
1 parent 2aed82f commit 94c983c

File tree

12 files changed

+127
-62
lines changed

12 files changed

+127
-62
lines changed

v03_pipeline/bin/download_vep_reference_data.bash

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
set -eux
44

55
REFERENCE_GENOME=$1
6-
VEP_REFERENCE_DATASETS_DIR=${VEP_REFERENCE_DATASETS_DIR:-/seqr/vep-reference-data}
6+
VEP_REFERENCE_DATASETS_DIR=${VEP_REFERENCE_DATASETS_DIR:-/var/seqr/vep-reference-data}
77

88
case $REFERENCE_GENOME in
99
GRCh38)

v03_pipeline/bin/pipeline_worker.py

Lines changed: 11 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@
1515
)
1616
from v03_pipeline.lib.tasks import (
1717
UpdateVariantAnnotationsTableWithNewSamplesTask,
18-
WriteProjectFamilyTablesTask,
1918
)
2019
from v03_pipeline.lib.tasks.trigger_hail_backend_reload import TriggerHailBackendReload
20+
from v03_pipeline.lib.tasks.write_success_file import WriteSuccessFileTask
2121

2222
logger = get_logger(__name__)
2323

@@ -47,39 +47,25 @@ def main():
4747
)
4848
for project_guid in lpr.projects_to_run
4949
]
50-
task_kwargs = {
51-
k: v for k, v in lpr.model_dump().items() if k != 'projects_to_run'
52-
}
5350
run_id = datetime.datetime.now(datetime.timezone.utc).strftime(
5451
'%Y%m%d-%H%M%S',
5552
)
53+
loading_run_task_params = {
54+
'project_guids': lpr.projects_to_run,
55+
'project_remap_paths': project_remap_paths,
56+
'project_pedigree_paths': project_pedigree_paths,
57+
'run_id': run_id,
58+
**{k: v for k, v in lpr.model_dump().items() if k != 'projects_to_run'},
59+
}
5660
tasks = [
5761
UpdateVariantAnnotationsTableWithNewSamplesTask(
58-
project_guids=lpr.projects_to_run,
59-
project_remap_paths=project_remap_paths,
60-
project_pedigree_paths=project_pedigree_paths,
61-
run_id=run_id,
62-
**task_kwargs,
62+
**loading_run_task_params,
6363
),
64-
*[
65-
WriteProjectFamilyTablesTask(
66-
project_guid=lpr.projects_to_run[i],
67-
project_remap_path=project_remap_paths[i],
68-
project_pedigree_path=project_pedigree_paths[i],
69-
**task_kwargs,
70-
)
71-
for i in range(len(lpr.projects_to_run))
72-
],
64+
WriteSuccessFileTask(**loading_run_task_params),
7365
]
7466
if Env.SHOULD_TRIGGER_HAIL_BACKEND_RELOAD:
7567
tasks.append(
76-
TriggerHailBackendReload(
77-
project_guids=lpr.projects_to_run,
78-
project_remap_paths=project_remap_paths,
79-
project_pedigree_paths=project_pedigree_paths,
80-
run_id=run_id,
81-
**task_kwargs,
82-
),
68+
TriggerHailBackendReload(**loading_run_task_params),
8369
)
8470
luigi.build(tasks)
8571
except Exception:

v03_pipeline/bin/rsync_reference_data.bash

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
set -eux
44

55
REFERENCE_GENOME=$1
6-
SEQR_REFERENCE_DATA=${SEQR_REFERENCE_DATA:-/seqr/seqr-reference-data}
6+
REFERENCE_DATASETS_DIR=${REFERENCE_DATASETS_DIR:-/var/seqr/seqr-reference-data}
77

88

99
case $REFERENCE_GENOME in
@@ -16,5 +16,5 @@ case $REFERENCE_GENOME in
1616
exit 1
1717
esac
1818

19-
mkdir -p $SEQR_REFERENCE_DATA/$REFERENCE_GENOME;
20-
gsutil -m rsync -rd "gs://seqr-reference-data/v03/$REFERENCE_GENOME" $SEQR_REFERENCE_DATA/$REFERENCE_GENOME
19+
mkdir -p $REFERENCE_DATASETS_DIR/$REFERENCE_GENOME;
20+
gsutil -m rsync -rd "gs://seqr-reference-data/v03/$REFERENCE_GENOME" $REFERENCE_DATASETS_DIR/$REFERENCE_GENOME

v03_pipeline/bin/vep

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
set -eux
44

55
REFERENCE_GENOME=$1
6-
VEP_REFERENCE_DATASETS_DIR=${VEP_REFERENCE_DATASETS_DIR:-/seqr/vep-reference-data}
6+
VEP_REFERENCE_DATASETS_DIR=${VEP_REFERENCE_DATASETS_DIR:-/var/seqr/vep-reference-data}
77
VEP_DOCKER_IMAGE="gcr.io/seqr-project/vep-docker-image"
88

99
case $REFERENCE_GENOME in

v03_pipeline/deploy/Dockerfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ COPY v03_pipeline/migrations migrations
2121

2222
# Special paths
2323
COPY v03_pipeline/var/spark_config/spark-defaults.conf /usr/local/lib/python3.10/dist-packages/pyspark/conf/spark-defaults.conf
24-
COPY v03_pipeline/var/luigi_config/luigi-defaults.conf /etc/luigi/luigi.cfg
2524
COPY v03_pipeline/bin/vep /vep
2625

2726

v03_pipeline/lib/model/environment.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,23 @@
1313
HAIL_TMP_DIR = os.environ.get('HAIL_TMP_DIR', '/tmp') # noqa: S108
1414
HAIL_SEARCH_DATA_DIR = os.environ.get(
1515
'HAIL_SEARCH_DATA_DIR',
16-
'/seqr/seqr-hail-search-data',
16+
'/var/seqr/seqr-hail-search-data',
17+
)
18+
LOADING_DATASETS_DIR = os.environ.get(
19+
'LOADING_DATASETS_DIR',
20+
'/var/seqr/seqr-loading-temp',
1721
)
18-
LOADING_DATASETS_DIR = os.environ.get('LOADING_DATASETS_DIR', '/seqr/seqr-loading-temp')
1922
PRIVATE_REFERENCE_DATASETS_DIR = os.environ.get(
2023
'PRIVATE_REFERENCE_DATASETS_DIR',
21-
'/seqr/seqr-reference-data-private',
24+
'/var/seqr/seqr-reference-data-private',
2225
)
2326
REFERENCE_DATASETS_DIR = os.environ.get(
2427
'REFERENCE_DATASETS_DIR',
25-
'/seqr/seqr-reference-data',
28+
'/var/seqr/seqr-reference-data',
2629
)
2730
VEP_REFERENCE_DATASETS_DIR = os.environ.get(
2831
'VEP_REFERENCE_DATASETS_DIR',
29-
'/seqr/vep-reference-data',
32+
'/var/seqr/vep-reference-data',
3033
)
3134
HAIL_BACKEND_SERVICE_HOSTNAME = os.environ.get(
3235
'HAIL_BACKEND_SERVICE_HOSTNAME',

v03_pipeline/lib/paths.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,3 +380,18 @@ def loading_pipeline_queue_path() -> str:
380380
'loading_pipeline_queue',
381381
'request.json',
382382
)
383+
384+
385+
def pipeline_run_success_file_path(
386+
reference_genome: ReferenceGenome,
387+
dataset_type: DatasetType,
388+
run_id: str,
389+
) -> str:
390+
return os.path.join(
391+
runs_path(
392+
reference_genome,
393+
dataset_type,
394+
),
395+
run_id,
396+
'_SUCCESS',
397+
)

v03_pipeline/lib/paths_test.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def test_cached_reference_dataset_query_path(self) -> None:
3737
DatasetType.SNV_INDEL,
3838
CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
3939
),
40-
'/seqr/seqr-reference-data/v03/GRCh38/SNV_INDEL/cached_reference_dataset_queries/clinvar_path_variants.ht',
40+
'/var/seqr/seqr-reference-data/v03/GRCh38/SNV_INDEL/cached_reference_dataset_queries/clinvar_path_variants.ht',
4141
)
4242

4343
def test_family_table_path(self) -> None:
@@ -48,7 +48,7 @@ def test_family_table_path(self) -> None:
4848
SampleType.WES,
4949
'franklin',
5050
),
51-
'/seqr/seqr-hail-search-data/v3.1/GRCh37/SNV_INDEL/families/WES/franklin.ht',
51+
'/var/seqr/seqr-hail-search-data/v3.1/GRCh37/SNV_INDEL/families/WES/franklin.ht',
5252
)
5353
with patch('v03_pipeline.lib.paths.Env') as mock_env:
5454
mock_env.HAIL_SEARCH_DATA_DIR = 'gs://seqr-datasets/'
@@ -100,7 +100,7 @@ def test_project_table_path(self) -> None:
100100
SampleType.WES,
101101
'R0652_pipeline_test',
102102
),
103-
'/seqr/seqr-hail-search-data/v3.1/GRCh38/MITO/projects/WES/R0652_pipeline_test.ht',
103+
'/var/seqr/seqr-hail-search-data/v3.1/GRCh38/MITO/projects/WES/R0652_pipeline_test.ht',
104104
)
105105

106106
def test_valid_reference_dataset_collection_path(self) -> None:
@@ -120,7 +120,7 @@ def test_valid_reference_dataset_collection_path(self) -> None:
120120
DatasetType.SNV_INDEL,
121121
ReferenceDatasetCollection.HGMD,
122122
),
123-
'/seqr/seqr-reference-data-private/v03/GRCh38/SNV_INDEL/reference_datasets/hgmd.ht',
123+
'/var/seqr/seqr-reference-data-private/v03/GRCh38/SNV_INDEL/reference_datasets/hgmd.ht',
124124
)
125125

126126
def test_lookup_table_path(self) -> None:
@@ -129,7 +129,7 @@ def test_lookup_table_path(self) -> None:
129129
ReferenceGenome.GRCh37,
130130
DatasetType.SV,
131131
),
132-
'/seqr/seqr-hail-search-data/v3.1/GRCh37/SV/lookup.ht',
132+
'/var/seqr/seqr-hail-search-data/v3.1/GRCh37/SV/lookup.ht',
133133
)
134134

135135
def test_sex_check_table_path(self) -> None:
@@ -139,7 +139,7 @@ def test_sex_check_table_path(self) -> None:
139139
DatasetType.SNV_INDEL,
140140
'gs://abc.efg/callset.vcf.gz',
141141
),
142-
'/seqr/seqr-loading-temp/v3.1/GRCh38/SNV_INDEL/sex_check/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.ht',
142+
'/var/seqr/seqr-loading-temp/v3.1/GRCh38/SNV_INDEL/sex_check/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.ht',
143143
)
144144

145145
def test_relatedness_check_table_path(self) -> None:
@@ -149,7 +149,7 @@ def test_relatedness_check_table_path(self) -> None:
149149
DatasetType.SNV_INDEL,
150150
'gs://abc.efg/callset.vcf.gz',
151151
),
152-
'/seqr/seqr-loading-temp/v3.1/GRCh38/SNV_INDEL/relatedness_check/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.ht',
152+
'/var/seqr/seqr-loading-temp/v3.1/GRCh38/SNV_INDEL/relatedness_check/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.ht',
153153
)
154154

155155
def test_validation_errors_for_run_path(self) -> None:
@@ -159,7 +159,7 @@ def test_validation_errors_for_run_path(self) -> None:
159159
DatasetType.SNV_INDEL,
160160
'manual__2023-06-26T18:30:09.349671+00:00',
161161
),
162-
'/seqr/seqr-hail-search-data/v3.1/GRCh38/SNV_INDEL/runs/manual__2023-06-26T18:30:09.349671+00:00/validation_errors.json',
162+
'/var/seqr/seqr-hail-search-data/v3.1/GRCh38/SNV_INDEL/runs/manual__2023-06-26T18:30:09.349671+00:00/validation_errors.json',
163163
)
164164

165165
def test_metadata_for_run_path(self) -> None:
@@ -169,7 +169,7 @@ def test_metadata_for_run_path(self) -> None:
169169
DatasetType.SNV_INDEL,
170170
'manual__2023-06-26T18:30:09.349671+00:00',
171171
),
172-
'/seqr/seqr-hail-search-data/v3.1/GRCh38/SNV_INDEL/runs/manual__2023-06-26T18:30:09.349671+00:00/metadata.json',
172+
'/var/seqr/seqr-hail-search-data/v3.1/GRCh38/SNV_INDEL/runs/manual__2023-06-26T18:30:09.349671+00:00/metadata.json',
173173
)
174174

175175
def test_variant_annotations_table_path(self) -> None:
@@ -178,7 +178,7 @@ def test_variant_annotations_table_path(self) -> None:
178178
ReferenceGenome.GRCh38,
179179
DatasetType.GCNV,
180180
),
181-
'/seqr/seqr-hail-search-data/v3.1/GRCh38/GCNV/annotations.ht',
181+
'/var/seqr/seqr-hail-search-data/v3.1/GRCh38/GCNV/annotations.ht',
182182
)
183183

184184
def test_remapped_and_subsetted_callset_path(self) -> None:
@@ -189,7 +189,7 @@ def test_remapped_and_subsetted_callset_path(self) -> None:
189189
'gs://abc.efg/callset.vcf.gz',
190190
'R0111_tgg_bblanken_wes',
191191
),
192-
'/seqr/seqr-loading-temp/v3.1/GRCh38/GCNV/remapped_and_subsetted_callsets/R0111_tgg_bblanken_wes/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.mt',
192+
'/var/seqr/seqr-loading-temp/v3.1/GRCh38/GCNV/remapped_and_subsetted_callsets/R0111_tgg_bblanken_wes/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.mt',
193193
)
194194
self.assertEqual(
195195
remapped_and_subsetted_callset_path(
@@ -198,7 +198,7 @@ def test_remapped_and_subsetted_callset_path(self) -> None:
198198
'gs://abc.efg/callset/*.vcf.gz',
199199
'R0111_tgg_bblanken_wes',
200200
),
201-
'/seqr/seqr-loading-temp/v3.1/GRCh38/GCNV/remapped_and_subsetted_callsets/R0111_tgg_bblanken_wes/bce53ccdb49a5ed2513044e1d0c6224e3ffcc323f770dc807d9175fd3c70a050.mt',
201+
'/var/seqr/seqr-loading-temp/v3.1/GRCh38/GCNV/remapped_and_subsetted_callsets/R0111_tgg_bblanken_wes/bce53ccdb49a5ed2513044e1d0c6224e3ffcc323f770dc807d9175fd3c70a050.mt',
202202
)
203203

204204
def test_imported_callset_path(self) -> None:
@@ -208,7 +208,7 @@ def test_imported_callset_path(self) -> None:
208208
DatasetType.SNV_INDEL,
209209
'gs://abc.efg/callset.vcf.gz',
210210
),
211-
'/seqr/seqr-loading-temp/v3.1/GRCh38/SNV_INDEL/imported_callsets/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.mt',
211+
'/var/seqr/seqr-loading-temp/v3.1/GRCh38/SNV_INDEL/imported_callsets/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.mt',
212212
)
213213

214214
def test_imputed_sex_path(self) -> None:
@@ -218,7 +218,7 @@ def test_imputed_sex_path(self) -> None:
218218
DatasetType.SNV_INDEL,
219219
'gs://abc.efg/callset.vcf.gz',
220220
),
221-
'/seqr/seqr-loading-temp/v3.1/GRCh38/SNV_INDEL/imputed_sex/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.tsv',
221+
'/var/seqr/seqr-loading-temp/v3.1/GRCh38/SNV_INDEL/imputed_sex/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.tsv',
222222
)
223223

224224
def test_new_variants_table_path(self) -> None:
@@ -228,7 +228,7 @@ def test_new_variants_table_path(self) -> None:
228228
DatasetType.SNV_INDEL,
229229
'manual__2023-06-26T18:30:09.349671+00:00',
230230
),
231-
'/seqr/seqr-hail-search-data/v3.1/GRCh38/SNV_INDEL/runs/manual__2023-06-26T18:30:09.349671+00:00/new_variants.ht',
231+
'/var/seqr/seqr-hail-search-data/v3.1/GRCh38/SNV_INDEL/runs/manual__2023-06-26T18:30:09.349671+00:00/new_variants.ht',
232232
)
233233

234234
def test_project_remap_path(self) -> None:
@@ -239,7 +239,7 @@ def test_project_remap_path(self) -> None:
239239
SampleType.WGS,
240240
'R0652_pipeline_test',
241241
),
242-
'/seqr/seqr-loading-temp/v3.1/GRCh38/SNV_INDEL/remaps/WGS/R0652_pipeline_test.ht',
242+
'/var/seqr/seqr-loading-temp/v3.1/GRCh38/SNV_INDEL/remaps/WGS/R0652_pipeline_test.ht',
243243
)
244244

245245
def test_project_pedigree_path(self) -> None:
@@ -250,5 +250,5 @@ def test_project_pedigree_path(self) -> None:
250250
SampleType.WES,
251251
'R0652_pipeline_test',
252252
),
253-
'/seqr/seqr-loading-temp/v3.1/GRCh38/GCNV/pedigrees/WES/R0652_pipeline_test.ht',
253+
'/var/seqr/seqr-loading-temp/v3.1/GRCh38/GCNV/pedigrees/WES/R0652_pipeline_test.ht',
254254
)

v03_pipeline/lib/tasks/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from v03_pipeline.lib.tasks.write_project_family_tables import (
3535
WriteProjectFamilyTablesTask,
3636
)
37+
from v03_pipeline.lib.tasks.write_success_file import WriteSuccessFileTask
3738

3839
__all__ = [
3940
'DeleteFamilyTableTask',
@@ -52,4 +53,5 @@
5253
'UpdateVariantAnnotationsTableWithDeletedFamiliesTask',
5354
'WriteMetadataForRunTask',
5455
'WriteProjectFamilyTablesTask',
56+
'WriteSuccessFileTask',
5557
]
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import luigi
2+
import luigi.util
3+
4+
from v03_pipeline.lib.paths import pipeline_run_success_file_path
5+
from v03_pipeline.lib.tasks import WriteProjectFamilyTablesTask
6+
from v03_pipeline.lib.tasks.base.base_project_info_params import (
7+
BaseLoadingRunWithProjectInfoParams,
8+
)
9+
from v03_pipeline.lib.tasks.files import GCSorLocalTarget
10+
11+
12+
@luigi.util.inherits(BaseLoadingRunWithProjectInfoParams)
13+
class WriteSuccessFileTask(luigi.Task):
14+
def output(self) -> luigi.Target:
15+
return GCSorLocalTarget(
16+
pipeline_run_success_file_path(
17+
self.reference_genome,
18+
self.dataset_type,
19+
self.run_id,
20+
),
21+
)
22+
23+
def requires(self):
24+
return [
25+
self.clone(
26+
WriteProjectFamilyTablesTask,
27+
project_guid=self.project_guids[i],
28+
project_remap_path=self.project_remap_paths[i],
29+
project_pedigree_path=self.project_pedigree_paths[i],
30+
)
31+
for i in range(len(self.project_guids))
32+
]
33+
34+
def run(self):
35+
with self.output().open('w') as f:
36+
f.write('')

0 commit comments

Comments
 (0)