diff --git a/v03_pipeline/lib/model/cached_reference_dataset_query.py b/v03_pipeline/lib/model/cached_reference_dataset_query.py index 42b9f6abd..02ff1c807 100644 --- a/v03_pipeline/lib/model/cached_reference_dataset_query.py +++ b/v03_pipeline/lib/model/cached_reference_dataset_query.py @@ -4,8 +4,7 @@ import hail as hl from v03_pipeline.lib.model.dataset_type import DatasetType -from v03_pipeline.lib.model.definitions import AccessControl, ReferenceGenome -from v03_pipeline.lib.model.environment import Env +from v03_pipeline.lib.model.definitions import ReferenceGenome from v03_pipeline.lib.reference_data.queries import ( clinvar_path_variants, gnomad_coding_and_noncoding_variants, @@ -20,10 +19,6 @@ class CachedReferenceDatasetQuery(Enum): GNOMAD_QC = 'gnomad_qc' HIGH_AF_VARIANTS = 'high_af_variants' - @property - def access_control(self) -> AccessControl: - return AccessControl.PUBLIC - def dataset(self, dataset_type: DatasetType) -> str | None: return { CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS: 'clinvar_mito' @@ -56,15 +51,10 @@ def for_reference_genome_dataset_type( reference_genome: ReferenceGenome, dataset_type: DatasetType, ) -> list['CachedReferenceDatasetQuery']: - crdqs = { + return { (ReferenceGenome.GRCh38, DatasetType.SNV_INDEL): list(cls), (ReferenceGenome.GRCh38, DatasetType.MITO): [ CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS, ], (ReferenceGenome.GRCh37, DatasetType.SNV_INDEL): list(cls), }.get((reference_genome, dataset_type), []) - if not Env.ACCESS_PRIVATE_REFERENCE_DATASETS: - return [ - crdq for crdq in crdqs if crdq.access_control == AccessControl.PUBLIC - ] - return crdqs diff --git a/v03_pipeline/lib/paths.py b/v03_pipeline/lib/paths.py index 93669d43a..14482d831 100644 --- a/v03_pipeline/lib/paths.py +++ b/v03_pipeline/lib/paths.py @@ -41,6 +41,22 @@ def _v03_reference_data_prefix( ) +def cached_reference_dataset_query_path( + reference_genome: ReferenceGenome, + dataset_type: DatasetType, + cached_reference_dataset_query: CachedReferenceDatasetQuery, +) -> str: + return os.path.join( + _v03_reference_data_prefix( + AccessControl.PUBLIC, + reference_genome, + ), + dataset_type.value, + 'cached_reference_dataset_queries', + f'{cached_reference_dataset_query.value}.ht', + ) + + def family_table_path( reference_genome: ReferenceGenome, dataset_type: DatasetType, @@ -182,27 +198,6 @@ def sex_check_table_path( ) -def valid_cached_reference_dataset_query_path( - reference_genome: ReferenceGenome, - dataset_type: DatasetType, - cached_reference_dataset_query: CachedReferenceDatasetQuery, -) -> str | None: - if ( - not Env.ACCESS_PRIVATE_REFERENCE_DATASETS - and cached_reference_dataset_query.access_control == AccessControl.PRIVATE - ): - return None - return os.path.join( - _v03_reference_data_prefix( - cached_reference_dataset_query.access_control, - reference_genome, - ), - dataset_type.value, - 'cached_reference_dataset_queries', - f'{cached_reference_dataset_query.value}.ht', - ) - - def valid_reference_dataset_collection_path( reference_genome: ReferenceGenome, dataset_type: DatasetType, diff --git a/v03_pipeline/lib/paths_test.py b/v03_pipeline/lib/paths_test.py index fabb920e1..d6f0b10ba 100644 --- a/v03_pipeline/lib/paths_test.py +++ b/v03_pipeline/lib/paths_test.py @@ -8,6 +8,7 @@ ReferenceGenome, ) from v03_pipeline.lib.paths import ( + cached_reference_dataset_query_path, family_table_path, imported_callset_path, lookup_table_path, @@ -17,7 +18,6 @@ relatedness_check_table_path, remapped_and_subsetted_callset_path, sex_check_table_path, - valid_cached_reference_dataset_query_path, valid_reference_dataset_collection_path, variant_annotations_table_path, ) @@ -26,7 +26,7 @@ class TestPaths(unittest.TestCase): def test_cached_reference_dataset_query_path(self) -> None: self.assertEqual( - valid_cached_reference_dataset_query_path( + cached_reference_dataset_query_path( ReferenceGenome.GRCh38, DatasetType.SNV_INDEL, CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS, diff --git a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py index 9177aa21f..92fc7718b 100644 --- a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py +++ b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py @@ -8,7 +8,7 @@ ReferenceDatasetCollection, ) from v03_pipeline.lib.paths import ( - valid_cached_reference_dataset_query_path, + cached_reference_dataset_query_path, valid_reference_dataset_collection_path, ) from v03_pipeline.lib.reference_data.compare_globals import ( @@ -56,7 +56,7 @@ def complete(self) -> bool: def output(self) -> luigi.Target: return GCSorLocalTarget( - valid_cached_reference_dataset_query_path( + cached_reference_dataset_query_path( self.reference_genome, self.dataset_type, self.crdq, diff --git a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py index 6541f8672..8dd9558c8 100644 --- a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py +++ b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py @@ -14,7 +14,7 @@ SampleType, ) from v03_pipeline.lib.paths import ( - valid_cached_reference_dataset_query_path, + cached_reference_dataset_query_path, valid_reference_dataset_collection_path, ) from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS @@ -167,7 +167,7 @@ def test_clinvar( # clinvar has version '2022-01-01' shutil.copytree( CLINVAR_CRDQ_PATH, - valid_cached_reference_dataset_query_path( + cached_reference_dataset_query_path( ReferenceGenome.GRCh38, DatasetType.SNV_INDEL, CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS, diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py index 21fe5f532..80a8fe2b4 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py @@ -24,7 +24,7 @@ SampleType, ) from v03_pipeline.lib.paths import ( - valid_cached_reference_dataset_query_path, + cached_reference_dataset_query_path, valid_reference_dataset_collection_path, ) from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS @@ -341,7 +341,7 @@ def test_multiple_update_vat( ), ) coding_and_noncoding_variants_ht.write( - valid_cached_reference_dataset_query_path( + cached_reference_dataset_query_path( ReferenceGenome.GRCh38, DatasetType.SNV_INDEL, CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS, diff --git a/v03_pipeline/lib/tasks/write_imported_callset.py b/v03_pipeline/lib/tasks/write_imported_callset.py index 345af90e2..e5847380b 100644 --- a/v03_pipeline/lib/tasks/write_imported_callset.py +++ b/v03_pipeline/lib/tasks/write_imported_callset.py @@ -18,9 +18,9 @@ from v03_pipeline.lib.model import CachedReferenceDatasetQuery from v03_pipeline.lib.model.environment import Env from v03_pipeline.lib.paths import ( + cached_reference_dataset_query_path, imported_callset_path, sex_check_table_path, - valid_cached_reference_dataset_query_path, ) from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask from v03_pipeline.lib.tasks.files import CallsetTask, GCSorLocalTarget, HailTableTask @@ -86,7 +86,7 @@ def requires(self) -> list[luigi.Task]: ) if Env.REFERENCE_DATA_AUTO_UPDATE else HailTableTask( - valid_cached_reference_dataset_query_path( + cached_reference_dataset_query_path( self.reference_genome, self.dataset_type, CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS, @@ -169,7 +169,7 @@ def create_table(self) -> hl.MatrixTable: validate_no_duplicate_variants(mt) validate_expected_contig_frequency(mt, self.reference_genome) coding_and_noncoding_ht = hl.read_table( - valid_cached_reference_dataset_query_path( + cached_reference_dataset_query_path( self.reference_genome, self.dataset_type, CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS, diff --git a/v03_pipeline/lib/tasks/write_relatedness_check_table.py b/v03_pipeline/lib/tasks/write_relatedness_check_table.py index be7b92e6e..1ba75446c 100644 --- a/v03_pipeline/lib/tasks/write_relatedness_check_table.py +++ b/v03_pipeline/lib/tasks/write_relatedness_check_table.py @@ -4,8 +4,8 @@ from v03_pipeline.lib.methods.relatedness import call_relatedness from v03_pipeline.lib.model import CachedReferenceDatasetQuery, Env from v03_pipeline.lib.paths import ( + cached_reference_dataset_query_path, relatedness_check_table_path, - valid_cached_reference_dataset_query_path, ) from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask from v03_pipeline.lib.tasks.files import GCSorLocalTarget, HailTableTask @@ -48,7 +48,7 @@ def requires(self) -> luigi.Task: ) if Env.REFERENCE_DATA_AUTO_UPDATE else HailTableTask( - valid_cached_reference_dataset_query_path( + cached_reference_dataset_query_path( self.reference_genome, self.dataset_type, CachedReferenceDatasetQuery.GNOMAD_QC, diff --git a/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py b/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py index dd33bb5e2..239c6ea11 100644 --- a/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py +++ b/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py @@ -12,9 +12,9 @@ SampleType, ) from v03_pipeline.lib.paths import ( + cached_reference_dataset_query_path, imported_callset_path, relatedness_check_table_path, - valid_cached_reference_dataset_query_path, ) from v03_pipeline.lib.tasks.write_relatedness_check_table import ( WriteRelatednessCheckTableTask, @@ -45,7 +45,7 @@ class WriteRelatednessCheckTableTaskTest(MockedDatarootTestCase): def setUp(self) -> None: super().setUp() - self.gnomad_qc_path = valid_cached_reference_dataset_query_path( + self.gnomad_qc_path = cached_reference_dataset_query_path( ReferenceGenome.GRCh38, DatasetType.SNV_INDEL, CachedReferenceDatasetQuery.GNOMAD_QC,