cBioPortal · alisman · Oct 8, 2025 · Oct 15, 2025 · Oct 17, 2025 · alisman
diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse.sql b/src/main/resources/db-scripts/clickhouse/clickhouse.sql
@@ -173,7 +173,7 @@ CREATE TABLE IF NOT EXISTS genomic_event_derived
     patient_unique_id         String,
     off_panel                 Boolean DEFAULT FALSE
 ) ENGINE = MergeTree
-      ORDER BY (variant_type, entrez_gene_id, hugo_gene_symbol, genetic_profile_stable_id, sample_unique_id);
+      ORDER BY (cancer_study_identifier, variant_type, entrez_gene_id, hugo_gene_symbol, genetic_profile_stable_id, sample_unique_id);
 
 INSERT INTO genomic_event_derived
 -- Insert Mutations
@@ -485,6 +485,105 @@ FROM
         JOIN cancer_study cs ON cs.cancer_study_id = subquery.cancer_study_id
         JOIN sample_derived sd ON sd.internal_id = subquery.sample_id;
 
+
+-- START: PRIMARY KEY ADDITIONS
+-- THE FOLLOWING SCRIPTS ADD PRIMARY KEYS TO LEGACY TABLES THAT ARE MISSING THEM. CLICKHOUSE DOES
+-- NOT ALLOW CHANGING THE PRIMARY KEY OF AN EXISTING TABLE, SO EACH SCRIPT CREATES A NEW TABLE WITH
+-- THE PRIMARY KEY, COPIES THE DATA OVER, AND THEN SWAPS THE NEW TABLE IN FOR THE OLD ONE.
+
+
+-- Rebuilds sample_cna_event with a primary key (ClickHouse only). Plain MergeTree matches the derived tables in this file and also runs outside ClickHouse Cloud.
+DROP TABLE IF EXISTS sample_cna_event_BACKUP;
+CREATE TABLE sample_cna_event_BACKUP
+(
+    `cna_event_id` Int64 COMMENT 'References cna_event.cna_event_id.',
+    `sample_id` Int64 COMMENT 'References sample.internal_id.',
+    `genetic_profile_id` Int64 COMMENT 'References genetic_profile.genetic_profile_id.',
+    `annotation_json` Nullable(String) COMMENT 'JSON-formatted annotation details.'
+)
+    ENGINE = MergeTree
+    PRIMARY KEY (genetic_profile_id, cna_event_id, sample_id)
+    ORDER BY (genetic_profile_id, cna_event_id, sample_id)
+    SETTINGS index_granularity = 8192
+    COMMENT 'Observed CNA events per sample and profile. References cna_event, sample, and genetic_profile.';
+
+-- Copy the rows. SELECT * is matched to the target columns by position, so the column order above must mirror the source table.
+INSERT INTO sample_cna_event_BACKUP
+SELECT * FROM sample_cna_event;
+
+-- Swap the tables: sample_cna_event becomes the keyed copy and sample_cna_event_BACKUP keeps the original data.
+EXCHANGE TABLES sample_cna_event_BACKUP AND sample_cna_event;
+
+-- Rebuilds mutation with a primary key (ClickHouse only). Plain MergeTree matches the derived tables in this file and also runs outside ClickHouse Cloud.
+DROP TABLE IF EXISTS mutation_BACKUP;
+CREATE TABLE mutation_BACKUP
+(
+    `mutation_event_id` Int64 COMMENT 'References mutation_event.mutation_event_id.',
+    `genetic_profile_id` Int64 COMMENT 'References genetic_profile.genetic_profile_id.',
+    `sample_id` Int64 COMMENT 'References sample.internal_id.',
+    `entrez_gene_id` Int64 COMMENT 'References gene.entrez_gene_id.',
+    `center` Nullable(String) COMMENT 'Center where sequencing was performed.',
+    `sequencer` Nullable(String) COMMENT 'Sequencing platform used.',
+    `mutation_status` Nullable(String) COMMENT 'Mutation status: Germline, Somatic, or LOH.',
+    `validation_status` Nullable(String) COMMENT 'Validation status.',
+    `tumor_seq_allele1` Nullable(String) COMMENT 'Tumor allele 1 sequence.',
+    `tumor_seq_allele2` Nullable(String) COMMENT 'Tumor allele 2 sequence.',
+    `matched_norm_sample_barcode` Nullable(String) COMMENT 'Matched normal sample barcode.',
+    `match_norm_seq_allele1` Nullable(String) COMMENT 'Matched normal allele 1 sequence.',
+    `match_norm_seq_allele2` Nullable(String) COMMENT 'Matched normal allele 2 sequence.',
+    `tumor_validation_allele1` Nullable(String) COMMENT 'Tumor validation allele 1 sequence.',
+    `tumor_validation_allele2` Nullable(String) COMMENT 'Tumor validation allele 2 sequence.',
+    `match_norm_validation_allele1` Nullable(String) COMMENT 'Matched normal validation allele 1.',
+    `match_norm_validation_allele2` Nullable(String) COMMENT 'Matched normal validation allele 2.',
+    `verification_status` Nullable(String) COMMENT 'Verification status.',
+    `sequencing_phase` Nullable(String) COMMENT 'Sequencing phase.',
+    `sequence_source` Nullable(String) COMMENT 'Source of sequencing data.',
+    `validation_method` Nullable(String) COMMENT 'Validation method used.',
+    `score` Nullable(String) COMMENT 'Score or quality metric.',
+    `bam_file` Nullable(String) COMMENT 'Associated BAM file.',
+    `tumor_alt_count` Nullable(Int64) COMMENT 'Tumor alternate allele count.',
+    `tumor_ref_count` Nullable(Int64) COMMENT 'Tumor reference allele count.',
+    `normal_alt_count` Nullable(Int64) COMMENT 'Normal alternate allele count.',
+    `normal_ref_count` Nullable(Int64) COMMENT 'Normal reference allele count.',
+    `amino_acid_change` Nullable(String) COMMENT 'Amino acid change from mutation.',
+    `annotation_json` Nullable(String) COMMENT 'JSON-formatted annotations.'
+)
+    ENGINE = MergeTree
+    PRIMARY KEY (genetic_profile_id, entrez_gene_id)
+    ORDER BY (genetic_profile_id, entrez_gene_id)
+    SETTINGS index_granularity = 8192
+    COMMENT 'Mutation observations in specific samples and profiles. References mutation_event, gene, genetic_profile, and sample.';
+
+-- Copy the rows. SELECT * is matched to the target columns by position, so the column order above must mirror the source table.
+INSERT INTO mutation_BACKUP
+SELECT * FROM mutation;
+
+-- Swap the tables: mutation becomes the keyed copy and mutation_BACKUP keeps the original data.
+EXCHANGE TABLES mutation_BACKUP AND mutation;
+
+-- Rebuilds genetic_alteration with a primary key (ClickHouse only). Plain MergeTree matches the derived tables in this file and also runs outside ClickHouse Cloud.
+DROP TABLE IF EXISTS genetic_alteration_BACKUP;
+CREATE TABLE genetic_alteration_BACKUP
+(
+    `genetic_profile_id` Int64,
+    `genetic_entity_id` Int64,
+    `values` String
+)
+    ENGINE = MergeTree
+    PRIMARY KEY (genetic_profile_id, genetic_entity_id)
+    ORDER BY (genetic_profile_id, genetic_entity_id)
+    SETTINGS index_granularity = 8192;
+
+-- Copy the rows. SELECT * is matched to the target columns by position, so the column order above must mirror the source table.
+INSERT INTO genetic_alteration_BACKUP
+SELECT * FROM genetic_alteration;
+
+-- Swap the tables: genetic_alteration becomes the keyed copy and genetic_alteration_BACKUP keeps the original data.
+EXCHANGE TABLES genetic_alteration_BACKUP AND genetic_alteration;
+
+--END: PRIMARY KEY ADDITIONS
+
+
 OPTIMIZE TABLE sample_to_gene_panel_derived;
 OPTIMIZE TABLE gene_panel_to_gene_derived;
 OPTIMIZE TABLE sample_derived;