Add scripts for building apptainer container and fix short read samplesheet input error caused by long read null values (#36)

taranewman · web-flow · commit 86ee4dd1164a · 2025-08-28T13:48:31.000-07:00
* add scripts for building apptainer container

* add fix for short read samplesheet issue caused by long read null values

* temporarily add test branch to build container workflow

* add apptainer to profile

* remove test branch from build container workflow

* add perl to environment for container for shasum command

* build container on test branch after adding environment change

* remove test branch after initial build and update nf config

* pass all scheme files as a channel for compatibility with apptainer profile
diff --git a/.github/scripts/build_container_images_wave.sh b/.github/scripts/build_container_images_wave.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Build one frozen Singularity image per environments/*.yml with wave; abort on the first failed build.
+set -eo pipefail
+mkdir -p artifacts wave_images
+for env_yaml in environments/*.yml; do
+    image_name=$(head -n 1 "${env_yaml}" | cut -d ' ' -f 2)
+    echo "building image ${image_name} from file ${env_yaml}..."
+    wave \
+	--conda-file "${env_yaml}" \
+	--singularity \
+	--freeze \
+	--await \
+	--output json \
+	| python -m json.tool \
+	| tee "wave_images/${image_name}.json"
+    echo "done building image ${image_name}"
+    cp "wave_images/${image_name}.json" artifacts/
+done
diff --git a/.github/scripts/install_apptainer.sh b/.github/scripts/install_apptainer.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+set -eo pipefail  # stop at the first failing command or failing pipeline stage
+wget https://raw.githubusercontent.com/apptainer/apptainer/main/tools/install-unprivileged.sh  # NOTE(review): fetched from the main branch, not a pinned release
+chmod +x install-unprivileged.sh
+mkdir -p /opt/apptainer
+./install-unprivileged.sh /opt/apptainer  # /opt/apptainer is the only argument given to the installer
+echo "/opt/apptainer/bin" >> $GITHUB_PATH  # append the bin directory to the file named by $GITHUB_PATH
diff --git a/.github/scripts/install_wave-cli.sh b/.github/scripts/install_wave-cli.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+set -eo pipefail  # stop at the first failing command or failing pipeline stage
+wget https://github.com/seqeralabs/wave-cli/releases/download/v1.4.1/wave-1.4.1-linux-x86_64  # release binary pinned to v1.4.1
+mv wave-1.4.1-linux-x86_64 wave  # drop the version/platform suffix so the command is just "wave"
+chmod +x wave
+mkdir -p /opt/wave/bin
+mv wave /opt/wave/bin
+echo "/opt/wave/bin" >> $GITHUB_PATH  # append the bin directory to the file named by $GITHUB_PATH
diff --git a/.github/scripts/push_container_images_wave.py b/.github/scripts/push_container_images_wave.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+
+import argparse
+import glob
+import json
+import os
+import subprocess
+
+
+def pull_image(image_url, destination_image_file):
+    """
+    Pull the image file from external server
+    """
+    apptainer_pull_cmd = [
+        "apptainer",
+        "pull",
+        destination_image_file,
+        image_url,
+    ]
+    subprocess.run(apptainer_pull_cmd)
+
+
+def push_image(source_image_file, image_url):
+    """
+    Push apptainer image to destination image repository
+    """
+
+    apptainer_push_cmd = [
+        "apptainer",
+        "push",
+        source_image_file,
+        image_url,
+    ]
+    subprocess.run(apptainer_push_cmd)
+
+
+def main(args):
+    repo_owner = os.environ['GITHUB_REPOSITORY_OWNER'].lower()
+    
+    wave_jsons = glob.glob(os.path.join(args.wave_jsons_dir, "*.json"))
+    for wave_json in wave_jsons:
+        with open(wave_json, 'r') as f:
+            w = json.load(f)
+            pull_image_url = w['containerImage']
+            image_name_with_version = pull_image_url.split('/')[-1]
+            image_name, image_version = image_name_with_version.split(':')
+            pull_destination = os.path.join(args.images_dir, f"{image_name}--{image_version}.img")
+            pull_image(pull_image_url, pull_destination)
+
+            push_image_url = f"oras://ghcr.io/{repo_owner}/{image_name}:{image_version}"
+            push_image(pull_destination, push_image_url)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--wave-jsons-dir')  # directory that main() globs for *.json files
+    parser.add_argument('--images-dir')  # directory where main() places the pulled .img files
+    args = parser.parse_args()  # neither option is marked required, so an omitted one is None
+    main(args)
diff --git a/.github/workflows/build_and_push_containers.yml b/.github/workflows/build_and_push_containers.yml
@@ -0,0 +1,37 @@
+name: Build and Push Container Images
+
+on:
+  push:
+    tags:  # run for pushed tags matching the pattern below
+      - v*
+  workflow_dispatch:  # also allow manual runs
+
+env:
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}  # piped to `apptainer registry login` in the push step
+  GHCR_USERNAME: ${{ secrets.GHCR_USERNAME }}  # user name passed to `apptainer registry login -u`
+    
+jobs:
+  build_and_push:
+    runs-on: ubuntu-latest
+    permissions:
+      packages: write  # NOTE(review): presumably what lets GITHUB_TOKEN push to ghcr.io — confirm
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Apptainer
+        run: |
+          .github/scripts/install_apptainer.sh
+      - name: Check Apptainer installation
+        run: apptainer --version
+      - name: Install Wave CLI
+        run: |
+          .github/scripts/install_wave-cli.sh
+      - name: Check wave installation
+        run: wave --version
+      - name: Build images
+        run: |
+          .github/scripts/build_container_images_wave.sh
+      - name: Push images
+        run: |
+          echo ${GITHUB_TOKEN} | apptainer registry login -u ${GHCR_USERNAME} --password-stdin oras://ghcr.io
+          .github/scripts/push_container_images_wave.py --wave-jsons-dir wave_images --images-dir wave_images
diff --git a/environments/environment.yml b/environments/environment.yml
@@ -6,3 +6,4 @@ dependencies:
   - python=3.7
   - fastp=0.20.1
   - kma=1.3.5
+  - perl
diff --git a/main.nf b/main.nf
@@ -33,13 +33,27 @@ workflow {
 
     if (params.samplesheet_input != 'NO_FILE') {
 	ch_illumina_fastq = Channel.fromPath(params.samplesheet_input).splitCsv(header: true).map{ it -> [it['ID'], [it['R1'], it['R2']]] }
-	ch_nanopore_fastq = Channel.fromPath(params.samplesheet_input).splitCsv(header: true).map{ it -> [it['ID'], [it['LONG']]] }.filter{ it -> it[1] != null }
+    ch_nanopore_fastq = Channel.fromPath(params.samplesheet_input).splitCsv(header: true).map{ it -> [it['ID'], [it['LONG']]] }.filter{ it -> it[1][0] != null }
     } else {
 	ch_illumina_fastq = Channel.fromFilePairs( params.fastq_illumina_search_path, flat: true ).map{ it -> [it[0].split('_')[0], [it[1], it[2]]] }.unique{ it -> it[0] }
 	ch_nanopore_fastq = Channel.fromPath( params.fastq_nanopore_search_path ).map{ it -> [it.getName().split('_')[0], [it]] }.unique{ it -> it[0] }
     }
 
+
     ch_scheme = Channel.fromPath( "${params.scheme}")
+    // when using apptainer profile, need to pass all scheme files as channel
+    schemePath = new File(params.scheme).getAbsolutePath()
+    schemeDir    = new File(schemePath).getParentFile()
+    schemePrefix = new File(schemePath).getName()
+
+    schemeFiles = []
+    schemeDir.eachFileMatch( ~/${schemePrefix}\.comp\.b|${schemePrefix}\.length\.b|${schemePrefix}\.name|${schemePrefix}\.seq\.b/ ) { schemeFiles << it }
+
+    ch_schemeFiles = Channel.fromPath(schemeFiles).collect().toList()
+
+    schemeNameFile = schemeDir.listFiles().find { it.name == "${schemePrefix}.name" }
+    ch_schemeName  = Channel.fromPath(schemeNameFile)
+
     
     main:
     ch_illumina_sample_ids = ch_illumina_fastq.map{ it -> it[0] }
@@ -61,9 +75,9 @@ workflow {
 
     trimmed_reads = fastp.out.trimmed_reads.mix(filtlong.out.filtered_reads.map{ it -> [it[0], [it[1]]] })
     
-    kma_align(trimmed_reads.combine(ch_scheme))
+    kma_align(trimmed_reads.combine(ch_scheme).combine(ch_schemeFiles))
 
-    kma_result_to_mlst(kma_align.out.res.combine(ch_scheme))
+    kma_result_to_mlst(kma_align.out.res.combine(ch_schemeName))
 
     count_called_alleles(kma_result_to_mlst.out.mlst)
 
diff --git a/modules/kma_align.nf b/modules/kma_align.nf
@@ -8,7 +8,7 @@ process kma_align {
     publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_kma*.{c,t}sv", mode: 'copy'
 
     input:
-    tuple val(sample_id), path(reads), val(scheme)
+    tuple val(sample_id), path(reads), val(scheme), path(schemeFiles)
 
     output:
     tuple val(sample_id), path("${sample_id}_kma.csv"), emit: res
@@ -37,11 +37,6 @@ process kma_align {
     printf -- "          value: ${scheme}\\n"      >> ${sample_id}_kma_align_provenance.yml
     printf -- "        - parameter: -and\\n"       >> ${sample_id}_kma_align_provenance.yml
     printf -- "          value: null\\n"           >> ${sample_id}_kma_align_provenance.yml
-
-    # ln -s ${scheme}.comp.b .
-    # ln -s ${scheme}.length.b .
-    # ln -s ${scheme}.name .
-    # ln -s ${scheme}.seq.b .
     
     kma \
 	-t ${task.cpus} \
diff --git a/modules/kma_result_to_mlst.nf b/modules/kma_result_to_mlst.nf
@@ -6,18 +6,18 @@ process kma_result_to_mlst {
     publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_{cgmlst,locus_qc}.csv", mode: 'copy'
 
     input:
-    tuple val(sample_id), path(kma_result), val(scheme)
+    tuple val(sample_id), path(kma_result), path(schemeName)
 
     output:
     tuple val(sample_id), path("${sample_id}_cgmlst.csv"), emit: mlst
     tuple val(sample_id), path("${sample_id}_locus_qc.csv"), emit: mlst_qc
     
     script:
     """
-    ln -s ${scheme}.name .
+   
     kma_result_to_mlst.py \
       "${kma_result}" \
-      --alleles ${scheme}.name \
+      --alleles "${schemeName}" \
       --sample-id "${sample_id}" \
       --locus-allele-delimiter "_" \
       --min-identity ${params.min_identity} \
diff --git a/nextflow.config b/nextflow.config
@@ -57,6 +57,15 @@ profiles {
 	    conda.cacheDir = params.cache
 	}
     }
+
+    apptainer {
+        apptainer.enabled = true
+        process.container = "oras://ghcr.io/bccdc-phl/kma-cgmlst:c786c4aa0d848f6e"
+        if (params.cache){
+            apptainer.cacheDir = params.cache
+        }
+    }
+ 
 }
 
 process {
@@ -68,5 +77,6 @@ process {
     }
     withName: filtlong {
 	conda = "$baseDir/environments/long_read_qc.yml"
+    container = "oras://ghcr.io/bccdc-phl/kma-cgmlst-long-read-qc:070162322064a2f4"
     }
 }

Original file line number	Diff line number	Diff line change
`@@ -57,6 +57,15 @@ profiles {`
`57`	`57`	`conda.cacheDir = params.cache`
`58`	`58`	`}`
`59`	`59`	`}`
	`60`	`+`
	`61`	`+ apptainer {`
	`62`	`+ apptainer.enabled = true`
	`63`	`+ process.container = "oras://ghcr.io/bccdc-phl/kma-cgmlst:c786c4aa0d848f6e"`
	`64`	`+ if (params.cache){`
	`65`	`+ apptainer.cacheDir = params.cache`
	`66`	`+ }`
	`67`	`+ }`
	`68`	`+`
`60`	`69`	`}`
`61`	`70`
`62`	`71`	`process {`
`@@ -68,5 +77,6 @@ process {`
`68`	`77`	`}`
`69`	`78`	`withName: filtlong {`
`70`	`79`	`conda = "$baseDir/environments/long_read_qc.yml"`
	`80`	`+ container = "oras://ghcr.io/bccdc-phl/kma-cgmlst-long-read-qc:070162322064a2f4"`
`71`	`81`	`}`
`72`	`82`	`}`