Skip to content

Commit 86ee4dd

Browse files
authored
Add scripts for building apptainer container and fix short read samplesheet input error caused by long read null values (#36)
* add scripts for building apptainer container * add fix for short read samplesheet issue caused by long read null values * temporarily add test branch to build container workflow * add apptainer to profile * remove test branch from build container workflow * add perl to environment for container for shasum command * build container on test branch after adding environment change * remove test branch after initial build and update nf config * pass all scheme files as a channel for compatibility with apptainer profile
1 parent c806edb commit 86ee4dd

File tree

10 files changed

+161
-12
lines changed

10 files changed

+161
-12
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/bash
# Build one frozen Singularity image per conda environment file using the
# Wave CLI. The JSON response for each image is written to wave_images/ and
# copied to artifacts/.
#
# Abort on the first failure. pipefail is required so that a failing `wave`
# (or `python -m json.tool`) is not masked by the exit status of `tee`, which
# would otherwise let an empty/partial JSON file be copied to artifacts/.
set -eo pipefail

mkdir -p artifacts
mkdir -p wave_images

for env_yaml in environments/*.yml; do
    # When nothing matches, bash passes the glob through literally; skip it.
    [ -e "${env_yaml}" ] || continue

    # The image name is the second space-separated field of the file's first line.
    image_name=$(head -n 1 "${env_yaml}" | cut -d ' ' -f 2)

    echo "building image ${image_name} from file ${env_yaml}..."
    wave \
        --conda-file "${env_yaml}" \
        --singularity \
        --freeze \
        --await \
        --output json \
        | python -m json.tool \
        | tee "wave_images/${image_name}.json"
    echo "done building image ${image_name}"

    cp "wave_images/${image_name}.json" artifacts/
done
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
# Install Apptainer into /opt/apptainer using the upstream unprivileged
# installer script, then add its bin directory to the GitHub Actions PATH
# for subsequent steps.
set -eo pipefail

# NOTE(review): the installer is fetched from the `main` branch, so its
# contents are not pinned; consider pinning to a release tag or commit.
installer_url="https://raw.githubusercontent.com/apptainer/apptainer/main/tools/install-unprivileged.sh"
install_prefix="/opt/apptainer"

# -O overwrites any earlier download. Without it wget would save to
# install-unprivileged.sh.1 and the stale copy would be the one executed.
wget -O install-unprivileged.sh "${installer_url}"
chmod +x install-unprivileged.sh

mkdir -p "${install_prefix}"
./install-unprivileged.sh "${install_prefix}"

echo "${install_prefix}/bin" >> "${GITHUB_PATH}"
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#!/bin/bash
# Download the Wave CLI release binary, install it as /opt/wave/bin/wave and
# add that directory to the GitHub Actions PATH for subsequent steps.
#
# The release can be overridden with the WAVE_VERSION environment variable;
# it defaults to 1.4.1.
set -eo pipefail

wave_version="${WAVE_VERSION:-1.4.1}"
wave_url="https://github.com/seqeralabs/wave-cli/releases/download/v${wave_version}/wave-${wave_version}-linux-x86_64"
install_dir="/opt/wave/bin"

# Download straight to the final file name. -O overwrites any earlier
# download instead of creating a numbered copy that a later mv would miss.
wget -O wave "${wave_url}"
chmod +x wave

mkdir -p "${install_dir}"
mv wave "${install_dir}/wave"

echo "${install_dir}" >> "${GITHUB_PATH}"
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/usr/bin/env python
2+
3+
import argparse
4+
import glob
5+
import json
6+
import os
7+
import subprocess
8+
9+
10+
def pull_image(image_url, destination_image_file):
    """
    Pull the image file from external server

    Runs ``apptainer pull <destination_image_file> <image_url>``.

    :param image_url: URL of the image to pull.
    :param destination_image_file: Path of the local image file to write.
    :raises subprocess.CalledProcessError: If ``apptainer pull`` exits with a
        non-zero status.
    """
    apptainer_pull_cmd = [
        "apptainer",
        "pull",
        destination_image_file,
        image_url,
    ]
    # check=True so a failed pull stops the script instead of being ignored.
    subprocess.run(apptainer_pull_cmd, check=True)
21+
22+
23+
def push_image(source_image_file, image_url):
    """
    Push apptainer image to destination image repository

    Runs ``apptainer push <source_image_file> <image_url>``.

    :param source_image_file: Path of the local image file to upload.
    :param image_url: Destination URL of the image.
    :raises subprocess.CalledProcessError: If ``apptainer push`` exits with a
        non-zero status.
    """
    apptainer_push_cmd = [
        "apptainer",
        "push",
        source_image_file,
        image_url,
    ]
    # check=True so a failed push fails the script (and the CI job running it).
    subprocess.run(apptainer_push_cmd, check=True)
35+
36+
37+
def main(args):
    """
    Pull every image described in a directory of wave JSON files and push it
    to ghcr.io under the current repository owner.

    For each ``*.json`` file in ``args.wave_jsons_dir`` the ``containerImage``
    URL is read, the image is pulled to
    ``<args.images_dir>/<name>--<version>.img`` and then pushed to
    ``oras://ghcr.io/<owner>/<name>:<version>``, where ``<owner>`` is the
    lower-cased ``GITHUB_REPOSITORY_OWNER`` environment variable.

    :param args: Parsed arguments providing ``wave_jsons_dir`` and ``images_dir``.
    :raises KeyError: If ``GITHUB_REPOSITORY_OWNER`` is not set, or a JSON file
        has no ``containerImage`` key.
    :raises RuntimeError: If a pull did not produce the expected image file.
    """
    repo_owner = os.environ['GITHUB_REPOSITORY_OWNER'].lower()

    # Make sure the pull destination directory exists before pulling into it.
    os.makedirs(args.images_dir, exist_ok=True)

    # Sorted so images are processed in a reproducible order.
    wave_jsons = sorted(glob.glob(os.path.join(args.wave_jsons_dir, "*.json")))
    for wave_json in wave_jsons:
        with open(wave_json, 'r') as f:
            w = json.load(f)
        pull_image_url = w['containerImage']

        # The last path component of the URL has the form "<name>:<version>".
        image_name_with_version = pull_image_url.split('/')[-1]
        image_name, image_version = image_name_with_version.split(':')

        pull_destination = os.path.join(args.images_dir, f"{image_name}--{image_version}.img")
        pull_image(pull_image_url, pull_destination)

        # Never attempt to push an image that was not actually pulled.
        if not os.path.isfile(pull_destination):
            raise RuntimeError(
                f"Pulling {pull_image_url} did not produce {pull_destination}"
            )

        push_image_url = f"oras://ghcr.io/{repo_owner}/{image_name}:{image_version}"
        push_image(pull_destination, push_image_url)
52+
53+
54+
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Pull images listed in wave JSON files and push them to ghcr.io."
    )
    # Both options are required: main() joins them into paths, so a missing
    # value would otherwise surface as a TypeError from os.path.join.
    parser.add_argument(
        '--wave-jsons-dir',
        required=True,
        help="Directory containing the wave JSON files (*.json)",
    )
    parser.add_argument(
        '--images-dir',
        required=True,
        help="Directory to write pulled image files to",
    )
    args = parser.parse_args()
    main(args)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Builds container images with the Wave CLI and pushes them to ghcr.io using
# Apptainer. Runs on version tags (v*) and on manual dispatch.
name: Build and Push Container Images

on:
  push:
    tags:
      - v*
  workflow_dispatch:

jobs:
  build_and_push:
    runs-on: ubuntu-latest
    permissions:
      packages: write
      contents: read
    steps:
      - uses: actions/checkout@v4
      - name: Install Apptainer
        run: |
          .github/scripts/install_apptainer.sh
      - name: Check Apptainer installation
        run: apptainer --version
      - name: Install Wave CLI
        run: |
          .github/scripts/install_wave-cli.sh
      - name: Check wave installation
        run: wave --version
      - name: Build images
        run: |
          .github/scripts/build_container_images_wave.sh
      - name: Push images
        # Credentials are scoped to this step only, so the install/build steps
        # (which run downloaded code) never see the token.
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GHCR_USERNAME: ${{ secrets.GHCR_USERNAME }}
        run: |
          echo "${GITHUB_TOKEN}" | apptainer registry login -u "${GHCR_USERNAME}" --password-stdin oras://ghcr.io
          .github/scripts/push_container_images_wave.py --wave-jsons-dir wave_images --images-dir wave_images

environments/environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ dependencies:
66
- python=3.7
77
- fastp=0.20.1
88
- kma=1.3.5
9+
- perl

main.nf

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,27 @@ workflow {
3333

3434
if (params.samplesheet_input != 'NO_FILE') {
3535
ch_illumina_fastq = Channel.fromPath(params.samplesheet_input).splitCsv(header: true).map{ it -> [it['ID'], [it['R1'], it['R2']]] }
36-
ch_nanopore_fastq = Channel.fromPath(params.samplesheet_input).splitCsv(header: true).map{ it -> [it['ID'], [it['LONG']]] }.filter{ it -> it[1] != null }
36+
ch_nanopore_fastq = Channel.fromPath(params.samplesheet_input).splitCsv(header: true).map{ it -> [it['ID'], [it['LONG']]] }.filter{ it -> it[1][0] != null }
3737
} else {
3838
ch_illumina_fastq = Channel.fromFilePairs( params.fastq_illumina_search_path, flat: true ).map{ it -> [it[0].split('_')[0], [it[1], it[2]]] }.unique{ it -> it[0] }
3939
ch_nanopore_fastq = Channel.fromPath( params.fastq_nanopore_search_path ).map{ it -> [it.getName().split('_')[0], [it]] }.unique{ it -> it[0] }
4040
}
4141

42+
4243
ch_scheme = Channel.fromPath( "${params.scheme}")
44+
// when using apptainer profile, need to pass all scheme files as channel
45+
schemePath = new File(params.scheme).getAbsolutePath()
46+
schemeDir = new File(schemePath).getParentFile()
47+
schemePrefix = new File(schemePath).getName()
48+
49+
schemeFiles = []
50+
schemeDir.eachFileMatch( ~/${schemePrefix}\.comp\.b|${schemePrefix}\.length\.b|${schemePrefix}\.name|${schemePrefix}\.seq\.b/ ) { schemeFiles << it }
51+
52+
ch_schemeFiles = Channel.fromPath(schemeFiles).collect().toList()
53+
54+
schemeNameFile = schemeDir.listFiles().find { it.name == "${schemePrefix}.name" }
55+
ch_schemeName = Channel.fromPath(schemeNameFile)
56+
4357

4458
main:
4559
ch_illumina_sample_ids = ch_illumina_fastq.map{ it -> it[0] }
@@ -61,9 +75,9 @@ workflow {
6175

6276
trimmed_reads = fastp.out.trimmed_reads.mix(filtlong.out.filtered_reads.map{ it -> [it[0], [it[1]]] })
6377

64-
kma_align(trimmed_reads.combine(ch_scheme))
78+
kma_align(trimmed_reads.combine(ch_scheme).combine(ch_schemeFiles))
6579

66-
kma_result_to_mlst(kma_align.out.res.combine(ch_scheme))
80+
kma_result_to_mlst(kma_align.out.res.combine(ch_schemeName))
6781

6882
count_called_alleles(kma_result_to_mlst.out.mlst)
6983

modules/kma_align.nf

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ process kma_align {
88
publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_kma*.{c,t}sv", mode: 'copy'
99

1010
input:
11-
tuple val(sample_id), path(reads), val(scheme)
11+
tuple val(sample_id), path(reads), val(scheme), path(schemeFiles)
1212

1313
output:
1414
tuple val(sample_id), path("${sample_id}_kma.csv"), emit: res
@@ -37,11 +37,6 @@ process kma_align {
3737
printf -- " value: ${scheme}\\n" >> ${sample_id}_kma_align_provenance.yml
3838
printf -- " - parameter: -and\\n" >> ${sample_id}_kma_align_provenance.yml
3939
printf -- " value: null\\n" >> ${sample_id}_kma_align_provenance.yml
40-
41-
# ln -s ${scheme}.comp.b .
42-
# ln -s ${scheme}.length.b .
43-
# ln -s ${scheme}.name .
44-
# ln -s ${scheme}.seq.b .
4540
4641
kma \
4742
-t ${task.cpus} \

modules/kma_result_to_mlst.nf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,18 @@ process kma_result_to_mlst {
66
publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_{cgmlst,locus_qc}.csv", mode: 'copy'
77

88
input:
9-
tuple val(sample_id), path(kma_result), val(scheme)
9+
tuple val(sample_id), path(kma_result), path(schemeName)
1010

1111
output:
1212
tuple val(sample_id), path("${sample_id}_cgmlst.csv"), emit: mlst
1313
tuple val(sample_id), path("${sample_id}_locus_qc.csv"), emit: mlst_qc
1414

1515
script:
1616
"""
17-
ln -s ${scheme}.name .
17+
1818
kma_result_to_mlst.py \
1919
"${kma_result}" \
20-
--alleles ${scheme}.name \
20+
--alleles "${schemeName}" \
2121
--sample-id "${sample_id}" \
2222
--locus-allele-delimiter "_" \
2323
--min-identity ${params.min_identity} \

nextflow.config

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,15 @@ profiles {
5757
conda.cacheDir = params.cache
5858
}
5959
}
60+
61+
apptainer {
62+
apptainer.enabled = true
63+
process.container = "oras://ghcr.io/bccdc-phl/kma-cgmlst:c786c4aa0d848f6e"
64+
if (params.cache){
65+
apptainer.cacheDir = params.cache
66+
}
67+
}
68+
6069
}
6170

6271
process {
@@ -68,5 +77,6 @@ process {
6877
}
6978
withName: filtlong {
7079
conda = "$baseDir/environments/long_read_qc.yml"
80+
container = "oras://ghcr.io/bccdc-phl/kma-cgmlst-long-read-qc:070162322064a2f4"
7181
}
7282
}

0 commit comments

Comments
 (0)