From 58de91dced4c6965ab79171099011d764f5c3dc3 Mon Sep 17 00:00:00 2001 From: BioWilko Date: Thu, 28 Nov 2024 08:34:48 +0000 Subject: [PATCH 1/6] Pre 1.5.6 push --- artic/align_trim.py | 86 ++++++++++++++++++++++++++------------------- artic/minion.py | 12 +++++-- setup.cfg | 2 +- 3 files changed, 60 insertions(+), 40 deletions(-) diff --git a/artic/align_trim.py b/artic/align_trim.py index 7898294a..e0a7d98c 100755 --- a/artic/align_trim.py +++ b/artic/align_trim.py @@ -60,7 +60,7 @@ def find_primer(bed, pos, direction, threshold=20): return closest -def trim(segment, primer_pos, end, debug): +def trim(segment, primer_pos, end): """Soft mask an alignment to fit within primer start/end sites. Parameters @@ -93,13 +93,14 @@ def trim(segment, primer_pos, end, debug): flag, length = cigar.pop() else: flag, length = cigar.pop(0) - if debug: + if args.verbose: print("Chomped a %s, %s" % (flag, length), file=sys.stderr) except IndexError: - print( - "Ran out of cigar during soft masking - completely masked read will be ignored", - file=sys.stderr, - ) + if args.verbose: + print( + "Ran out of cigar during soft masking - completely masked read will be ignored", + file=sys.stderr, + ) break # if the CIGAR operation consumes the reference sequence, increment/decrement the position by the CIGAR operation length @@ -121,10 +122,10 @@ def trim(segment, primer_pos, end, debug): # calculate how many extra matches are needed in the CIGAR extra = abs(pos - primer_pos) - if debug: + if args.verbose: print("extra %s" % (extra), file=sys.stderr) if extra: - if debug: + if args.verbose: print("Inserted a %s, %s" % (0, extra), file=sys.stderr) if end: cigar.append((0, extra)) @@ -137,12 +138,12 @@ def trim(segment, primer_pos, end, debug): # update the position of the leftmost mappinng base segment.pos = pos - extra - if debug: + if args.verbose: print("New pos: %s" % (segment.pos), file=sys.stderr) # if proposed softmask leads straight into a deletion, shuffle leftmost mapping base along and ignore the deletion if cigar[0][0] == 2: - if debug: + if args.verbose: print( "softmask created a leading deletion in the CIGAR, shuffling the alignment", file=sys.stderr, @@ -188,16 +189,19 @@ def handle_segment( # filter out unmapped and supplementary alignment segments if segment.is_unmapped: - print("%s skipped as unmapped" % (segment.query_name), file=sys.stderr) + if args.verbose: + print("%s skipped as unmapped" % (segment.query_name), file=sys.stderr) return False if segment.is_supplementary: - print("%s skipped as supplementary" % (segment.query_name), file=sys.stderr) + if args.verbose: + print("%s skipped as supplementary" % (segment.query_name), file=sys.stderr) return False if segment.mapping_quality < min_mapq: - print( - "%s skipped as mapping quality below threshold" % (segment.query_name), - file=sys.stderr, - ) + if args.verbose: + print( + "%s skipped as mapping quality below threshold" % (segment.query_name), + file=sys.stderr, + ) return False # locate the nearest primers to this alignment segment @@ -205,17 +209,19 @@ def handle_segment( p2 = find_primer(bed, segment.reference_end, "-", args.primer_match_threshold) if not p1 or not p2: - print( - "%s skipped as no primer found for segment" % (segment.query_name), - file=sys.stderr, - ) + if args.verbose: + print( + "%s skipped as no primer found for segment" % (segment.query_name), + file=sys.stderr, + ) return False # check if primers are correctly paired and then assign read group # NOTE: removed this as a function as only called once # TODO: will try improving this / moving it to the primer scheme processing code - correctly_paired = p1[2]["Primer_ID"].split("_")[1] == p2[2]["Primer_ID"].split("_")[1] - + correctly_paired = ( + p1[2]["Primer_ID"].split("_")[1] == p2[2]["Primer_ID"].split("_")[1] + ) if not args.no_read_groups: if correctly_paired: @@ -247,10 +253,11 @@ def handle_segment( report_writer.writerow(report) if args.remove_incorrect_pairs and not correctly_paired: - print( - "%s skipped as not correctly paired" % (segment.query_name), - file=sys.stderr, - ) + if args.verbose: + print( + "%s skipped as not correctly paired" % (segment.query_name), + file=sys.stderr, + ) return False if args.verbose: @@ -269,7 +276,7 @@ def handle_segment( # softmask the alignment if left primer start/end inside alignment if segment.reference_start < p1_position: try: - trim(segment, p1_position, False, args.verbose) + trim(segment, p1_position, False) if args.verbose: print( "ref start %s >= primer_position %s" @@ -288,7 +295,7 @@ def handle_segment( # softmask the alignment if right primer start/end inside alignment if segment.reference_end > p2_position: try: - trim(segment, p2_position, True, args.verbose) + trim(segment, p2_position, True) if args.verbose: print( "ref start %s >= primer_position %s" @@ -306,11 +313,12 @@ def handle_segment( # check the the alignment still contains bases matching the reference if "M" not in segment.cigarstring: - print( - "%s dropped as does not match reference post masking" - % (segment.query_name), - file=sys.stderr, - ) + if args.verbose: + print( + "%s dropped as does not match reference post masking" + % (segment.query_name), + file=sys.stderr, + ) return False return (amplicon, segment) @@ -397,10 +405,11 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list): amplicon_depth = np.zeros((amplicons[amplicon]["length"],), dtype=int) if not segments: - print( - f"No segments assigned to amplicon {amplicon}, skipping", - file=sys.stderr, - ) + if args.verbose: + print( + f"No segments assigned to amplicon {amplicon}, skipping", + file=sys.stderr, + ) continue random.shuffle(segments) @@ -566,7 +575,10 @@ def main(): parser.add_argument("--verbose", action="store_true", help="Debug mode") parser.add_argument("--remove-incorrect-pairs", action="store_true") + global args + args = parser.parse_args() + go(args) diff --git a/artic/minion.py b/artic/minion.py index 59798bb7..a7a09acc 100755 --- a/artic/minion.py +++ b/artic/minion.py @@ -134,11 +134,19 @@ def run(parser, args): normalise_string = "" cmds.append( - f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --remove-incorrect-pairs --min-mapq {args.min_mapq} --report {args.sample}.alignreport.csv < {args.sample}.sorted.bam 2> {args.sample}.alignreport.er | samtools sort -T {args.sample} - -o {args.sample}.trimmed.rg.sorted.bam" + f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --remove-incorrect-pairs --min-mapq {args.min_mapq} --report {args.sample}.alignreport.csv < {args.sample}.sorted.bam > {args.sample}.trimmed.rg.bam" ) cmds.append( - f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --min-mapq {args.min_mapq} --remove-incorrect-pairs --trim-primers --report {args.sample}.alignreport.csv --amp-depth-report {args.sample}.amplicon_depths.tsv < {args.sample}.sorted.bam 2> {args.sample}.alignreport.er | samtools sort -T {args.sample} - -o {args.sample}.primertrimmed.rg.sorted.bam" + f"samtools sort -T {args.sample} {args.sample}.trimmed.rg.bam -o {args.sample}.trimmed.rg.sorted.bam" + ) + + cmds.append( + f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --min-mapq {args.min_mapq} --remove-incorrect-pairs --trim-primers --report {args.sample}.alignreport.csv --amp-depth-report {args.sample}.amplicon_depths.tsv < {args.sample}.sorted.bam > {args.sample}.primertrimmed.rg.bam" + ) + + cmds.append( + f"samtools sort -T {args.sample} {args.sample}.primertrimmed.rg.bam -o {args.sample}.primertrimmed.rg.sorted.bam" ) cmds.append(f"samtools index {args.sample}.trimmed.rg.sorted.bam") diff --git a/setup.cfg b/setup.cfg index 1f2cf00e..72b235c8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = artic -version = 1.5.5 +version = 1.5.6 author = Nick Loman author_email = n.j.loman@bham.ac.uk maintainer = Sam Wilkinson From 125dc073d040d3ca4bb85816e16b406dd463ca0e Mon Sep 17 00:00:00 2001 From: BioWilko Date: Thu, 28 Nov 2024 08:41:06 +0000 Subject: [PATCH 2/6] Fix align_trim unit test --- tests/align_trim_unit_test.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/align_trim_unit_test.py b/tests/align_trim_unit_test.py index ad298ead..8f714ffe 100644 --- a/tests/align_trim_unit_test.py +++ b/tests/align_trim_unit_test.py @@ -1,7 +1,7 @@ # align_trim_unit_test.py contains unit tests for alignment trimming import os import pysam -import pytest +from types import SimpleNamespace from artic import align_trim from artic import utils @@ -92,6 +92,10 @@ def test_trim(): def testRunner(seg, expectedCIGAR): + global args + + args = SimpleNamespace(verbose=False) + # get the nearest primers to the alignment segment p1 = align_trim.find_primer(primerScheme, seg.reference_start, "+") p2 = align_trim.find_primer(primerScheme, seg.reference_end, "-") @@ -114,7 +118,7 @@ def testRunner(seg, expectedCIGAR): # trim the forward primer try: - align_trim.trim(seg, p1_position, False, False) + align_trim.trim(seg, p1_position, False) except Exception as e: raise Exception( "problem soft masking left primer in {} (error: {})".format( @@ -132,7 +136,7 @@ def testRunner(seg, expectedCIGAR): # trim the reverse primer try: - align_trim.trim(seg, p2_position, True, False) + align_trim.trim(seg, p2_position, True) except Exception as e: raise Exception( "problem soft masking right primer in {} (error: {})".format( From e7f22fec04f4b4645ce3e5745c7ca720c0b90591 Mon Sep 17 00:00:00 2001 From: BioWilko Date: Thu, 28 Nov 2024 08:49:20 +0000 Subject: [PATCH 3/6] Remove verbose as a global arg --- artic/align_trim.py | 28 +++++++++++++--------------- tests/align_trim_unit_test.py | 4 ---- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/artic/align_trim.py b/artic/align_trim.py index e0a7d98c..3c31b030 100755 --- a/artic/align_trim.py +++ b/artic/align_trim.py @@ -60,7 +60,7 @@ def find_primer(bed, pos, direction, threshold=20): return closest -def trim(segment, primer_pos, end): +def trim(segment, primer_pos, end, verbose=False): """Soft mask an alignment to fit within primer start/end sites. Parameters @@ -71,7 +71,7 @@ def trim(segment, primer_pos, end): The position in the reference to soft mask up to (equates to the start/end position of the primer in the reference) end : bool If True, the segment is being masked from the end (i.e. for the reverse primer) - debug : bool + vernose : bool If True, will print soft masking info during trimming """ # get a copy of the cigar tuples to work with @@ -93,10 +93,10 @@ def trim(segment, primer_pos, end): flag, length = cigar.pop() else: flag, length = cigar.pop(0) - if args.verbose: + if verbose: print("Chomped a %s, %s" % (flag, length), file=sys.stderr) except IndexError: - if args.verbose: + if verbose: print( "Ran out of cigar during soft masking - completely masked read will be ignored", file=sys.stderr, @@ -122,10 +122,10 @@ def trim(segment, primer_pos, end): # calculate how many extra matches are needed in the CIGAR extra = abs(pos - primer_pos) - if args.verbose: + if verbose: print("extra %s" % (extra), file=sys.stderr) if extra: - if args.verbose: + if verbose: print("Inserted a %s, %s" % (0, extra), file=sys.stderr) if end: cigar.append((0, extra)) @@ -138,12 +138,12 @@ def trim(segment, primer_pos, end): # update the position of the leftmost mappinng base segment.pos = pos - extra - if args.verbose: + if verbose: print("New pos: %s" % (segment.pos), file=sys.stderr) # if proposed softmask leads straight into a deletion, shuffle leftmost mapping base along and ignore the deletion if cigar[0][0] == 2: - if args.verbose: + if verbose: print( "softmask created a leading deletion in the CIGAR, shuffling the alignment", file=sys.stderr, @@ -276,7 +276,7 @@ def handle_segment( # softmask the alignment if left primer start/end inside alignment if segment.reference_start < p1_position: try: - trim(segment, p1_position, False) + trim(segment, p1_position, False, args.verbose) if args.verbose: print( "ref start %s >= primer_position %s" @@ -295,7 +295,7 @@ def handle_segment( # softmask the alignment if right primer start/end inside alignment if segment.reference_end > p2_position: try: - trim(segment, p2_position, True) + trim(segment, p2_position, True, args.verbose) if args.verbose: print( "ref start %s >= primer_position %s" @@ -372,7 +372,7 @@ def generate_amplicons(bed: list): return amplicons -def normalise(trimmed_segments: dict, normalise: int, bed: list): +def normalise(trimmed_segments: dict, normalise: int, bed: list, verbose: bool = False): """Normalise the depth of the trimmed segments to a given value. Perform per-amplicon normalisation using numpy vector maths to determine whether the segment in question would take the depth closer to the desired depth accross the amplicon. Args: @@ -405,7 +405,7 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list): amplicon_depth = np.zeros((amplicons[amplicon]["length"],), dtype=int) if not segments: - if args.verbose: + if verbose: print( f"No segments assigned to amplicon {amplicon}, skipping", file=sys.stderr, @@ -516,7 +516,7 @@ def go(args): # normalise if requested if args.normalise: output_segments, mean_amp_depths = normalise( - trimmed_segments, args.normalise, bed + trimmed_segments, args.normalise, bed, args.verbose ) # write mean amplicon depths to file @@ -575,8 +575,6 @@ def main(): parser.add_argument("--verbose", action="store_true", help="Debug mode") parser.add_argument("--remove-incorrect-pairs", action="store_true") - global args - args = parser.parse_args() go(args) diff --git a/tests/align_trim_unit_test.py b/tests/align_trim_unit_test.py index 8f714ffe..1b5f60d0 100644 --- a/tests/align_trim_unit_test.py +++ b/tests/align_trim_unit_test.py @@ -92,10 +92,6 @@ def test_trim(): def testRunner(seg, expectedCIGAR): - global args - - args = SimpleNamespace(verbose=False) - # get the nearest primers to the alignment segment p1 = align_trim.find_primer(primerScheme, seg.reference_start, "+") p2 = align_trim.find_primer(primerScheme, seg.reference_end, "-") From 1a17e585147bb6f98676f11378da3cbcdafb44ef Mon Sep 17 00:00:00 2001 From: BioWilko Date: Thu, 28 Nov 2024 08:55:10 +0000 Subject: [PATCH 4/6] Fix minion validator test --- tests/minion_validator.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/minion_validator.py b/tests/minion_validator.py index 259394ca..992870dc 100644 --- a/tests/minion_validator.py +++ b/tests/minion_validator.py @@ -184,13 +184,9 @@ def genCommand(sampleID, workflow): "--scheme-directory", dataDir + "primer-schemes", ] - if workflow == "medaka": - cmd.append("--model") - cmd.append("r941_min_high_g351") if workflow == "clair3": - cmd.append("--model") - cmd.append("r941_prom_hac_g360+g422") + cmd.append("--model r941_prom_hac_g360+g422") if sampleID in extraFlags[workflow]: for flag in extraFlags[workflow][sampleID]: From dea70ca809a0eb5c9ec1b8d4f408736fbcbf220d Mon Sep 17 00:00:00 2001 From: BioWilko Date: Thu, 28 Nov 2024 08:58:34 +0000 Subject: [PATCH 5/6] Fix minion_validator again --- tests/minion_validator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/minion_validator.py b/tests/minion_validator.py index 992870dc..d036ed78 100644 --- a/tests/minion_validator.py +++ b/tests/minion_validator.py @@ -186,7 +186,8 @@ def genCommand(sampleID, workflow): ] if workflow == "clair3": - cmd.append("--model r941_prom_hac_g360+g422") + cmd.append("--model") + cmd.append("r941_prom_hac_g360+g422") if sampleID in extraFlags[workflow]: for flag in extraFlags[workflow][sampleID]: @@ -251,6 +252,7 @@ def runner(workflow, sampleID): args = parser.parse_args(cmd) except SystemExit: sys.stderr.write("failed to parse valid command for `artic minion`") + assert False # run the minion pipeline try: From 3122375eea73de8550d7e1b280d77510b6629262 Mon Sep 17 00:00:00 2001 From: BioWilko Date: Thu, 28 Nov 2024 09:14:41 +0000 Subject: [PATCH 6/6] Remove unused import --- tests/align_trim_unit_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/align_trim_unit_test.py b/tests/align_trim_unit_test.py index 1b5f60d0..d8bebab9 100644 --- a/tests/align_trim_unit_test.py +++ b/tests/align_trim_unit_test.py @@ -1,7 +1,6 @@ # align_trim_unit_test.py contains unit tests for alignment trimming import os import pysam -from types import SimpleNamespace from artic import align_trim from artic import utils