Skip to content

1.5.6 PR (fixes #144) #145

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 48 additions & 38 deletions artic/align_trim.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def find_primer(bed, pos, direction, threshold=20):
return closest


def trim(segment, primer_pos, end, debug):
def trim(segment, primer_pos, end, verbose=False):
"""Soft mask an alignment to fit within primer start/end sites.

Parameters
Expand All @@ -71,7 +71,7 @@ def trim(segment, primer_pos, end, debug):
The position in the reference to soft mask up to (equates to the start/end position of the primer in the reference)
end : bool
If True, the segment is being masked from the end (i.e. for the reverse primer)
debug : bool
vernose : bool
If True, will print soft masking info during trimming
"""
# get a copy of the cigar tuples to work with
Expand All @@ -93,13 +93,14 @@ def trim(segment, primer_pos, end, debug):
flag, length = cigar.pop()
else:
flag, length = cigar.pop(0)
if debug:
if verbose:
print("Chomped a %s, %s" % (flag, length), file=sys.stderr)
except IndexError:
print(
"Ran out of cigar during soft masking - completely masked read will be ignored",
file=sys.stderr,
)
if verbose:
print(
"Ran out of cigar during soft masking - completely masked read will be ignored",
file=sys.stderr,
)
break

# if the CIGAR operation consumes the reference sequence, increment/decrement the position by the CIGAR operation length
Expand All @@ -121,10 +122,10 @@ def trim(segment, primer_pos, end, debug):

# calculate how many extra matches are needed in the CIGAR
extra = abs(pos - primer_pos)
if debug:
if verbose:
print("extra %s" % (extra), file=sys.stderr)
if extra:
if debug:
if verbose:
print("Inserted a %s, %s" % (0, extra), file=sys.stderr)
if end:
cigar.append((0, extra))
Expand All @@ -137,12 +138,12 @@ def trim(segment, primer_pos, end, debug):

# update the position of the leftmost mappinng base
segment.pos = pos - extra
if debug:
if verbose:
print("New pos: %s" % (segment.pos), file=sys.stderr)

# if proposed softmask leads straight into a deletion, shuffle leftmost mapping base along and ignore the deletion
if cigar[0][0] == 2:
if debug:
if verbose:
print(
"softmask created a leading deletion in the CIGAR, shuffling the alignment",
file=sys.stderr,
Expand Down Expand Up @@ -188,34 +189,39 @@ def handle_segment(

# filter out unmapped and supplementary alignment segments
if segment.is_unmapped:
print("%s skipped as unmapped" % (segment.query_name), file=sys.stderr)
if args.verbose:
print("%s skipped as unmapped" % (segment.query_name), file=sys.stderr)
return False
if segment.is_supplementary:
print("%s skipped as supplementary" % (segment.query_name), file=sys.stderr)
if args.verbose:
print("%s skipped as supplementary" % (segment.query_name), file=sys.stderr)
return False
if segment.mapping_quality < min_mapq:
print(
"%s skipped as mapping quality below threshold" % (segment.query_name),
file=sys.stderr,
)
if args.verbose:
print(
"%s skipped as mapping quality below threshold" % (segment.query_name),
file=sys.stderr,
)
return False

# locate the nearest primers to this alignment segment
p1 = find_primer(bed, segment.reference_start, "+", args.primer_match_threshold)
p2 = find_primer(bed, segment.reference_end, "-", args.primer_match_threshold)

if not p1 or not p2:
print(
"%s skipped as no primer found for segment" % (segment.query_name),
file=sys.stderr,
)
if args.verbose:
print(
"%s skipped as no primer found for segment" % (segment.query_name),
file=sys.stderr,
)
return False

# check if primers are correctly paired and then assign read group
# NOTE: removed this as a function as only called once
# TODO: will try improving this / moving it to the primer scheme processing code
correctly_paired = p1[2]["Primer_ID"].split("_")[1] == p2[2]["Primer_ID"].split("_")[1]

correctly_paired = (
p1[2]["Primer_ID"].split("_")[1] == p2[2]["Primer_ID"].split("_")[1]
)

if not args.no_read_groups:
if correctly_paired:
Expand Down Expand Up @@ -247,10 +253,11 @@ def handle_segment(
report_writer.writerow(report)

if args.remove_incorrect_pairs and not correctly_paired:
print(
"%s skipped as not correctly paired" % (segment.query_name),
file=sys.stderr,
)
if args.verbose:
print(
"%s skipped as not correctly paired" % (segment.query_name),
file=sys.stderr,
)
return False

if args.verbose:
Expand Down Expand Up @@ -306,11 +313,12 @@ def handle_segment(

# check the the alignment still contains bases matching the reference
if "M" not in segment.cigarstring:
print(
"%s dropped as does not match reference post masking"
% (segment.query_name),
file=sys.stderr,
)
if args.verbose:
print(
"%s dropped as does not match reference post masking"
% (segment.query_name),
file=sys.stderr,
)
return False

return (amplicon, segment)
Expand Down Expand Up @@ -364,7 +372,7 @@ def generate_amplicons(bed: list):
return amplicons


def normalise(trimmed_segments: dict, normalise: int, bed: list):
def normalise(trimmed_segments: dict, normalise: int, bed: list, verbose: bool = False):
"""Normalise the depth of the trimmed segments to a given value. Perform per-amplicon normalisation using numpy vector maths to determine whether the segment in question would take the depth closer to the desired depth accross the amplicon.

Args:
Expand Down Expand Up @@ -397,10 +405,11 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list):
amplicon_depth = np.zeros((amplicons[amplicon]["length"],), dtype=int)

if not segments:
print(
f"No segments assigned to amplicon {amplicon}, skipping",
file=sys.stderr,
)
if verbose:
print(
f"No segments assigned to amplicon {amplicon}, skipping",
file=sys.stderr,
)
continue

random.shuffle(segments)
Expand Down Expand Up @@ -507,7 +516,7 @@ def go(args):
# normalise if requested
if args.normalise:
output_segments, mean_amp_depths = normalise(
trimmed_segments, args.normalise, bed
trimmed_segments, args.normalise, bed, args.verbose
)

# write mean amplicon depths to file
Expand Down Expand Up @@ -567,6 +576,7 @@ def main():
parser.add_argument("--remove-incorrect-pairs", action="store_true")

args = parser.parse_args()

go(args)


Expand Down
12 changes: 10 additions & 2 deletions artic/minion.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,19 @@ def run(parser, args):
normalise_string = ""

cmds.append(
f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --remove-incorrect-pairs --min-mapq {args.min_mapq} --report {args.sample}.alignreport.csv < {args.sample}.sorted.bam 2> {args.sample}.alignreport.er | samtools sort -T {args.sample} - -o {args.sample}.trimmed.rg.sorted.bam"
f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --remove-incorrect-pairs --min-mapq {args.min_mapq} --report {args.sample}.alignreport.csv < {args.sample}.sorted.bam > {args.sample}.trimmed.rg.bam"
)

cmds.append(
f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --min-mapq {args.min_mapq} --remove-incorrect-pairs --trim-primers --report {args.sample}.alignreport.csv --amp-depth-report {args.sample}.amplicon_depths.tsv < {args.sample}.sorted.bam 2> {args.sample}.alignreport.er | samtools sort -T {args.sample} - -o {args.sample}.primertrimmed.rg.sorted.bam"
f"samtools sort -T {args.sample} {args.sample}.trimmed.rg.bam -o {args.sample}.trimmed.rg.sorted.bam"
)

cmds.append(
f"align_trim {normalise_string} {bed} --primer-match-threshold {args.primer_match_threshold} --min-mapq {args.min_mapq} --remove-incorrect-pairs --trim-primers --report {args.sample}.alignreport.csv --amp-depth-report {args.sample}.amplicon_depths.tsv < {args.sample}.sorted.bam > {args.sample}.primertrimmed.rg.bam"
)

cmds.append(
f"samtools sort -T {args.sample} {args.sample}.primertrimmed.rg.bam -o {args.sample}.primertrimmed.rg.sorted.bam"
)

cmds.append(f"samtools index {args.sample}.trimmed.rg.sorted.bam")
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = artic
version = 1.5.5
version = 1.5.6
author = Nick Loman
author_email = n.j.loman@bham.ac.uk
maintainer = Sam Wilkinson
Expand Down
5 changes: 2 additions & 3 deletions tests/align_trim_unit_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# align_trim_unit_test.py contains unit tests for alignment trimming
import os
import pysam
import pytest

from artic import align_trim
from artic import utils
Expand Down Expand Up @@ -114,7 +113,7 @@ def testRunner(seg, expectedCIGAR):

# trim the forward primer
try:
align_trim.trim(seg, p1_position, False, False)
align_trim.trim(seg, p1_position, False)
except Exception as e:
raise Exception(
"problem soft masking left primer in {} (error: {})".format(
Expand All @@ -132,7 +131,7 @@ def testRunner(seg, expectedCIGAR):

# trim the reverse primer
try:
align_trim.trim(seg, p2_position, True, False)
align_trim.trim(seg, p2_position, True)
except Exception as e:
raise Exception(
"problem soft masking right primer in {} (error: {})".format(
Expand Down
4 changes: 1 addition & 3 deletions tests/minion_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,6 @@ def genCommand(sampleID, workflow):
"--scheme-directory",
dataDir + "primer-schemes",
]
if workflow == "medaka":
cmd.append("--model")
cmd.append("r941_min_high_g351")

if workflow == "clair3":
cmd.append("--model")
Expand Down Expand Up @@ -255,6 +252,7 @@ def runner(workflow, sampleID):
args = parser.parse_args(cmd)
except SystemExit:
sys.stderr.write("failed to parse valid command for `artic minion`")
assert False

# run the minion pipeline
try:
Expand Down