Skip to content

Commit 94a7f57

Browse files
authored
bam::Record::set: Preserve aux data after set() (#188)
1 parent 05e0ee3 commit 94a7f57

File tree

2 files changed

+59
-17
lines changed

2 files changed

+59
-17
lines changed

src/bam/mod.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1148,6 +1148,44 @@ CCCCCCCCCCCCCCCCCCC"[..],
11481148
assert_eq!(rec.aux(b"NM").unwrap(), Aux::Integer(15));
11491149
}
11501150

1151+
/// Regression test for calling `set()` several times on the same record.
///
/// An `AS` aux tag is pushed after the first `set()`. The record's qname,
/// cigar, seq and qual are then replaced twice, first with longer values and
/// then with shorter ones, and after each call the test checks that the new
/// fields read back correctly and that `AS` still holds `12345`.
#[test]
1152+
fn test_set_repeated() {
1153+
let mut rec = Record::new();
1154+
// First set() on a freshly created record, followed by pushing the aux tag.
rec.set(
1155+
b"123",
1156+
Some(&CigarString(vec![Cigar::Match(3)])),
1157+
b"AAA",
1158+
b"III",
1159+
);
1160+
rec.push_aux(b"AS", &Aux::Integer(12345));
1161+
assert_eq!(rec.qname(), b"123");
1162+
assert_eq!(rec.seq().as_bytes(), b"AAA");
1163+
assert_eq!(rec.qual(), b"III");
1164+
assert_eq!(rec.aux(b"AS").unwrap(), Aux::Integer(12345));
1165+
1166+
// Second set(): qname, seq and qual are each one byte longer and the cigar
// gains a soft-clip operation; the aux tag must still be readable afterwards.
rec.set(
1167+
b"1234",
1168+
Some(&CigarString(vec![Cigar::SoftClip(1), Cigar::Match(3)])),
1169+
b"AAAA",
1170+
b"IIII",
1171+
);
1172+
assert_eq!(rec.qname(), b"1234");
1173+
assert_eq!(rec.seq().as_bytes(), b"AAAA");
1174+
assert_eq!(rec.qual(), b"IIII");
1175+
assert_eq!(rec.aux(b"AS").unwrap(), Aux::Integer(12345));
1176+
1177+
// Third set(): every field is shorter than in both earlier calls; the aux
// tag must again be unchanged.
rec.set(
1178+
b"12",
1179+
Some(&CigarString(vec![Cigar::Match(2)])),
1180+
b"AA",
1181+
b"II",
1182+
);
1183+
assert_eq!(rec.qname(), b"12");
1184+
assert_eq!(rec.seq().as_bytes(), b"AA");
1185+
assert_eq!(rec.qual(), b"II");
1186+
assert_eq!(rec.aux(b"AS").unwrap(), Aux::Integer(12345));
1187+
}
1188+
11511189
#[test]
11521190
fn test_set_qname() {
11531191
let (names, _, seqs, quals, cigars) = gold();

src/bam/record.rs

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -300,19 +300,18 @@ impl Record {
300300
}
301301

302302
/// Set variable length data (qname, cigar, seq, qual).
303-
/// Note: Pre-existing aux data will be invalidated
304-
/// if called on an existing record. For this
305-
/// reason, never call push_aux() before set(). `qual` is Phred-scaled
306-
/// quality values, without any offset.
303+
/// The aux data is left unchanged.
304+
/// `qual` is Phred-scaled quality values, without any offset.
307305
/// NOTE: seq.len() must equal qual.len() or this method
308306
/// will panic. If you don't have quality values use
309307
/// `let quals = vec![ 255 as u8; seq.len()];` as a placeholder that will
310308
/// be recognized as missing QVs by `samtools`.
311309
pub fn set(&mut self, qname: &[u8], cigar: Option<&CigarString>, seq: &[u8], qual: &[u8]) {
312-
self.cigar = None;
313-
310+
assert!(qname.len() < 255);
314311
assert!(seq.len() == qual.len(), "seq.len() must equal qual.len()");
315312

313+
self.cigar = None;
314+
316315
let cigar_width = if let Some(cigar_string) = cigar {
317316
cigar_string.len()
318317
} else {
@@ -321,22 +320,30 @@ impl Record {
321320
let q_len = qname.len() + 1;
322321
let extranul = extranul_from_qname(qname);
323322

324-
self.inner_mut().l_data = (q_len
325-
+ extranul
326-
+ cigar_width
327-
+ ((seq.len() as f32 / 2.0).ceil() as usize)
328-
+ qual.len()) as i32;
329-
330-
assert!(qname.len() <= 256);
331-
323+
let orig_aux_offset = self.qname_capacity()
324+
+ 4 * self.cigar_len()
325+
+ (self.seq_len() + 1) / 2
326+
+ self.seq_len();
327+
let new_aux_offset = q_len + extranul + cigar_width + (seq.len() + 1) / 2 + qual.len();
328+
assert!(orig_aux_offset <= self.inner.l_data as usize);
329+
let aux_len = self.inner.l_data as usize - orig_aux_offset;
330+
self.inner_mut().l_data = (new_aux_offset + aux_len) as i32;
332331
if (self.inner().m_data as i32) < self.inner().l_data {
333332
// Verbosity due to lexical borrowing
334333
let l_data = self.inner().l_data;
335334
self.realloc_var_data(l_data as usize);
336335
}
337336

337+
// Copy the aux data.
338+
if aux_len > 0 && orig_aux_offset != new_aux_offset {
339+
let data =
340+
unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().m_data as usize) };
341+
data.copy_within(orig_aux_offset..orig_aux_offset + aux_len, new_aux_offset);
342+
}
343+
338344
let data =
339345
unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
346+
340347
// qname
341348
utils::copy_memory(qname, data);
342349
for i in 0..=extranul {
@@ -381,8 +388,6 @@ impl Record {
381388
}
382389

383390
/// Replace current qname with a new one.
384-
/// Unlike set(), this preserves all the variable length data including
385-
/// the aux.
386391
pub fn set_qname(&mut self, new_qname: &[u8]) {
387392
// 251 + 1NUL is the max 32-bit aligned value that fits in u8
388393
assert!(new_qname.len() < 252);
@@ -573,7 +578,6 @@ impl Record {
573578
}
574579

575580
/// Add auxiliary data.
576-
/// push_aux() should never be called before set().
577581
pub fn push_aux(&mut self, tag: &[u8], value: &Aux<'_>) {
578582
let ctag = tag.as_ptr() as *mut i8;
579583
let ret = unsafe {

0 commit comments

Comments
 (0)