Skip to content

Commit df74429

Browse files
committed
handle strings with commas. closes #49
1 parent 9c1bc4c commit df74429

File tree

5 files changed

+46
-22
lines changed

5 files changed

+46
-22
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "echtvar"
3-
version = "0.2.1"
3+
version = "0.2.2"
44
edition = "2021"
55

66
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

rust-toolchain.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[toolchain]
22
# Nightly needed for portable simd feature
33
components = ["rustfmt", "clippy"]
4-
channel = "nightly-2023-06-15"
4+
channel = "nightly-2025-03-20"
55

src/commands/annotate_cmd.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -127,17 +127,19 @@ pub fn annotate_main(
127127
}
128128
n += 1;
129129
// First check if the ALT allele is missing or is *, skip those
130-
if record.alleles()[1][0] == b'*' {
130+
if record.alleles().len() < 2 || record.alleles()[1][0] == b'*' {
131131
let rid = record.rid().unwrap();
132132
let chrom = std::str::from_utf8(oheader_view.rid2name(rid).unwrap()).unwrap();
133133
// Only warn up to 10 times, just keep count in general
134134
if skip_warn < 10 {
135135
eprintln!(
136-
"contig {} pos {} alt has * value, skipping annotation, outputting entry as-is",
136+
"{}:{} alt missing or has * value, skipping annotation, outputting entry as-is",
137137
&chrom,
138138
record.pos() + 1
139139
);
140-
if skip_warn == 9 { eprintln!("not reporting further warnings") }
140+
if skip_warn == 9 {
141+
eprintln!("not reporting further warnings")
142+
}
141143
}
142144
skip_warn += 1;
143145
ovcf.write(&record).expect("failed to write record");
@@ -199,10 +201,7 @@ pub fn annotate_main(
199201
1000 * (n as u128) / mili,
200202
n_written,
201203
);
202-
eprintln!(
203-
"Skipped {} variants with * alt.",
204-
skip_warn,
205-
);
204+
eprintln!("Skipped {} variants with * alt.", skip_warn,);
206205

207206
/*
208207
//let ep = std::path::Path::new(&*epaths[0]);

src/commands/encoder_cmd.rs

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use bincode::Options;
22
use echtvar_lib::{echtvar::bstrip_chr, fields, kmer16, var32, zigzag};
3-
use rust_htslib::bcf::header::{TagLength, TagType, HeaderRecord};
3+
use rust_htslib::bcf::header::{HeaderRecord, TagLength, TagType};
44
use rust_htslib::bcf::record::{Buffer, Record};
55
use rust_htslib::bcf::{Read as BCFRead, Reader};
66
use stream_vbyte::{encode::encode, x86::Sse41};
@@ -74,7 +74,11 @@ fn get_string_field<'a, B: BorrowMut<Buffer> + Borrow<Buffer> + 'a>(
7474
.string()
7575
.unwrap_or(None)
7676
{
77-
Some(v) => unsafe { String::from_utf8_unchecked(v[0].to_vec()) },
77+
Some(v) => v
78+
.iter()
79+
.map(|s| unsafe { String::from_utf8_unchecked(s.to_vec()) })
80+
.collect::<Vec<String>>()
81+
.join(","),
7882
None => default.to_string(),
7983
}
8084
};
@@ -166,14 +170,23 @@ fn hdr_info_id2description(
166170
default: &std::string::String,
167171
) -> std::string::String {
168172
hrecs.retain(|rec| match rec {
169-
HeaderRecord::Info {key: _, values: v} => &v["ID"] == id,
170-
_ => false}
171-
);
173+
HeaderRecord::Info { key: _, values: v } => &v["ID"] == id,
174+
_ => false,
175+
});
172176
if hrecs.len() != 1 {
173-
panic!("Field {} is either not present in the header or present multiple times!", id);
177+
panic!(
178+
"Field {} is either not present in the header or present multiple times!",
179+
id
180+
);
174181
};
175182
let description = match hrecs.first().unwrap() {
176-
HeaderRecord::Info {key: _, values: v} => if v.contains_key("Description") { &v["Description"] } else { default },
183+
HeaderRecord::Info { key: _, values: v } => {
184+
if v.contains_key("Description") {
185+
&v["Description"]
186+
} else {
187+
default
188+
}
189+
}
177190
_ => default,
178191
};
179192
return description.trim_matches('"').to_string();
@@ -185,11 +198,11 @@ pub fn encoder_main(vpaths: Vec<&str>, opath: &str, jpath: &str) {
185198

186199
let mut json = String::new();
187200
File::open(jpath)
188-
.expect("error opening json file")
201+
.unwrap_or_else(|_| panic!("error opening json file {:?}", jpath))
189202
.read_to_string(&mut json)
190-
.expect("error parsing json file");
203+
.unwrap_or_else(|_| panic!("error parsing json file {:?}", jpath));
191204
let mut fields: Vec<fields::Field> =
192-
json5::from_str(&json).expect("error reading json into fields");
205+
json5::from_str(&json).unwrap_or_else(|_| panic!("error reading json into fields {:?}", jpath));
193206

194207
let mut vcf = if !(*vpaths[0]).eq("/dev/stdin") && !(*vpaths[0]).eq("-") {
195208
Reader::from_path(vpaths[0]).expect("Error opening vcf.")
@@ -242,7 +255,8 @@ pub fn encoder_main(vpaths: Vec<&str>, opath: &str, jpath: &str) {
242255
TagLength::Variable => f.number = ".".to_string(),
243256
};
244257
if f.field != "FILTER" && f.description == fields::default_description_string() {
245-
f.description = hdr_info_id2description(header.header_records(), &f.field, &f.description);
258+
f.description =
259+
hdr_info_id2description(header.header_records(), &f.field, &f.description);
246260
};
247261
}
248262

@@ -401,7 +415,18 @@ pub fn encoder_main(vpaths: Vec<&str>, opath: &str, jpath: &str) {
401415

402416
let mut alleles = rec.alleles();
403417
if alleles.len() == 1 {
404-
alleles.push(alleles[0]);
418+
/*
419+
let last_rid = rec.rid().unwrap() as i32;
420+
let n: &[u8] = header.rid2name(last_rid as u32).unwrap();
421+
let chrom = bstrip_chr(str::from_utf8(n).unwrap());
422+
eprintln!(
423+
"[echtvar] variant {}:{} has only one allele, encoding as {}/T",
424+
chrom,
425+
rec.pos(),
426+
String::from_utf8_lossy(alleles[0])
427+
);
428+
*/
429+
alleles.push(b"T"); // NOTE: we always encode . to T here.
405430
} else if alleles.len() != 2 {
406431
last_rid = rec.rid().unwrap() as i32;
407432
let n: &[u8] = header.rid2name(last_rid as u32).unwrap();

src/lib/echtvar.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ impl EchtVars {
193193
} else {
194194
"Float"
195195
},
196-
if e.description.to_string() == fields::default_description_string() {
196+
if e.description == fields::default_description_string() {
197197
format!("added by echtvar from {}", path)
198198
} else {
199199
format!("added by echtvar {}", e.description.to_string())

0 commit comments

Comments
 (0)