Skip to content

Develop sw #26

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 7 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,19 @@ keywords = ["MS", "LC-TIMS-TOF", "PASEF"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
byteorder = "1.4.3"
zstd = "0.13.2"
rusqlite = { version = "0.31.0", features = ["bundled"] }
rayon = "1.10.0"
linreg = "0.2.0"
bytemuck = "1.13.1"
parquet = "42.0.0"
thiserror = "1.0.0"
memmap2 = "0.9.3"
rusqlite = { version = "0.31.0", features = ["bundled"], optional = true}
parquet = { version = "42.0.0", optional = true }

[features]
tdf = ["rusqlite"]
minitdf = ["parquet"]
default = ["tdf", "minitdf"]

[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
Expand Down
26 changes: 20 additions & 6 deletions benches/speed_performance.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rayon::iter::ParallelIterator;
use timsrust::io::readers::{
FrameReader, SpectrumReader, SpectrumReaderConfig,
};
#[cfg(feature = "tdf")]
use timsrust::readers::FrameReader;
use timsrust::readers::{SpectrumReader, SpectrumReaderConfig};

const DDA_TEST: &str =
"/mnt/c/Users/Sander.Willems/Documents/data/tims05_300SPD/20230505_TIMS05_PaSk_MA_HeLa_6min_ddaP_S1-C10_1_2323.d/";
Expand All @@ -11,14 +11,17 @@ const DIA_TEST: &str =
const SYP_TEST: &str =
"/mnt/c/Users/Sander.Willems/Documents/data/20230505_TIMS05_PaSk_SA_HeLa_6min_syP_5scans_30Da_S1-D4_1_2330.d/";

/// Benchmark body: reads every frame through `frame_reader`.
///
/// The result is passed through `black_box` before being dropped so the
/// optimizer cannot treat the read as dead code and remove the work that is
/// being measured.
#[cfg(feature = "tdf")]
fn read_all_frames(frame_reader: &FrameReader) {
    black_box(frame_reader.get_all());
}

/// Benchmark body: reads the frames returned by `FrameReader::get_all_ms1`.
///
/// The result is passed through `black_box` before being dropped so the
/// optimizer cannot treat the read as dead code and remove the work that is
/// being measured.
#[cfg(feature = "tdf")]
fn read_all_ms1_frames(frame_reader: &FrameReader) {
    black_box(frame_reader.get_all_ms1());
}

/// Benchmark body: reads the frames returned by `FrameReader::get_all_ms2`.
///
/// The result is passed through `black_box` before being dropped so the
/// optimizer cannot treat the read as dead code and remove the work that is
/// being measured.
#[cfg(feature = "tdf")]
fn read_all_ms2_frames(frame_reader: &FrameReader) {
    black_box(frame_reader.get_all_ms2());
}
Expand All @@ -27,13 +30,13 @@ fn read_all_spectra(spectrum_reader: &SpectrumReader) {
spectrum_reader.get_all();
}

fn criterion_benchmark_dda(c: &mut Criterion) {
#[cfg(feature = "tdf")]
fn criterion_benchmark_dda_frames(c: &mut Criterion) {
// c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20))));
let mut group = c.benchmark_group("sample-size-example");
group.significance_level(0.001).sample_size(10);
let d_folder_name: &str = DDA_TEST;
let frame_reader = FrameReader::new(d_folder_name).unwrap();
let spectrum_reader = SpectrumReader::new(d_folder_name).unwrap();
group.bench_function("DDA read_all_frames 6m", |b| {
b.iter(|| read_all_frames(black_box(&frame_reader)))
});
Expand All @@ -43,12 +46,22 @@ fn criterion_benchmark_dda(c: &mut Criterion) {
group.bench_function("DDA read_all_ms2_frames 6m", |b| {
b.iter(|| read_all_ms2_frames(black_box(&frame_reader)))
});
group.finish();
}

/// Criterion benchmark that times `read_all_spectra` on the data set at
/// `DDA_TEST`.
///
/// Uses a significance level of 0.001 and a sample size of 10. Panics if the
/// spectrum reader cannot be created for that path.
fn criterion_benchmark_dda_spectra(c: &mut Criterion) {
    let mut group = c.benchmark_group("sample-size-example");
    group.significance_level(0.001).sample_size(10);
    let spectrum_reader = SpectrumReader::new(DDA_TEST).unwrap();
    group.bench_function("DDA read_all_spectra 6m", |bencher| {
        bencher.iter(|| read_all_spectra(black_box(&spectrum_reader)))
    });
    group.finish();
}

#[cfg(feature = "tdf")]
fn criterion_benchmark_dia(c: &mut Criterion) {
// c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20))));
let mut group = c.benchmark_group("sample-size-example");
Expand All @@ -68,6 +81,7 @@ fn criterion_benchmark_dia(c: &mut Criterion) {
group.finish();
}

#[cfg(feature = "tdf")]
fn criterion_benchmark_syp(c: &mut Criterion) {
// c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20))));
let mut group = c.benchmark_group("sample-size-example");
Expand All @@ -89,7 +103,7 @@ fn criterion_benchmark_syp(c: &mut Criterion) {

criterion_group!(
benches,
criterion_benchmark_dda,
criterion_benchmark_dda_spectra,
// criterion_benchmark_dia,
// criterion_benchmark_syp
);
Expand Down
10 changes: 7 additions & 3 deletions src/errors.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
#[cfg(feature = "tdf")]
use crate::io::readers::{
FrameReaderError, MetadataReaderError, PrecursorReaderError,
QuadrupoleSettingsReaderError, SpectrumReaderError,
FrameReaderError, MetadataReaderError, QuadrupoleSettingsReaderError,
};
use crate::io::readers::{PrecursorReaderError, SpectrumReaderError};

/// An error that is produced by timsrust (uses [thiserror]).
#[derive(thiserror::Error, Debug)]
pub enum Error {
pub enum TimsRustError {
#[cfg(feature = "tdf")]
#[error("{0}")]
FrameReaderError(#[from] FrameReaderError),
#[error("{0}")]
SpectrumReaderError(#[from] SpectrumReaderError),
#[cfg(feature = "tdf")]
#[error("{0}")]
MetadataReaderError(#[from] MetadataReaderError),
#[error("{0}")]
PrecursorReaderError(#[from] PrecursorReaderError),
#[cfg(feature = "tdf")]
#[error("{0}")]
QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError),
}
6 changes: 6 additions & 0 deletions src/io/readers.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
pub(crate) mod file_readers;
#[cfg(feature = "tdf")]
mod frame_reader;
#[cfg(feature = "tdf")]
mod metadata_reader;
mod precursor_reader;
#[cfg(feature = "tdf")]
mod quad_settings_reader;
mod spectrum_reader;

#[cfg(feature = "tdf")]
pub use frame_reader::*;
#[cfg(feature = "tdf")]
pub use metadata_reader::*;
pub use precursor_reader::*;
#[cfg(feature = "tdf")]
pub use quad_settings_reader::*;
pub use spectrum_reader::*;
2 changes: 2 additions & 0 deletions src/io/readers/file_readers.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#[cfg(feature = "minitdf")]
pub mod parquet_reader;
#[cfg(feature = "tdf")]
pub mod sql_reader;
pub mod tdf_blob_reader;
97 changes: 67 additions & 30 deletions src/io/readers/frame_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ use super::{
pub struct FrameReader {
path: PathBuf,
tdf_bin_reader: TdfBlobReader,
sql_frames: Vec<SqlFrame>,
frames: Vec<Frame>,
acquisition: AcquisitionType,
window_groups: Vec<u8>,
quadrupole_settings: Vec<Arc<QuadrupoleSettings>>,
offsets: Vec<usize>,
dia_windows: Option<Vec<Arc<QuadrupoleSettings>>>,
}

impl FrameReader {
Expand All @@ -50,6 +50,7 @@ impl FrameReader {
} else {
AcquisitionType::Unknown
};
// TODO should be refactored out to quadrupole reader
let mut window_groups = vec![0; sql_frames.len()];
let quadrupole_settings;
if acquisition == AcquisitionType::DIAPASEF {
Expand All @@ -64,36 +65,56 @@ impl FrameReader {
} else {
quadrupole_settings = vec![];
}
let quadrupole_settings = quadrupole_settings
.into_iter()
.map(|x| Arc::new(x))
.collect();
let frames = (0..sql_frames.len())
.into_par_iter()
.map(|index| {
get_frame_without_data(
index,
&sql_frames,
acquisition,
&window_groups,
&quadrupole_settings,
)
})
.collect();
let offsets = sql_frames.iter().map(|x| x.binary_offset).collect();
let reader = Self {
path: path.as_ref().to_path_buf(),
tdf_bin_reader,
sql_frames,
frames,
acquisition,
window_groups,
quadrupole_settings: quadrupole_settings
.into_iter()
.map(|x| Arc::new(x))
.collect(),
offsets,
dia_windows: match acquisition {
AcquisitionType::DIAPASEF => Some(quadrupole_settings),
_ => None,
},
};
Ok(reader)
}

pub fn parallel_filter<'a, F: Fn(&SqlFrame) -> bool + Sync + Send + 'a>(
pub fn parallel_filter<'a, F: Fn(&Frame) -> bool + Sync + Send + 'a>(
&'a self,
predicate: F,
) -> impl ParallelIterator<Item = Result<Frame, FrameReaderError>> + 'a
{
(0..self.len())
.into_par_iter()
.filter(move |x| predicate(&self.sql_frames[*x]))
.filter(move |x| predicate(&self.frames[*x]))
.map(move |x| self.get(x))
}

/// Returns a copy of the stored DIA window quadrupole settings, if any.
///
/// The returned vector is a clone of the reader's own list; the individual
/// settings are shared through their `Arc`s rather than deep-copied.
/// Yields `None` when the reader holds no DIA windows.
pub fn get_dia_windows(&self) -> Option<Vec<Arc<QuadrupoleSettings>>> {
self.dia_windows.clone()
}

pub fn get(&self, index: usize) -> Result<Frame, FrameReaderError> {
let mut frame: Frame = Frame::default();
let sql_frame = &self.sql_frames[index];
frame.index = sql_frame.id;
let blob = self.tdf_bin_reader.get(sql_frame.binary_offset)?;
let mut frame = self.frames[index].clone();
let offset = self.offsets[index];
let blob = self.tdf_bin_reader.get(offset)?;
let scan_count: usize =
blob.get(0).ok_or(FrameReaderError::CorruptFrame)? as usize;
let peak_count: usize = (blob.len() - scan_count) / 2;
Expand All @@ -105,18 +126,6 @@ impl FrameReader {
&blob,
&frame.scan_offsets,
)?;
frame.ms_level = MSLevel::read_from_msms_type(sql_frame.msms_type);
frame.rt = sql_frame.rt;
frame.acquisition_type = self.acquisition;
frame.intensity_correction_factor = 1.0 / sql_frame.accumulation_time;
if (self.acquisition == AcquisitionType::DIAPASEF)
& (frame.ms_level == MSLevel::MS2)
{
let window_group = self.window_groups[index];
frame.window_group = window_group;
frame.quadrupole_settings =
self.quadrupole_settings[window_group as usize - 1].clone();
}
Ok(frame)
}

Expand All @@ -125,19 +134,21 @@ impl FrameReader {
}

pub fn get_all_ms1(&self) -> Vec<Result<Frame, FrameReaderError>> {
self.parallel_filter(|x| x.msms_type == 0).collect()
self.parallel_filter(|x| x.ms_level == MSLevel::MS1)
.collect()
}

pub fn get_all_ms2(&self) -> Vec<Result<Frame, FrameReaderError>> {
self.parallel_filter(|x| x.msms_type != 0).collect()
self.parallel_filter(|x| x.ms_level == MSLevel::MS2)
.collect()
}

/// Returns the acquisition type stored on this reader.
pub fn get_acquisition(&self) -> AcquisitionType {
self.acquisition
}

pub fn len(&self) -> usize {
self.sql_frames.len()
self.frames.len()
}

pub fn get_path(&self) -> PathBuf {
Expand Down Expand Up @@ -199,6 +210,32 @@ fn read_tof_indices(
Ok(tof_indices)
}

/// Builds the metadata-only `Frame` for the SQL row at `index`.
///
/// Starts from `Frame::default()` and fills in the frame index, MS level,
/// retention time, acquisition type and intensity correction factor (the
/// reciprocal of the row's accumulation time). For MS2 frames of a DIA-PASEF
/// acquisition, the window group and its quadrupole settings are attached as
/// well; every other field keeps its default value.
///
/// # Panics
///
/// Panics if `index` is out of bounds for `sql_frames`. For DIA-PASEF MS2
/// frames it also panics if `index` is out of bounds for `window_groups`, or
/// if the window group is `0` or larger than `quadrupole_settings.len()`,
/// because the group value minus one is used as the lookup index.
fn get_frame_without_data(
    index: usize,
    sql_frames: &Vec<SqlFrame>,
    acquisition: AcquisitionType,
    window_groups: &Vec<u8>,
    quadrupole_settings: &Vec<Arc<QuadrupoleSettings>>,
) -> Frame {
    let mut result = Frame::default();
    let row = &sql_frames[index];
    result.index = row.id;
    result.rt = row.rt;
    result.acquisition_type = acquisition;
    result.intensity_correction_factor = 1.0 / row.accumulation_time;
    result.ms_level = MSLevel::read_from_msms_type(row.msms_type);

    let is_dia_ms2 = acquisition == AcquisitionType::DIAPASEF
        && result.ms_level == MSLevel::MS2;
    if !is_dia_ms2 {
        return result;
    }

    // TODO should be refactored out to quadrupole reader
    let group = window_groups[index];
    result.window_group = group;
    // The group value minus one is the position in `quadrupole_settings`.
    result.quadrupole_settings =
        Arc::clone(&quadrupole_settings[group as usize - 1]);
    result
}

#[derive(Debug, thiserror::Error)]
pub enum FrameReaderError {
#[error("{0}")]
Expand Down
Loading
Loading