diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 9abeb7f..16e2822 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -16,7 +16,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build - run: cargo build --verbose + run: cargo build --release --verbose - name: File sizes run: find src/ -name '*.rs' | xargs wc -l | sort -nr - name: Run tests diff --git a/Cargo.lock b/Cargo.lock index 04cfc3b..f9c1b71 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,10 +3,10 @@ version = 3 [[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "ahash" @@ -69,15 +69,15 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" [[package]] name = "arrow-array" -version = "42.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea9a0fd21121304cad96f307c938d861cb1e7f0c151b93047462cd9817d760fb" +checksum = "7f16835e8599dbbb1659fd869d865254c4cf32c6c2bb60b6942ac9fc36bfa5da" dependencies = [ "ahash", "arrow-buffer", @@ -91,36 +91,40 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "42.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30ce342ecf5971004e23cef8b5fb3bacd2bbc48a381464144925074e1472e9eb" +checksum = "1a1f34f0faae77da6b142db61deba2cb6d60167592b178be317b341440acba80" dependencies = [ + "bytes", "half", "num", ] [[package]] name = "arrow-cast" -version = "42.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b94a0ce7d27abbb02e2ee4db770f593127610f57b32625b0bc6a1a90d65f085" +checksum = "450e4abb5775bca0740bec0bcf1b1a5ae07eff43bd625661c4436d8e8e4540c4" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", + "atoi", + "base64", "chrono", "half", "lexical-core", "num", + "ryu", ] [[package]] name = "arrow-data" -version = "42.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d9a83dad6a53d6907765106d3bc61d6d9d313cfe1751701b3ef0948e7283dc2" +checksum = "2b1e618bbf714c7a9e8d97203c806734f012ff71ae3adc8ad1b075689f540634" dependencies = [ "arrow-buffer", "arrow-schema", @@ -130,9 +134,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "42.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a46da5e438a854e0386b38774da88a98782c0973c6dbc5c949ca4e02faf9b016" +checksum = "f98e983549259a2b97049af7edfb8f28b8911682040e99a94e4ceb1196bd65c2" dependencies = [ "arrow-array", "arrow-buffer", @@ -144,16 +148,17 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "42.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba9ed245bd2d7d97ad1457cb281d4296e8b593588758b8fec6d67b2b2b0f2265" +checksum = "fbf0388a18fd7f7f3fe3de01852d30f54ed5182f9004db700fbe3ba843ed2794" [[package]] name = "arrow-select" -version = "42.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dc9bd6aebc565b1d04bae64a0f4dda3abc677190eb7d960471b1b20e1cebed0" +checksum = "b83e5723d307a38bf00ecd2972cd078d1339c7fd3eb044f609958a9a24463f3a" dependencies = [ + "ahash", "arrow-array", "arrow-buffer", "arrow-data", @@ -161,17 +166,26 @@ dependencies = [ "num", ] +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "autocfg" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "base64" -version = "0.21.7" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bitflags" @@ -187,9 +201,9 @@ checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "brotli" -version = "3.5.0" +version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391" +checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -198,9 +212,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "2.5.1" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -214,9 +228,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytemuck" -version = "1.16.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" +checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d" [[package]] name = "byteorder" @@ -226,9 +240,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.6.0" +version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" [[package]] name = "cast" @@ -238,13 +252,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.1.0" +version = "1.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaff6f8ce506b9773fa786672d63fc7a191ffea1be33f72bbd4aeacefca9ffc8" +checksum = "b16803a61b81d9eabb7eae2588776c4c1e584b738ede45fdbb4c972cec1e9945" dependencies = [ "jobserver", "libc", - "once_cell", + "shlex", ] [[package]] @@ -294,18 +308,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.9" +version = "4.5.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64acc1846d54c1fe936a78dc189c34e28d3f5afc348403f28ecf53660b9b8462" +checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.9" +version = "4.5.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb8393d67ba2e7bfaf28a23458e4e2b543cc73a99595511eb207fdb8aede942" +checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" dependencies = [ "anstyle", "clap_lex", @@ -313,9 +327,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "const-random" @@ -339,9 +353,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "crc32fast" @@ -450,9 +464,9 @@ checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] name = "flatbuffers" -version = "23.5.26" +version = "24.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" +checksum = "8add37afff2d4ffa83bc748a70b4b1370984f6980768554182424ef71447c35f" dependencies = [ "bitflags 1.3.2", "rustc_version", @@ -460,9 +474,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.30" +version = "1.0.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" dependencies = [ "crc32fast", "miniz_oxide", @@ -510,15 +524,15 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.3.9" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" [[package]] name = "iana-time-zone" -version = "0.1.60" +version = "0.1.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -545,13 +559,13 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "is-terminal" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" dependencies = [ "hermit-abi", "libc", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -571,27 +585,27 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jobserver" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ "libc", ] [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" dependencies = [ "wasm-bindgen", ] [[package]] name = "lexical-core" -version = "0.8.5" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" dependencies = [ "lexical-parse-float", "lexical-parse-integer", @@ -602,9 +616,9 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "0.8.5" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" dependencies = [ "lexical-parse-integer", "lexical-util", @@ -613,9 +627,9 @@ dependencies = [ [[package]] name = "lexical-parse-integer" -version = "0.8.6" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" dependencies = [ "lexical-util", "static_assertions", @@ -623,18 +637,18 @@ dependencies = [ [[package]] name = "lexical-util" -version = "0.8.5" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" dependencies = [ "static_assertions", ] [[package]] name = "lexical-write-float" -version = "0.8.5" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" dependencies = [ "lexical-util", "lexical-write-integer", @@ -643,9 +657,9 @@ dependencies = [ [[package]] name = "lexical-write-integer" -version = "0.8.5" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" dependencies = [ "lexical-util", "static_assertions", @@ -653,9 +667,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.155" +version = "0.2.161" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" [[package]] name = "libm" @@ -665,9 +679,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libsqlite3-sys" -version = "0.28.0" +version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f" +checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" dependencies = [ "cc", "pkg-config", @@ -691,23 +705,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] -name = "lz4" -version = "1.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6eab492fe7f8651add23237ea56dbf11b3c4ff762ab83d40a47f11433421f91" -dependencies = [ - "libc", - "lz4-sys", -] - -[[package]] -name = "lz4-sys" -version = "1.9.5" +name = "lz4_flex" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9764018d143cc854c9f17f0b907de70f14393b1f502da6375dce70f00514eb3" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" dependencies = [ - "cc", - "libc", + "twox-hash", ] [[package]] @@ -718,20 +721,20 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memmap2" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" +checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" dependencies = [ "libc", ] [[package]] name = "miniz_oxide" -version = "0.7.4" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" dependencies = [ - "adler", + "adler2", ] [[package]] @@ -810,9 +813,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "oorandom" @@ -831,9 +834,9 @@ dependencies = [ [[package]] name = "parquet" -version = "42.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baab9c36b1c8300b81b4d577d306a0a733f9d34021363098d3548e37757ed6c8" +checksum = "310c46a70a3ba90d98fec39fa2da6d9d731e544191da6fb56c9d199484d0dd3e" dependencies = [ "ahash", "arrow-array", @@ -848,8 +851,9 @@ dependencies = [ "bytes", "chrono", "flate2", + "half", "hashbrown", - "lz4", + "lz4_flex", "num", "num-bigint", "paste", @@ -857,7 +861,8 @@ dependencies = [ "snap", "thrift", "twox-hash", - "zstd 0.12.4", + "zstd", + "zstd-sys", ] [[package]] @@ -868,15 +873,15 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "pkg-config" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "plotters" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ "num-traits", "plotters-backend", @@ -887,33 +892,33 @@ dependencies = [ [[package]] name = "plotters-backend" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" [[package]] name = "plotters-svg" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" dependencies = [ "plotters-backend", ] [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -940,9 +945,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.5" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" dependencies = [ "aho-corasick", "memchr", @@ -952,9 +957,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" dependencies = [ "aho-corasick", "memchr", @@ -963,15 +968,15 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rusqlite" -version = "0.31.0" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae" +checksum = "7753b721174eb8ff87a9a0e799e2d7bc3749323e773db92e0984debb00019d6e" dependencies = [ "bitflags 2.6.0", "fallible-iterator", @@ -983,9 +988,9 @@ dependencies = [ [[package]] name = "rustc_version" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver", ] @@ -1019,29 +1024,29 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.209" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.209" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.79", ] [[package]] name = "serde_json" -version = "1.0.127" +version = "1.0.129" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad" +checksum = "6dbcf9b78a125ee667ae19388837dd12294b858d101fdd393cb9d5501ef09eb2" dependencies = [ "itoa", "memchr", @@ -1049,6 +1054,12 @@ dependencies = [ "serde", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "smallvec" version = "1.13.2" @@ -1080,9 +1091,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.70" +version = "2.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0209b68b3613b093e0ec905354eccaedcfe83b8cb37cbdeae64026c3064c16" +checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" dependencies = [ "proc-macro2", "quote", @@ -1091,22 +1102,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.79", ] [[package]] @@ -1120,9 +1131,15 @@ dependencies = [ "ordered-float", ] +[[package]] +name = "timscompress" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4423059b0eec2a116d2b8e4b40d4faaad1b72ae17df8718c66520e0e876885" + [[package]] name = "timsrust" -version = "0.4.1" +version = "0.4.2" dependencies = [ "bytemuck", "criterion", @@ -1134,7 +1151,8 @@ dependencies = [ "serde", "serde_json", "thiserror", - "zstd 0.13.2", + "timscompress", + "zstd", ] [[package]] @@ -1168,9 +1186,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] name = "vcpkg" @@ -1180,9 +1198,9 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "walkdir" @@ -1202,34 +1220,35 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.79", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1237,28 +1256,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.79", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "web-sys" -version = "0.3.69" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" dependencies = [ "js-sys", "wasm-bindgen", @@ -1266,11 +1285,11 @@ dependencies = [ [[package]] name = "winapi-util" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -1291,6 +1310,15 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -1372,16 +1400,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", -] - -[[package]] -name = "zstd" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" -dependencies = [ - "zstd-safe 6.0.6", + "syn 2.0.79", ] [[package]] @@ -1390,33 +1409,23 @@ version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" dependencies = [ - "zstd-safe 7.2.0", -] - -[[package]] -name = "zstd-safe" -version = "6.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" -dependencies = [ - "libc", - "zstd-sys", + "zstd-safe", ] [[package]] name = "zstd-safe" -version = "7.2.0" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa556e971e7b568dc775c136fc9de8c779b1c2fc3a63defaafadffdbd3181afa" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.12+zstd.1.5.6" +version = "2.0.13+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" dependencies = [ "cc", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index 2d50312..c40b783 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "timsrust" -version = "0.4.1" +version = "0.4.2" edition = "2021" description = "A crate to read Bruker timsTOF data" license = "Apache-2.0" @@ -17,13 +17,14 @@ keywords = ["MS", "LC-TIMS-TOF", "PASEF"] zstd = "0.13.2" rayon = "1.10.0" linreg = "0.2.0" -bytemuck = "1.13.1" +bytemuck = "1.18.0" thiserror = "1.0.0" memmap2 = "0.9.3" -rusqlite = { version = "0.31.0", features = ["bundled"], optional = true} -parquet = { version = "42.0.0", optional = true } -serde = { version = "1.0.209", features = ["derive"], optional = true } -serde_json = { version = "1.0.127", optional = true } +rusqlite = { version = "0.32.0", features = ["bundled"], optional = true } +parquet = { version = "53.0.0", optional = true } +serde = { version = "1.0.210", features = ["derive"], optional = true } +serde_json = { version = "1.0.128", optional = true } +timscompress = {version = "0.1.0", optional=true} [features] tdf = ["rusqlite"] diff --git a/README.md b/README.md index 5b0ffac..be2aef5 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,9 @@ TODO * Improve docs * Improve tests * Pase CompressionType1 +* Tarred file reader +* Clean up src (FrameReader, ...) +* Cleaner try_from conversions/readers * Make Path of TimsTOF data into special type * Single access point for all readers? * Few unchecked unwraps left diff --git a/benches/speed_performance.rs b/benches/speed_performance.rs index d9ff504..1944962 100644 --- a/benches/speed_performance.rs +++ b/benches/speed_performance.rs @@ -2,7 +2,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rayon::iter::ParallelIterator; #[cfg(feature = "tdf")] use timsrust::readers::FrameReader; -use timsrust::readers::{SpectrumReader, SpectrumReaderConfig}; +use timsrust::readers::SpectrumReader; const DDA_TEST: &str = "/mnt/d/data/mpib/tims05_300SPD/20230505_TIMS05_PaSk_MA_HeLa_6min_ddaP_S1-C10_1_2323.d/"; diff --git a/src/domain_converters.rs b/src/domain_converters.rs index e38bbf4..7aedb74 100644 --- a/src/domain_converters.rs +++ b/src/domain_converters.rs @@ -10,6 +10,5 @@ pub use tof_to_mz::Tof2MzConverter; /// Convert from one domain (e.g. Time of Flight) to another (m/z). pub trait ConvertableDomain { fn convert + Copy>(&self, value: T) -> f64; - fn invert + Copy>(&self, value: T) -> f64; } diff --git a/src/errors.rs b/src/errors.rs index 7758b0b..dfa4b0b 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -2,7 +2,7 @@ use crate::io::readers::{ FrameReaderError, MetadataReaderError, QuadrupoleSettingsReaderError, }; -use crate::io::readers::{PrecursorReaderError, SpectrumReaderError}; +use crate::{io::readers::PrecursorReaderError, readers::SpectrumReaderError}; /// An error that is produced by timsrust (uses [thiserror]). #[derive(thiserror::Error, Debug)] diff --git a/src/io/readers.rs b/src/io/readers.rs index c13f808..b33f6a3 100644 --- a/src/io/readers.rs +++ b/src/io/readers.rs @@ -7,6 +7,7 @@ mod precursor_reader; #[cfg(feature = "tdf")] mod quad_settings_reader; mod spectrum_reader; +mod timstof; #[cfg(feature = "tdf")] pub use frame_reader::*; @@ -16,3 +17,4 @@ pub use precursor_reader::*; #[cfg(feature = "tdf")] pub use quad_settings_reader::*; pub use spectrum_reader::*; +pub use timstof::*; diff --git a/src/io/readers/file_readers/parquet_reader.rs b/src/io/readers/file_readers/parquet_reader.rs index a5c6927..72b44be 100644 --- a/src/io/readers/file_readers/parquet_reader.rs +++ b/src/io/readers/file_readers/parquet_reader.rs @@ -1,7 +1,10 @@ pub mod precursors; +use std::{fs::File, io, str::FromStr}; + use parquet::file::reader::{FileReader, SerializedFileReader}; -use std::{fs::File, io, path::Path, str::FromStr}; + +use crate::readers::TimsTofPathError; pub trait ReadableParquetTable { fn update_from_parquet_file(&mut self, key: &str, value: String); @@ -11,35 +14,37 @@ pub trait ReadableParquetTable { } fn from_parquet_file( - file_name: impl AsRef, - ) -> Result, ParquetError> + path: impl crate::readers::TimsTofPathLike, + ) -> Result, ParquetReaderError> where Self: Sized + Default, { - let file: File = File::open(file_name)?; + let path = path.to_timstof_path()?; + let file: File = File::open(path.ms2_parquet()?)?; let reader: SerializedFileReader = SerializedFileReader::new(file)?; - let results: Vec = reader + reader .get_row_iter(None)? .map(|record| { let mut result = Self::default(); - for (name, field) in record.get_column_iter() { + for (name, field) in record?.get_column_iter() { result.update_from_parquet_file( name.to_string().as_str(), field.to_string(), ); } - result + Ok(result) }) - .collect(); - Ok(results) + .collect() } } #[derive(Debug, thiserror::Error)] -pub enum ParquetError { +pub enum ParquetReaderError { #[error("{0}")] IO(#[from] io::Error), #[error("Cannot iterate over row {0}")] - ParquetIO(#[from] parquet::errors::ParquetError), + ParquetError(#[from] parquet::errors::ParquetError), + #[error("{0}")] + TimsTofPathError(#[from] TimsTofPathError), } diff --git a/src/io/readers/file_readers/sql_reader.rs b/src/io/readers/file_readers/sql_reader.rs index 279aa06..af24d0a 100644 --- a/src/io/readers/file_readers/sql_reader.rs +++ b/src/io/readers/file_readers/sql_reader.rs @@ -5,31 +5,29 @@ pub mod pasef_frame_msms; pub mod precursors; pub mod quad_settings; -use std::{ - collections::HashMap, - path::{Path, PathBuf}, -}; +use std::collections::HashMap; use rusqlite::{types::FromSql, Connection}; +use crate::readers::{TimsTofPathError, TimsTofPathLike}; + #[derive(Debug)] pub struct SqlReader { connection: Connection, - path: PathBuf, } impl SqlReader { - pub fn open(file_name: impl AsRef) -> Result { - let path = file_name.as_ref().to_path_buf(); - let connection = Connection::open(&path)?; - Ok(Self { connection, path }) + pub fn open(path: impl TimsTofPathLike) -> Result { + let path = path.to_timstof_path()?; + let connection = Connection::open(&path.tdf()?)?; + Ok(Self { connection }) } pub fn read_column_from_table( &self, column_name: &str, table_name: &str, - ) -> Result, SqlError> { + ) -> Result, SqlReaderError> { let query = format!("SELECT {} FROM {}", column_name, table_name); let mut stmt = self.connection.prepare(&query)?; let rows = stmt.query_map([], |row| match row.get::(0) { @@ -39,10 +37,6 @@ impl SqlReader { let result = rows.collect::, _>>()?; Ok(result) } - - pub fn get_path(&self) -> PathBuf { - self.path.clone() - } } pub trait ReadableSqlTable { @@ -50,7 +44,7 @@ pub trait ReadableSqlTable { fn from_sql_row(row: &rusqlite::Row) -> Self; - fn from_sql_reader(reader: &SqlReader) -> Result, SqlError> + fn from_sql_reader(reader: &SqlReader) -> Result, SqlReaderError> where Self: Sized, { @@ -59,7 +53,9 @@ pub trait ReadableSqlTable { let rows = stmt.query_map([], |row| Ok(Self::from_sql_row(row)))?; let result = rows.collect::, _>>()?; if result.len() == 0 { - Err(SqlError(rusqlite::Error::QueryReturnedNoRows)) + Err(SqlReaderError::SqlError( + rusqlite::Error::QueryReturnedNoRows, + )) } else { Ok(result) } @@ -71,7 +67,7 @@ pub trait ReadableSqlHashMap { fn from_sql_reader( reader: &SqlReader, - ) -> Result, SqlError> + ) -> Result, SqlReaderError> where Self: Sized, { @@ -99,6 +95,10 @@ impl ParseDefault for rusqlite::Row<'_> { } } -#[derive(thiserror::Error, Debug)] -#[error("{0}")] -pub struct SqlError(#[from] rusqlite::Error); +#[derive(Debug, thiserror::Error)] +pub enum SqlReaderError { + #[error("{0}")] + SqlError(#[from] rusqlite::Error), + #[error("{0}")] + TimsTofPathError(#[from] TimsTofPathError), +} diff --git a/src/io/readers/file_readers/tdf_blob_reader.rs b/src/io/readers/file_readers/tdf_blob_reader.rs index d8e64fc..f5aa4df 100644 --- a/src/io/readers/file_readers/tdf_blob_reader.rs +++ b/src/io/readers/file_readers/tdf_blob_reader.rs @@ -3,80 +3,107 @@ mod tdf_blobs; use memmap2::Mmap; use std::fs::File; use std::io; -use std::path::Path; pub use tdf_blobs::*; use zstd::decode_all; +use crate::readers::{TimsTofFileType, TimsTofPathError, TimsTofPathLike}; + const U32_SIZE: usize = std::mem::size_of::(); const HEADER_SIZE: usize = 2; #[derive(Debug)] pub struct TdfBlobReader { - mmap: Mmap, - global_file_offset: usize, + bin_file_reader: TdfBinFileReader, } impl TdfBlobReader { - // TODO parse compression1 - pub fn new( - file_name: impl AsRef, - ) -> Result { - let path = file_name.as_ref().to_path_buf(); - let file = File::open(&path)?; - let mmap = unsafe { Mmap::map(&file)? }; - let reader = Self { - mmap, - global_file_offset: 0, - }; + pub fn new(path: impl TimsTofPathLike) -> Result { + let bin_file_reader = TdfBinFileReader::new(path)?; + let reader = Self { bin_file_reader }; Ok(reader) } pub fn get(&self, offset: usize) -> Result { - let offset = self.global_file_offset + offset; + let offset = self.bin_file_reader.global_file_offset + offset; let byte_count = self + .bin_file_reader .get_byte_count(offset) .ok_or(TdfBlobReaderError::InvalidOffset(offset))?; - let compressed_bytes = self - .get_compressed_bytes(offset, byte_count) + let data = self + .bin_file_reader + .get_data(offset, byte_count) .ok_or(TdfBlobReaderError::CorruptData)?; - let bytes = decode_all(compressed_bytes) - .map_err(|_| TdfBlobReaderError::Decompression)?; + let bytes = + decode_all(data).map_err(|_| TdfBlobReaderError::Decompression)?; let blob = TdfBlob::new(bytes)?; Ok(blob) } +} + +#[derive(Debug)] +struct TdfBinFileReader { + mmap: Mmap, + global_file_offset: usize, +} + +impl TdfBinFileReader { + // TODO parse compression1 + fn new(path: impl TimsTofPathLike) -> Result { + let path = path.to_timstof_path()?; + let bin_path = match path.file_type() { + #[cfg(feature = "tdf")] + TimsTofFileType::TDF => path.tdf_bin()?, + #[cfg(feature = "minitdf")] + TimsTofFileType::MiniTDF => path.ms2_bin()?, + }; + let file = File::open(bin_path)?; + let mmap = unsafe { Mmap::map(&file)? }; + let reader = Self { + mmap, + global_file_offset: 0, + }; + Ok(reader) + } fn get_byte_count(&self, offset: usize) -> Option { let start = offset as usize; - let end = (offset + U32_SIZE) as usize; + let end = start + U32_SIZE as usize; let raw_byte_count = self.mmap.get(start..end)?; let byte_count = u32::from_le_bytes(raw_byte_count.try_into().ok()?) as usize; Some(byte_count) } - fn get_compressed_bytes( - &self, - offset: usize, - byte_count: usize, - ) -> Option<&[u8]> { + // fn get_scan_count(&self, offset: usize) -> Option { + // let start = (offset + U32_SIZE) as usize; + // let end = start + U32_SIZE as usize; + // let raw_scan_count = self.mmap.get(start..end)?; + // let scan_count = + // u32::from_le_bytes(raw_scan_count.try_into().ok()?) as usize; + // Some(scan_count) + // } + + fn get_data(&self, offset: usize, byte_count: usize) -> Option<&[u8]> { let start = offset + HEADER_SIZE * U32_SIZE; let end = offset + byte_count; self.mmap.get(start..end) } } +#[cfg(feature = "minitdf")] #[derive(Debug)] pub struct IndexedTdfBlobReader { blob_reader: TdfBlobReader, binary_offsets: Vec, } +#[cfg(feature = "minitdf")] impl IndexedTdfBlobReader { pub fn new( - file_name: impl AsRef, + path: impl TimsTofPathLike, binary_offsets: Vec, ) -> Result { - let blob_reader = TdfBlobReader::new(file_name)?; + let blob_reader = TdfBlobReader::new(path)?; let reader = Self { binary_offsets, blob_reader: blob_reader, @@ -109,12 +136,17 @@ pub enum TdfBlobReaderError { Decompression, #[error("Invalid offset {0}")] InvalidOffset(usize), + #[error("{0}")] + TimsTofPathError(#[from] TimsTofPathError), + #[error("No binary file found")] + NoBinary, } #[derive(Debug, thiserror::Error)] pub enum IndexedTdfBlobReaderError { #[error("{0}")] TdfBlobReaderError(#[from] TdfBlobReaderError), + #[cfg(feature = "minitdf")] #[error("Invalid index {0}")] InvalidIndex(usize), } diff --git a/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs b/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs index 6445244..075ffc0 100644 --- a/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs +++ b/src/io/readers/file_readers/tdf_blob_reader/tdf_blobs.rs @@ -14,6 +14,15 @@ impl TdfBlob { } } + #[cfg(feature = "minitdf")] + pub fn get_all(&self) -> Vec { + (0..self.len()) + .map(|index| self.get(index).expect( + "When iterating over the length of a tdf blob, you cannot go out of bounds" + )) + .collect() + } + pub fn get(&self, index: usize) -> Option { if index >= self.len() { None @@ -38,6 +47,7 @@ impl TdfBlob { self.bytes.len() / BLOB_TYPE_SIZE } + #[cfg(feature = "minitdf")] pub fn is_empty(&self) -> bool { self.len() == 0 } diff --git a/src/io/readers/frame_reader.rs b/src/io/readers/frame_reader.rs index ce956b7..6a0767a 100644 --- a/src/io/readers/frame_reader.rs +++ b/src/io/readers/frame_reader.rs @@ -1,48 +1,57 @@ -use std::{ - path::{Path, PathBuf}, - sync::Arc, - vec, -}; +use std::sync::Arc; use rayon::iter::{IntoParallelIterator, ParallelIterator}; +#[cfg(feature = "timscompress")] +use timscompress::reader::CompressedTdfBlobReader; -use crate::{ - ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}, - utils::find_extension, -}; +use crate::ms_data::{AcquisitionType, Frame, MSLevel, QuadrupoleSettings}; use super::{ file_readers::{ sql_reader::{ frame_groups::SqlWindowGroup, frames::SqlFrame, ReadableSqlTable, - SqlError, SqlReader, + SqlReader, SqlReaderError, }, tdf_blob_reader::{TdfBlob, TdfBlobReader, TdfBlobReaderError}, }, - QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, + MetadataReader, MetadataReaderError, QuadrupoleSettingsReader, + QuadrupoleSettingsReaderError, TimsTofPathLike, }; #[derive(Debug)] pub struct FrameReader { - path: PathBuf, tdf_bin_reader: TdfBlobReader, + #[cfg(feature = "timscompress")] + compressed_reader: CompressedTdfBlobReader, frames: Vec, acquisition: AcquisitionType, offsets: Vec, dia_windows: Option>>, + compression_type: u8, + #[cfg(feature = "timscompress")] + scan_count: usize, } impl FrameReader { - pub fn new(path: impl AsRef) -> Result { - let sql_path = find_extension(&path, "analysis.tdf").ok_or( - FrameReaderError::FileNotFound("analysis.tdf".to_string()), - )?; - let tdf_sql_reader = SqlReader::open(sql_path)?; + pub fn new(path: impl TimsTofPathLike) -> Result { + let compression_type = + match MetadataReader::new(&path)?.compression_type { + 2 => 2, + #[cfg(feature = "timscompress")] + 3 => 3, + compression_type => { + return Err(FrameReaderError::CompressionTypeError( + compression_type, + )) + }, + }; + + let tdf_sql_reader = SqlReader::open(&path)?; let sql_frames = SqlFrame::from_sql_reader(&tdf_sql_reader)?; - let bin_path = find_extension(&path, "analysis.tdf_bin").ok_or( - FrameReaderError::FileNotFound("analysis.tdf_bin".to_string()), - )?; - let tdf_bin_reader = TdfBlobReader::new(bin_path)?; + let tdf_bin_reader = TdfBlobReader::new(&path)?; + #[cfg(feature = "timscompress")] + let compressed_reader = CompressedTdfBlobReader::new(&path) + .ok_or_else(|| FrameReaderError::TimscompressError)?; let acquisition = if sql_frames.iter().any(|x| x.msms_type == 8) { AcquisitionType::DDAPASEF } else if sql_frames.iter().any(|x| x.msms_type == 9) { @@ -60,11 +69,11 @@ impl FrameReader { window_groups[window_group.frame - 1] = window_group.window_group; } - quadrupole_settings = - QuadrupoleSettingsReader::new(tdf_sql_reader.get_path())?; + quadrupole_settings = QuadrupoleSettingsReader::new(&path)?; } else { quadrupole_settings = vec![]; } + // TODO move Arc to quad settings reader? let quadrupole_settings = quadrupole_settings .into_iter() .map(|x| Arc::new(x)) @@ -81,9 +90,15 @@ impl FrameReader { ) }) .collect(); + #[cfg(feature = "timscompress")] + let scan_count = sql_frames + .iter() + .map(|frame| frame.scan_count) + .max() + .expect("Frame table cannot be empty") + as usize; let offsets = sql_frames.iter().map(|x| x.binary_offset).collect(); let reader = Self { - path: path.as_ref().to_path_buf(), tdf_bin_reader, frames, acquisition, @@ -92,10 +107,20 @@ impl FrameReader { AcquisitionType::DIAPASEF => Some(quadrupole_settings), _ => None, }, + compression_type, + #[cfg(feature = "timscompress")] + compressed_reader, + #[cfg(feature = "timscompress")] + scan_count, }; Ok(reader) } + // TODO make option result + pub fn get_binary_offset(&self, index: usize) -> usize { + self.offsets[index] + } + pub fn parallel_filter<'a, F: Fn(&Frame) -> bool + Sync + Send + 'a>( &'a self, predicate: F, @@ -107,14 +132,37 @@ impl FrameReader { .map(move |x| self.get(x)) } + pub fn filter<'a, F: Fn(&Frame) -> bool + Sync + Send + 'a>( + &'a self, + predicate: F, + ) -> impl Iterator> + 'a { + (0..self.len()) + .filter(move |x| predicate(&self.frames[*x])) + .map(move |x| self.get(x)) + } + pub fn get_dia_windows(&self) -> Option>> { self.dia_windows.clone() } pub fn get(&self, index: usize) -> Result { + match self.compression_type { + 2 => self.get_from_compression_type_2(index), + #[cfg(feature = "timscompress")] + 3 => self.get_from_compression_type_3(index), + _ => Err(FrameReaderError::CompressionTypeError( + self.compression_type, + )), + } + } + + fn get_from_compression_type_2( + &self, + index: usize, + ) -> Result { // NOTE: get does it by 0-offsetting the vec, not by Frame index!!! - let mut frame = self.frames[index].clone(); - let offset = self.offsets[index]; + let mut frame = self.get_frame_without_coordinates(index)?; + let offset = self.get_binary_offset(index); let blob = self.tdf_bin_reader.get(offset)?; let scan_count: usize = blob.get(0).ok_or(FrameReaderError::CorruptFrame)? as usize; @@ -130,6 +178,36 @@ impl FrameReader { Ok(frame) } + #[cfg(feature = "timscompress")] + fn get_from_compression_type_3( + &self, + index: usize, + ) -> Result { + // NOTE: get does it by 0-offsetting the vec, not by Frame index!!! + // TODO + let mut frame = self.get_frame_without_coordinates(index)?; + let offset = self.get_binary_offset(index); + let raw_frame = self + .compressed_reader + .get_raw_frame_data(offset, self.scan_count); + frame.tof_indices = raw_frame.tof_indices; + frame.intensities = raw_frame.intensities; + frame.scan_offsets = raw_frame.scan_offsets; + Ok(frame) + } + + pub fn get_frame_without_coordinates( + &self, + index: usize, + ) -> Result { + let frame = self + .frames + .get(index) + .ok_or(FrameReaderError::IndexOutOfBounds)? + .clone(); + Ok(frame) + } + pub fn get_all(&self) -> Vec> { self.parallel_filter(|_| true).collect() } @@ -151,10 +229,6 @@ impl FrameReader { pub fn len(&self) -> usize { self.frames.len() } - - pub fn get_path(&self) -> PathBuf { - self.path.clone() - } } fn read_scan_offsets( @@ -222,7 +296,7 @@ fn get_frame_without_data( let sql_frame = &sql_frames[index]; frame.index = sql_frame.id; frame.ms_level = MSLevel::read_from_msms_type(sql_frame.msms_type); - frame.rt = sql_frame.rt; + frame.rt_in_seconds = sql_frame.rt; frame.acquisition_type = acquisition; frame.intensity_correction_factor = 1.0 / sql_frame.accumulation_time; if (acquisition == AcquisitionType::DIAPASEF) @@ -239,14 +313,23 @@ fn get_frame_without_data( #[derive(Debug, thiserror::Error)] pub enum FrameReaderError { + #[cfg(feature = "timscompress")] + #[error("Timscompress error")] + TimscompressError, #[error("{0}")] TdfBlobReaderError(#[from] TdfBlobReaderError), #[error("{0}")] + MetadataReaderError(#[from] MetadataReaderError), + #[error("{0}")] FileNotFound(String), #[error("{0}")] - SqlError(#[from] SqlError), + SqlReaderError(#[from] SqlReaderError), #[error("Corrupt Frame")] CorruptFrame, #[error("{0}")] QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError), + #[error("Index out of bounds")] + IndexOutOfBounds, + #[error("Compression type {0} not understood")] + CompressionTypeError(u8), } diff --git a/src/io/readers/metadata_reader.rs b/src/io/readers/metadata_reader.rs index 8944001..a5ba1db 100644 --- a/src/io/readers/metadata_reader.rs +++ b/src/io/readers/metadata_reader.rs @@ -1,12 +1,15 @@ -use std::{collections::HashMap, fmt::Debug, path::Path, str::FromStr}; +use std::{collections::HashMap, fmt::Debug, str::FromStr}; use crate::{ domain_converters::{Frame2RtConverter, Scan2ImConverter, Tof2MzConverter}, ms_data::Metadata, }; -use super::file_readers::sql_reader::{ - metadata::SqlMetadata, ReadableSqlHashMap, SqlError, SqlReader, +use super::{ + file_readers::sql_reader::{ + metadata::SqlMetadata, ReadableSqlHashMap, SqlReader, SqlReaderError, + }, + TimsTofPathLike, }; const OTOF_CONTROL: &str = "Bruker otofControl"; @@ -15,10 +18,9 @@ pub struct MetadataReader; impl MetadataReader { pub fn new( - path: impl AsRef, + path: impl TimsTofPathLike, ) -> Result { - let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(&sql_path)?; + let tdf_sql_reader = SqlReader::open(path)?; let sql_metadata: HashMap = SqlMetadata::from_sql_reader(&tdf_sql_reader)?; let compression_type = @@ -40,7 +42,6 @@ impl MetadataReader { .max_by(|a, b| a.partial_cmp(b).unwrap()) .unwrap(); let metadata = Metadata { - path: path.as_ref().to_path_buf(), rt_converter: Frame2RtConverter::from_values(rt_values), im_converter: get_im_converter(&sql_metadata, &tdf_sql_reader)?, mz_converter: get_mz_converter(&sql_metadata)?, @@ -97,7 +98,10 @@ fn get_im_converter( ) -> Result { let scan_counts: Vec = tdf_sql_reader.read_column_from_table("NumScans", "Frames")?; - let scan_max_index = *scan_counts.iter().max().unwrap(); // SqlReader cannot return empty vecs, so always succeeds + let scan_max_index = *scan_counts + .iter() + .max() + .expect("SqlReader cannot return empty vecs, so there is always a max scan index"); let (im_min, im_max) = get_im_bounds(sql_metadata)?; Ok(Scan2ImConverter::from_boundaries( im_min, @@ -120,12 +124,8 @@ fn parse_value( #[derive(Debug, thiserror::Error)] pub enum MetadataReaderError { - // #[error("{0}")] - // TdfBlobReaderError(#[from] TdfBlobReaderError), - // #[error("{0}")] - // FileNotFound(String), #[error("{0}")] - SqlError(#[from] SqlError), + SqlReaderError(#[from] SqlReaderError), #[error("Key not found: {0}")] KeyNotFound(String), #[error("Key not parsable: {0}")] diff --git a/src/io/readers/precursor_reader.rs b/src/io/readers/precursor_reader.rs index 04f0324..612456d 100644 --- a/src/io/readers/precursor_reader.rs +++ b/src/io/readers/precursor_reader.rs @@ -4,7 +4,6 @@ mod minitdf; mod tdf; use core::fmt; -use std::path::{Path, PathBuf}; #[cfg(feature = "minitdf")] use minitdf::{MiniTDFPrecursorReader, MiniTDFPrecursorReaderError}; @@ -15,6 +14,7 @@ use crate::ms_data::Precursor; #[cfg(feature = "tdf")] use super::FrameWindowSplittingConfiguration; +use super::{TimsTofFileType, TimsTofPath, TimsTofPathError, TimsTofPathLike}; pub struct PrecursorReader { precursor_reader: Box, @@ -31,7 +31,9 @@ impl PrecursorReader { PrecursorReaderBuilder::default() } - pub fn new(path: impl AsRef) -> Result { + pub fn new( + path: impl TimsTofPathLike, + ) -> Result { Self::build().with_path(path).finalize() } @@ -46,15 +48,17 @@ impl PrecursorReader { #[derive(Debug, Default, Clone)] pub struct PrecursorReaderBuilder { - path: PathBuf, + path: Option, #[cfg(feature = "tdf")] config: FrameWindowSplittingConfiguration, } impl PrecursorReaderBuilder { - pub fn with_path(&self, path: impl AsRef) -> Self { + pub fn with_path(&self, path: impl TimsTofPathLike) -> Self { + // TODO + let path = Some(path.to_timstof_path().unwrap()); Self { - path: path.as_ref().to_path_buf(), + path, ..self.clone() } } @@ -70,22 +74,20 @@ impl PrecursorReaderBuilder { } } - pub fn finalize(&self) -> Result { + pub fn finalize(self) -> Result { + let path = match self.path { + None => return Err(PrecursorReaderError::NoPath), + Some(path) => path, + }; let precursor_reader: Box = - match self.path.extension().and_then(|e| e.to_str()) { + match path.file_type() { #[cfg(feature = "minitdf")] - Some("parquet") => { - Box::new(MiniTDFPrecursorReader::new(self.path.clone())?) + TimsTofFileType::MiniTDF => { + Box::new(MiniTDFPrecursorReader::new(path)?) }, #[cfg(feature = "tdf")] - Some("tdf") => Box::new(TDFPrecursorReader::new( - self.path.clone(), - self.config.clone(), - )?), - _ => { - return Err(PrecursorReaderError::PrecursorReaderFileError( - self.path.clone(), - )) + TimsTofFileType::TDF => { + Box::new(TDFPrecursorReader::new(path, self.config)?) }, }; let reader = PrecursorReader { precursor_reader }; @@ -106,6 +108,8 @@ pub enum PrecursorReaderError { #[cfg(feature = "tdf")] #[error("{0}")] TDFPrecursorReaderError(#[from] TDFPrecursorReaderError), - #[error("File {0} not valid")] - PrecursorReaderFileError(PathBuf), + #[error("No path provided")] + NoPath, + #[error("{0}")] + TimsTofPathError(#[from] TimsTofPathError), } diff --git a/src/io/readers/precursor_reader/minitdf.rs b/src/io/readers/precursor_reader/minitdf.rs index be11ee0..70c2c25 100644 --- a/src/io/readers/precursor_reader/minitdf.rs +++ b/src/io/readers/precursor_reader/minitdf.rs @@ -1,10 +1,9 @@ -use std::path::Path; - use crate::{ io::readers::file_readers::parquet_reader::{ - precursors::ParquetPrecursor, ParquetError, ReadableParquetTable, + precursors::ParquetPrecursor, ParquetReaderError, ReadableParquetTable, }, ms_data::Precursor, + readers::TimsTofPathLike, }; use super::PrecursorReaderTrait; @@ -16,9 +15,9 @@ pub struct MiniTDFPrecursorReader { impl MiniTDFPrecursorReader { pub fn new( - path: impl AsRef, + path: impl TimsTofPathLike, ) -> Result { - let parquet_precursors = ParquetPrecursor::from_parquet_file(&path)?; + let parquet_precursors = ParquetPrecursor::from_parquet_file(path)?; let reader = Self { parquet_precursors }; Ok(reader) } @@ -46,4 +45,4 @@ impl PrecursorReaderTrait for MiniTDFPrecursorReader { #[derive(thiserror::Error, Debug)] #[error("{0}")] -pub struct MiniTDFPrecursorReaderError(#[from] ParquetError); +pub struct MiniTDFPrecursorReaderError(#[from] ParquetReaderError); diff --git a/src/io/readers/precursor_reader/tdf.rs b/src/io/readers/precursor_reader/tdf.rs index 60d179d..1a37c6d 100644 --- a/src/io/readers/precursor_reader/tdf.rs +++ b/src/io/readers/precursor_reader/tdf.rs @@ -1,17 +1,16 @@ mod dda; mod dia; -use std::path::Path; - use dda::{DDATDFPrecursorReader, DDATDFPrecursorReaderError}; use dia::{DIATDFPrecursorReader, DIATDFPrecursorReaderError}; use crate::{ io::readers::{ - file_readers::sql_reader::{SqlError, SqlReader}, + file_readers::sql_reader::{SqlReader, SqlReaderError}, FrameWindowSplittingConfiguration, }, ms_data::{AcquisitionType, Precursor}, + readers::TimsTofPathLike, }; use super::PrecursorReaderTrait; @@ -22,11 +21,10 @@ pub struct TDFPrecursorReader { impl TDFPrecursorReader { pub fn new( - path: impl AsRef, + path: impl TimsTofPathLike, splitting_strategy: FrameWindowSplittingConfiguration, ) -> Result { - let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(sql_path)?; + let tdf_sql_reader = SqlReader::open(&path)?; let sql_frames: Vec = tdf_sql_reader.read_column_from_table("ScanMode", "Frames")?; let acquisition_type = if sql_frames.iter().any(|&x| x == 8) { @@ -39,7 +37,7 @@ impl TDFPrecursorReader { let precursor_reader: Box = match acquisition_type { AcquisitionType::DDAPASEF => { - Box::new(DDATDFPrecursorReader::new(path)?) + Box::new(DDATDFPrecursorReader::new(&path)?) }, AcquisitionType::DIAPASEF => Box::new( DIATDFPrecursorReader::new(path, splitting_strategy)?, @@ -70,7 +68,7 @@ impl PrecursorReaderTrait for TDFPrecursorReader { #[derive(Debug, thiserror::Error)] pub enum TDFPrecursorReaderError { #[error("{0}")] - SqlError(#[from] SqlError), + SqlReaderError(#[from] SqlReaderError), #[error("{0}")] DDATDFPrecursorReaderError(#[from] DDATDFPrecursorReaderError), #[error("{0}")] diff --git a/src/io/readers/precursor_reader/tdf/dda.rs b/src/io/readers/precursor_reader/tdf/dda.rs index 80cf641..dc41e9d 100644 --- a/src/io/readers/precursor_reader/tdf/dda.rs +++ b/src/io/readers/precursor_reader/tdf/dda.rs @@ -1,16 +1,16 @@ -use std::path::Path; - use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, }, io::readers::{ file_readers::sql_reader::{ - precursors::SqlPrecursor, ReadableSqlTable, SqlError, SqlReader, + precursors::SqlPrecursor, ReadableSqlTable, SqlReader, + SqlReaderError, }, MetadataReader, MetadataReaderError, }, ms_data::Precursor, + readers::TimsTofPathLike, }; use super::PrecursorReaderTrait; @@ -24,10 +24,9 @@ pub struct DDATDFPrecursorReader { impl DDATDFPrecursorReader { pub fn new( - path: impl AsRef, + path: impl TimsTofPathLike, ) -> Result { - let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(sql_path)?; + let tdf_sql_reader = SqlReader::open(&path)?; let metadata = MetadataReader::new(&path)?; let rt_converter: Frame2RtConverter = metadata.rt_converter; let im_converter: Scan2ImConverter = metadata.im_converter; @@ -66,7 +65,7 @@ impl PrecursorReaderTrait for DDATDFPrecursorReader { #[derive(Debug, thiserror::Error)] pub enum DDATDFPrecursorReaderError { #[error("{0}")] - SqlError(#[from] SqlError), + SqlReaderError(#[from] SqlReaderError), #[error("{0}")] MetadataReaderError(#[from] MetadataReaderError), } diff --git a/src/io/readers/precursor_reader/tdf/dia.rs b/src/io/readers/precursor_reader/tdf/dia.rs index 722b80c..e3d0c29 100644 --- a/src/io/readers/precursor_reader/tdf/dia.rs +++ b/src/io/readers/precursor_reader/tdf/dia.rs @@ -1,16 +1,14 @@ -use std::path::Path; - -use crate::io::readers::FrameWindowSplittingConfiguration; use crate::{ domain_converters::{ ConvertableDomain, Frame2RtConverter, Scan2ImConverter, }, io::readers::{ - file_readers::sql_reader::{SqlError, SqlReader}, + file_readers::sql_reader::{SqlReader, SqlReaderError}, MetadataReader, MetadataReaderError, QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, }, ms_data::{Precursor, QuadrupoleSettings}, + readers::{FrameWindowSplittingConfiguration, TimsTofPathLike}, }; use super::PrecursorReaderTrait; @@ -24,11 +22,10 @@ pub struct DIATDFPrecursorReader { impl DIATDFPrecursorReader { pub fn new( - path: impl AsRef, + path: impl TimsTofPathLike, splitting_config: FrameWindowSplittingConfiguration, ) -> Result { - let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(sql_path)?; + let tdf_sql_reader = SqlReader::open(&path)?; let metadata = MetadataReader::new(&path)?; let rt_converter: Frame2RtConverter = metadata.rt_converter; let im_converter: Scan2ImConverter = metadata.im_converter; @@ -73,7 +70,7 @@ impl PrecursorReaderTrait for DIATDFPrecursorReader { #[derive(Debug, thiserror::Error)] pub enum DIATDFPrecursorReaderError { #[error("{0}")] - SqlError(#[from] SqlError), + SqlReaderError(#[from] SqlReaderError), #[error("{0}")] MetadataReaderError(#[from] MetadataReaderError), #[error("{0}")] diff --git a/src/io/readers/quad_settings_reader.rs b/src/io/readers/quad_settings_reader.rs index eb67dfc..140905b 100644 --- a/src/io/readers/quad_settings_reader.rs +++ b/src/io/readers/quad_settings_reader.rs @@ -1,6 +1,5 @@ #[cfg(feature = "serialize")] use serde::{Deserialize, Serialize}; -use std::path::Path; use crate::{ domain_converters::{ConvertableDomain, Scan2ImConverter}, @@ -8,9 +7,12 @@ use crate::{ utils::vec_utils::argsort, }; -use super::file_readers::sql_reader::{ - frame_groups::SqlWindowGroup, quad_settings::SqlQuadSettings, - ReadableSqlTable, SqlError, SqlReader, +use super::{ + file_readers::sql_reader::{ + frame_groups::SqlWindowGroup, quad_settings::SqlQuadSettings, + ReadableSqlTable, SqlReader, SqlReaderError, + }, + TimsTofPathLike, }; pub struct QuadrupoleSettingsReader { @@ -21,10 +23,9 @@ pub struct QuadrupoleSettingsReader { impl QuadrupoleSettingsReader { // TODO: refactor due to large size pub fn new( - path: impl AsRef, + path: impl TimsTofPathLike, ) -> Result, QuadrupoleSettingsReaderError> { - let sql_path = path.as_ref(); - let tdf_sql_reader = SqlReader::open(&sql_path)?; + let tdf_sql_reader = SqlReader::open(path)?; Self::from_sql_settings(&tdf_sql_reader) } @@ -37,7 +38,8 @@ impl QuadrupoleSettingsReader { .iter() .map(|x| x.window_group) .max() - .unwrap() as usize; // SqlReader cannot return empty vecs, so always succeeds + .expect("SqlReader cannot return empty vecs, so there is always a max window_group") + as usize; let quadrupole_settings = (0..window_group_count) .map(|window_group| { let mut quad = QuadrupoleSettings::default(); @@ -122,7 +124,7 @@ impl QuadrupoleSettingsReader { #[derive(Debug, thiserror::Error)] pub enum QuadrupoleSettingsReaderError { #[error("{0}")] - SqlError(#[from] SqlError), + SqlReaderError(#[from] SqlReaderError), } type MobilitySpanStep = (f64, f64); @@ -306,9 +308,18 @@ fn expand_window_settings( let window = window_group.window_group; let frame = window_group.frame; let group = &quadrupole_settings[window as usize - 1]; - let window_group_start = - group.scan_starts.iter().min().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds - let window_group_end = group.scan_ends.iter().max().unwrap().clone(); // SqlReader cannot return empty vecs, so always succeeds + let window_group_start = group + .scan_starts + .iter() + .min() + .expect("SqlReader cannot return empty vecs, so there is always min window_group index") + .clone(); + let window_group_end = group + .scan_ends + .iter() + .max() + .expect("SqlReader cannot return empty vecs, so there is always max window_group index") + .clone(); for (sws, swe) in scan_range_subsplit(window_group_start, window_group_end, &strategy) { diff --git a/src/io/readers/spectrum_reader.rs b/src/io/readers/spectrum_reader.rs index 7b125ab..ce80658 100644 --- a/src/io/readers/spectrum_reader.rs +++ b/src/io/readers/spectrum_reader.rs @@ -1,51 +1,39 @@ +mod builder; +mod config; +mod errors; #[cfg(feature = "minitdf")] mod minitdf; +mod spectrum_trait; #[cfg(feature = "tdf")] mod tdf; -use core::fmt; - -#[cfg(feature = "minitdf")] -use minitdf::{MiniTDFSpectrumReader, MiniTDFSpectrumReaderError}; -use rayon::iter::{IntoParallelIterator, ParallelIterator}; -#[cfg(feature = "serialize")] -use serde::{Deserialize, Serialize}; -use std::path::{Path, PathBuf}; -#[cfg(feature = "tdf")] -use tdf::{TDFSpectrumReader, TDFSpectrumReaderError}; - +use super::TimsTofPathLike; use crate::ms_data::Spectrum; - -#[cfg(feature = "tdf")] -use super::FrameWindowSplittingConfiguration; +pub use builder::SpectrumReaderBuilder; +pub use config::{SpectrumProcessingParams, SpectrumReaderConfig}; +pub use errors::SpectrumReaderError; +use rayon::prelude::*; +use spectrum_trait::SpectrumReaderTrait; pub struct SpectrumReader { spectrum_reader: Box, } -impl fmt::Debug for SpectrumReader { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "SpectrumReader {{ /* fields omitted */ }}") +impl SpectrumReader { + pub fn new( + path: impl TimsTofPathLike, + ) -> Result { + Self::build().with_path(path).finalize() } -} -impl SpectrumReader { pub fn build() -> SpectrumReaderBuilder { SpectrumReaderBuilder::default() } - pub fn new(path: impl AsRef) -> Result { - Self::build().with_path(path).finalize() - } - pub fn get(&self, index: usize) -> Result { self.spectrum_reader.get(index) } - pub fn get_path(&self) -> PathBuf { - self.spectrum_reader.get_path() - } - pub fn len(&self) -> usize { self.spectrum_reader.len() } @@ -70,97 +58,3 @@ impl SpectrumReader { self.spectrum_reader.calibrate(); } } - -#[derive(Debug, Default, Clone)] -pub struct SpectrumReaderBuilder { - path: PathBuf, - config: SpectrumReaderConfig, -} - -impl SpectrumReaderBuilder { - pub fn with_path(&self, path: impl AsRef) -> Self { - Self { - path: path.as_ref().to_path_buf(), - ..self.clone() - } - } - - pub fn with_config(&self, config: SpectrumReaderConfig) -> Self { - Self { - config: config, - ..self.clone() - } - } - - pub fn finalize(&self) -> Result { - let spectrum_reader: Box = - match self.path.extension().and_then(|e| e.to_str()) { - #[cfg(feature = "minitdf")] - Some("ms2") => { - Box::new(MiniTDFSpectrumReader::new(self.path.clone())?) - }, - #[cfg(feature = "tdf")] - Some("d") => Box::new(TDFSpectrumReader::new( - self.path.clone(), - self.config.clone(), - )?), - _ => { - return Err(SpectrumReaderError::SpectrumReaderFileError( - self.path.clone(), - )) - }, - }; - let mut reader = SpectrumReader { spectrum_reader }; - if self.config.spectrum_processing_params.calibrate { - reader.calibrate(); - } - Ok(reader) - } -} - -trait SpectrumReaderTrait: Sync + Send { - fn get(&self, index: usize) -> Result; - fn get_path(&self) -> PathBuf; - fn len(&self) -> usize; - fn calibrate(&mut self); -} - -#[derive(Debug, thiserror::Error)] -pub enum SpectrumReaderError { - #[cfg(feature = "minitdf")] - #[error("{0}")] - MiniTDFSpectrumReaderError(#[from] MiniTDFSpectrumReaderError), - #[cfg(feature = "tdf")] - #[error("{0}")] - TDFSpectrumReaderError(#[from] TDFSpectrumReaderError), - #[error("File {0} not valid")] - SpectrumReaderFileError(PathBuf), -} - -#[derive(Debug, Clone, Copy)] -#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] -pub struct SpectrumProcessingParams { - pub smoothing_window: u32, - pub centroiding_window: u32, - pub calibration_tolerance: f64, - pub calibrate: bool, -} - -impl Default for SpectrumProcessingParams { - fn default() -> Self { - Self { - smoothing_window: 1, - centroiding_window: 1, - calibration_tolerance: 0.1, - calibrate: false, - } - } -} - -#[derive(Debug, Default, Clone, Copy)] -#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] -pub struct SpectrumReaderConfig { - pub spectrum_processing_params: SpectrumProcessingParams, - #[cfg(feature = "tdf")] - pub frame_splitting_params: FrameWindowSplittingConfiguration, -} diff --git a/src/io/readers/spectrum_reader/builder.rs b/src/io/readers/spectrum_reader/builder.rs new file mode 100644 index 0000000..b191af6 --- /dev/null +++ b/src/io/readers/spectrum_reader/builder.rs @@ -0,0 +1,58 @@ +use crate::readers::{TimsTofFileType, TimsTofPath, TimsTofPathLike}; + +use super::{ + errors::SpectrumReaderError, SpectrumReader, SpectrumReaderConfig, + SpectrumReaderTrait, +}; + +#[cfg(feature = "minitdf")] +use super::minitdf::MiniTDFSpectrumReader; +#[cfg(feature = "tdf")] +use super::tdf::TDFSpectrumReader; + +#[derive(Debug, Default, Clone)] +pub struct SpectrumReaderBuilder { + path: Option, + config: SpectrumReaderConfig, +} + +impl SpectrumReaderBuilder { + pub fn with_path(&self, path: impl TimsTofPathLike) -> Self { + // TODO + let path = Some(path.to_timstof_path().unwrap()); + Self { + path, + ..self.clone() + } + } + + pub fn with_config(&self, config: SpectrumReaderConfig) -> Self { + Self { + config: config, + ..self.clone() + } + } + + pub fn finalize(self) -> Result { + let path = match self.path { + None => return Err(SpectrumReaderError::NoPath), + Some(path) => path, + }; + let spectrum_reader: Box = + match path.file_type() { + #[cfg(feature = "minitdf")] + TimsTofFileType::MiniTDF => { + Box::new(MiniTDFSpectrumReader::new(path)?) + }, + #[cfg(feature = "tdf")] + TimsTofFileType::TDF => { + Box::new(TDFSpectrumReader::new(path, self.config)?) + }, + }; + let mut reader = SpectrumReader { spectrum_reader }; + if self.config.spectrum_processing_params.calibrate { + reader.calibrate(); + } + Ok(reader) + } +} diff --git a/src/io/readers/spectrum_reader/config.rs b/src/io/readers/spectrum_reader/config.rs new file mode 100644 index 0000000..d387b49 --- /dev/null +++ b/src/io/readers/spectrum_reader/config.rs @@ -0,0 +1,33 @@ +#[cfg(feature = "tdf")] +use super::super::FrameWindowSplittingConfiguration; + +#[cfg(feature = "serialize")] +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Copy)] +#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] +pub struct SpectrumProcessingParams { + pub smoothing_window: u32, + pub centroiding_window: u32, + pub calibration_tolerance: f64, + pub calibrate: bool, +} + +impl Default for SpectrumProcessingParams { + fn default() -> Self { + Self { + smoothing_window: 1, + centroiding_window: 1, + calibration_tolerance: 0.1, + calibrate: false, + } + } +} + +#[derive(Debug, Default, Clone, Copy)] +#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] +pub struct SpectrumReaderConfig { + pub spectrum_processing_params: SpectrumProcessingParams, + #[cfg(feature = "tdf")] + pub frame_splitting_params: FrameWindowSplittingConfiguration, +} diff --git a/src/io/readers/spectrum_reader/errors.rs b/src/io/readers/spectrum_reader/errors.rs new file mode 100644 index 0000000..1782645 --- /dev/null +++ b/src/io/readers/spectrum_reader/errors.rs @@ -0,0 +1,16 @@ +#[cfg(feature = "minitdf")] +use super::minitdf::MiniTDFSpectrumReaderError; +#[cfg(feature = "tdf")] +use super::tdf::TDFSpectrumReaderError; + +#[derive(Debug, thiserror::Error)] +pub enum SpectrumReaderError { + #[cfg(feature = "minitdf")] + #[error("{0}")] + MiniTDFSpectrumReaderError(#[from] MiniTDFSpectrumReaderError), + #[cfg(feature = "tdf")] + #[error("{0}")] + TDFSpectrumReaderError(#[from] TDFSpectrumReaderError), + #[error("No path provided")] + NoPath, +} diff --git a/src/io/readers/spectrum_reader/minitdf.rs b/src/io/readers/spectrum_reader/minitdf.rs index e1b5da9..00375b3 100644 --- a/src/io/readers/spectrum_reader/minitdf.rs +++ b/src/io/readers/spectrum_reader/minitdf.rs @@ -1,10 +1,8 @@ -use std::path::{Path, PathBuf}; - use crate::{ io::readers::{ file_readers::{ parquet_reader::{ - precursors::ParquetPrecursor, ParquetError, + precursors::ParquetPrecursor, ParquetReaderError, ReadableParquetTable, }, tdf_blob_reader::{ @@ -14,14 +12,13 @@ use crate::{ PrecursorReader, PrecursorReaderError, }, ms_data::Spectrum, - utils::find_extension, + readers::TimsTofPathLike, }; use super::{SpectrumReaderError, SpectrumReaderTrait}; #[derive(Debug)] pub struct MiniTDFSpectrumReader { - path: PathBuf, precursor_reader: PrecursorReader, blob_reader: IndexedTdfBlobReader, collision_energies: Vec, @@ -29,32 +26,20 @@ pub struct MiniTDFSpectrumReader { impl MiniTDFSpectrumReader { pub fn new( - path: impl AsRef, + path: impl TimsTofPathLike, ) -> Result { - let parquet_file_name = find_extension(&path, "ms2spectrum.parquet") - .ok_or(MiniTDFSpectrumReaderError::FileNotFound( - "analysis.tdf".to_string(), - ))?; - let precursor_reader = PrecursorReader::build() - .with_path(&parquet_file_name) - .finalize()?; - let offsets = ParquetPrecursor::from_parquet_file(&parquet_file_name)? + let precursor_reader = + PrecursorReader::build().with_path(&path).finalize()?; + let offsets = ParquetPrecursor::from_parquet_file(&path)? .iter() .map(|x| x.offset as usize) .collect(); - let collision_energies = - ParquetPrecursor::from_parquet_file(&parquet_file_name)? - .iter() - .map(|x| x.collision_energy) - .collect(); - let bin_file_name = find_extension(&path, "bin").ok_or( - MiniTDFSpectrumReaderError::FileNotFound( - "analysis.tdf".to_string(), - ), - )?; - let blob_reader = IndexedTdfBlobReader::new(&bin_file_name, offsets)?; + let collision_energies = ParquetPrecursor::from_parquet_file(&path)? + .iter() + .map(|x| x.collision_energy) + .collect(); + let blob_reader = IndexedTdfBlobReader::new(&path, offsets)?; let reader = Self { - path: path.as_ref().to_path_buf(), precursor_reader, blob_reader, collision_energies, @@ -70,12 +55,7 @@ impl MiniTDFSpectrumReader { spectrum.index = index; let blob = self.blob_reader.get(index)?; if !blob.is_empty() { - let size: usize = blob.len(); - let spectrum_data: Vec = (0..size) - .map(|i| { - blob.get(i).ok_or(MiniTDFSpectrumReaderError::BlobError) - }) - .collect::, _>>()?; + let spectrum_data: Vec = blob.get_all(); let scan_count: usize = blob.len() / 3; let tof_indices_bytes: &[u32] = &spectrum_data[..scan_count as usize * 2]; @@ -117,10 +97,6 @@ impl SpectrumReaderTrait for MiniTDFSpectrumReader { self.precursor_reader.len() } - fn get_path(&self) -> PathBuf { - self.path.clone() - } - fn calibrate(&mut self) {} } @@ -129,7 +105,7 @@ pub enum MiniTDFSpectrumReaderError { #[error("{0}")] PrecursorReaderError(#[from] PrecursorReaderError), #[error("{0}")] - ParquetError(#[from] ParquetError), + ParquetReaderError(#[from] ParquetReaderError), #[error("{0}")] IndexedTdfBlobReaderError(#[from] IndexedTdfBlobReaderError), #[error("{0}")] diff --git a/src/io/readers/spectrum_reader/spectrum_trait.rs b/src/io/readers/spectrum_reader/spectrum_trait.rs new file mode 100644 index 0000000..c05b8ea --- /dev/null +++ b/src/io/readers/spectrum_reader/spectrum_trait.rs @@ -0,0 +1,9 @@ +use crate::Spectrum; + +use super::errors::SpectrumReaderError; + +pub(crate) trait SpectrumReaderTrait: Sync + Send { + fn get(&self, index: usize) -> Result; + fn len(&self) -> usize; + fn calibrate(&mut self); +} diff --git a/src/io/readers/spectrum_reader/tdf.rs b/src/io/readers/spectrum_reader/tdf.rs index c230040..77af731 100644 --- a/src/io/readers/spectrum_reader/tdf.rs +++ b/src/io/readers/spectrum_reader/tdf.rs @@ -4,24 +4,22 @@ mod raw_spectra; use raw_spectra::{RawSpectrum, RawSpectrumReader, RawSpectrumReaderError}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; -use std::path::{Path, PathBuf}; use crate::{ domain_converters::{ConvertableDomain, Tof2MzConverter}, io::readers::{ - file_readers::sql_reader::{SqlError, SqlReader}, + file_readers::sql_reader::{SqlReader, SqlReaderError}, FrameReader, FrameReaderError, MetadataReader, MetadataReaderError, PrecursorReader, PrecursorReaderError, }, ms_data::Spectrum, - utils::find_extension, + readers::TimsTofPathLike, }; use super::{SpectrumReaderConfig, SpectrumReaderError, SpectrumReaderTrait}; #[derive(Debug)] pub struct TDFSpectrumReader { - path: PathBuf, precursor_reader: PrecursorReader, mz_reader: Tof2MzConverter, raw_spectrum_reader: RawSpectrumReader, @@ -30,18 +28,15 @@ pub struct TDFSpectrumReader { impl TDFSpectrumReader { pub fn new( - path_name: impl AsRef, + path: impl TimsTofPathLike, config: SpectrumReaderConfig, ) -> Result { - let frame_reader: FrameReader = FrameReader::new(&path_name)?; - let sql_path = find_extension(&path_name, "analysis.tdf").ok_or( - TDFSpectrumReaderError::FileNotFound("analysis.tdf".to_string()), - )?; - let metadata = MetadataReader::new(&sql_path)?; + let frame_reader: FrameReader = FrameReader::new(&path)?; + let metadata = MetadataReader::new(&path)?; let mz_reader: Tof2MzConverter = metadata.mz_converter; - let tdf_sql_reader = SqlReader::open(&sql_path)?; + let tdf_sql_reader = SqlReader::open(&path)?; let precursor_reader = PrecursorReader::build() - .with_path(&sql_path) + .with_path(&path) .with_config(config.frame_splitting_params) .finalize()?; let acquisition_type = frame_reader.get_acquisition(); @@ -55,7 +50,6 @@ impl TDFSpectrumReader { splitting_strategy, )?; let reader = Self { - path: path_name.as_ref().to_path_buf(), precursor_reader, mz_reader, raw_spectrum_reader, @@ -103,10 +97,6 @@ impl SpectrumReaderTrait for TDFSpectrumReader { self.raw_spectrum_reader.len() } - fn get_path(&self) -> PathBuf { - self.path.clone() - } - fn calibrate(&mut self) { let hits: Vec<(f64, u32)> = (0..self.precursor_reader.len()) .into_par_iter() @@ -143,7 +133,7 @@ impl SpectrumReaderTrait for TDFSpectrumReader { #[derive(Debug, thiserror::Error)] pub enum TDFSpectrumReaderError { #[error("{0}")] - SqlError(#[from] SqlError), + SqlReaderError(#[from] SqlReaderError), #[error("{0}")] PrecursorReaderError(#[from] PrecursorReaderError), #[error("{0}")] diff --git a/src/io/readers/spectrum_reader/tdf/dda.rs b/src/io/readers/spectrum_reader/tdf/dda.rs index 2434192..db8f6c8 100644 --- a/src/io/readers/spectrum_reader/tdf/dda.rs +++ b/src/io/readers/spectrum_reader/tdf/dda.rs @@ -1,8 +1,8 @@ use crate::{ io::readers::{ file_readers::sql_reader::{ - pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlError, - SqlReader, + pasef_frame_msms::SqlPasefFrameMsMs, ReadableSqlTable, SqlReader, + SqlReaderError, }, FrameReader, FrameReaderError, }, @@ -30,7 +30,10 @@ impl DDARawSpectrumReader { let pasef_precursors = &pasef_frames.iter().map(|x| x.precursor).collect(); let order: Vec = argsort(&pasef_precursors); - let max_precursor = pasef_precursors.iter().max().unwrap(); // SqlReader cannot return empty vecs, so always succeeds + let max_precursor = pasef_precursors + .iter() + .max() + .expect("SqlReader cannot return empty vecs, so there is always a max precursor index"); let mut offsets: Vec = Vec::with_capacity(max_precursor + 1); offsets.push(0); for (offset, &index) in order.iter().enumerate().take(order.len() - 1) { @@ -118,7 +121,7 @@ impl RawSpectrumReaderTrait for DDARawSpectrumReader { #[derive(Debug, thiserror::Error)] pub enum DDARawSpectrumReaderError { #[error("{0}")] - SqlError(#[from] SqlError), + SqlReaderError(#[from] SqlReaderError), #[error("{0}")] FrameReaderError(#[from] FrameReaderError), } diff --git a/src/io/readers/spectrum_reader/tdf/dia.rs b/src/io/readers/spectrum_reader/tdf/dia.rs index 3dad26c..cc1357d 100644 --- a/src/io/readers/spectrum_reader/tdf/dia.rs +++ b/src/io/readers/spectrum_reader/tdf/dia.rs @@ -2,7 +2,7 @@ use crate::io::readers::quad_settings_reader::FrameWindowSplittingStrategy; use crate::io::readers::FrameReaderError; use crate::{ io::readers::{ - file_readers::sql_reader::{SqlError, SqlReader}, + file_readers::sql_reader::{SqlReader, SqlReaderError}, FrameReader, QuadrupoleSettingsReader, QuadrupoleSettingsReaderError, }, ms_data::QuadrupoleSettings, @@ -83,7 +83,7 @@ impl RawSpectrumReaderTrait for DIARawSpectrumReader { #[derive(Debug, thiserror::Error)] pub enum DIARawSpectrumReaderError { #[error("{0}")] - SqlError(#[from] SqlError), + SqlReaderError(#[from] SqlReaderError), #[error("{0}")] QuadrupoleSettingsReaderError(#[from] QuadrupoleSettingsReaderError), #[error("{0}")] diff --git a/src/io/readers/timstof.rs b/src/io/readers/timstof.rs new file mode 100644 index 0000000..4b8e65a --- /dev/null +++ b/src/io/readers/timstof.rs @@ -0,0 +1,140 @@ +use std::{ + fs, io, + path::{Path, PathBuf}, +}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)] +pub enum TimsTofFileType { + #[cfg(feature = "minitdf")] + MiniTDF, + #[cfg(feature = "tdf")] + TDF, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct TimsTofPath { + path: PathBuf, + file_type: TimsTofFileType, +} + +impl TimsTofPath { + pub fn new(path: impl AsRef) -> Result { + let path = path.as_ref().canonicalize()?; + #[cfg(feature = "tdf")] + if tdf(&path).is_ok() & tdf_bin(&path).is_ok() { + return Ok(Self { + path, + file_type: TimsTofFileType::TDF, + }); + } + #[cfg(feature = "minitdf")] + if ms2_bin(&path).is_ok() & ms2_parquet(&path).is_ok() { + return Ok(Self { + path, + file_type: TimsTofFileType::MiniTDF, + }); + } + match path.parent() { + Some(parent) => match Self::new(parent) { + Ok(result) => Ok(result), + Err(_) => Err(TimsTofPathError::UnknownType(path)), + }, + None => return Err(TimsTofPathError::UnknownType(path)), + } + } + + pub fn tdf(&self) -> Result { + tdf(self) + } + + pub fn tdf_bin(&self) -> Result { + tdf_bin(self) + } + + pub fn ms2_bin(&self) -> Result { + ms2_bin(self) + } + + pub fn ms2_parquet(&self) -> Result { + ms2_parquet(self) + } + + pub fn file_type(&self) -> TimsTofFileType { + self.file_type + } +} + +fn tdf(path: impl AsRef) -> Result { + find_extension(path, "analysis.tdf") +} + +fn tdf_bin(path: impl AsRef) -> Result { + find_extension(path, "analysis.tdf_bin") +} + +fn ms2_bin(path: impl AsRef) -> Result { + // match find_extension(path, "ms2.bin") { + // Ok(result) => Ok(result), + // Err(_) => find_extension(path, "ms2spectrum.bin"), + // } + // find_extension(path, "ms2.bin") + find_extension(path, "ms2spectrum.bin") +} + +fn ms2_parquet(path: impl AsRef) -> Result { + // match find_extension(path, "ms2.parquet") { + // Ok(result) => Ok(result), + // Err(_) => find_extension(path, "ms2spectrum.parquet"), + // } + // find_extension(path, "ms2.parquet") + find_extension(path, "ms2spectrum.parquet") +} + +fn find_extension( + path: impl AsRef, + extension: &str, +) -> Result { + let extension_lower = extension.to_lowercase(); + for entry in fs::read_dir(&path)? { + if let Ok(entry) = entry { + let file_path = entry.path(); + if let Some(file_name) = + file_path.file_name().and_then(|name| name.to_str()) + { + if file_name.to_lowercase().ends_with(&extension_lower) { + return Ok(file_path); + } + } + } + } + Err(TimsTofPathError::Extension( + extension.to_string(), + path.as_ref().to_path_buf(), + )) +} + +impl AsRef for TimsTofPath { + fn as_ref(&self) -> &Path { + &self.path + } +} + +pub trait TimsTofPathLike: AsRef { + fn to_timstof_path(&self) -> Result; +} + +impl> TimsTofPathLike for T { + fn to_timstof_path(&self) -> Result { + TimsTofPath::new(&self) + } +} + +#[derive(Debug, thiserror::Error)] +pub enum TimsTofPathError { + #[error("Extension {0} not found for {1}")] + Extension(String, PathBuf), + #[error("{0}")] + IO(#[from] io::Error), + #[error("No valid type found for {0}")] + UnknownType(PathBuf), +} diff --git a/src/io/writers/mgf.rs b/src/io/writers/mgf.rs index 715a5ed..a44282b 100644 --- a/src/io/writers/mgf.rs +++ b/src/io/writers/mgf.rs @@ -1,8 +1,5 @@ -use std::fs::File; -use std::io::Write; -use std::path::Path; - -use crate::ms_data::Spectrum; +use crate::Spectrum; +use std::{fs::File, io::Write, path::Path}; pub struct MGFWriter; diff --git a/src/ms_data/frames.rs b/src/ms_data/frames.rs index 6cf7fd1..50b521f 100644 --- a/src/ms_data/frames.rs +++ b/src/ms_data/frames.rs @@ -8,7 +8,7 @@ pub struct Frame { pub tof_indices: Vec, pub intensities: Vec, pub index: usize, - pub rt: f64, + pub rt_in_seconds: f64, pub acquisition_type: AcquisitionType, pub ms_level: MSLevel, pub quadrupole_settings: Arc, diff --git a/src/ms_data/metadata.rs b/src/ms_data/metadata.rs index 8e78364..06a3ead 100644 --- a/src/ms_data/metadata.rs +++ b/src/ms_data/metadata.rs @@ -1,5 +1,3 @@ -use std::path::PathBuf; - use crate::domain_converters::{ Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, }; @@ -8,7 +6,6 @@ use crate::domain_converters::{ #[derive(Clone, Debug, Default, PartialEq)] pub struct Metadata { - pub path: PathBuf, pub rt_converter: Frame2RtConverter, pub im_converter: Scan2ImConverter, pub mz_converter: Tof2MzConverter, diff --git a/src/ms_data/quadrupole.rs b/src/ms_data/quadrupole.rs index 0d96f61..2f89656 100644 --- a/src/ms_data/quadrupole.rs +++ b/src/ms_data/quadrupole.rs @@ -1,3 +1,5 @@ +use std::hash::{Hash, Hasher}; + /// The quadrupole settings used for fragmentation. #[derive(Clone, Debug, Default, PartialEq)] pub struct QuadrupoleSettings { @@ -9,6 +11,23 @@ pub struct QuadrupoleSettings { pub collision_energy: Vec, } +impl Hash for QuadrupoleSettings { + fn hash(&self, state: &mut H) { + self.index.hash(state); + self.scan_starts.hash(state); + self.scan_ends.hash(state); + for mz in &self.isolation_mz { + mz.to_bits().hash(state); + } + for width in &self.isolation_width { + width.to_bits().hash(state); + } + for energy in &self.collision_energy { + energy.to_bits().hash(state); + } + } +} + impl QuadrupoleSettings { pub fn len(&self) -> usize { self.isolation_mz.len() diff --git a/src/utils.rs b/src/utils.rs index 7021ffd..9aebe98 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,26 +1 @@ -use std::{ - fs, - path::{Path, PathBuf}, -}; - pub mod vec_utils; - -pub fn find_extension( - path: impl AsRef, - extension: &str, -) -> Option { - let extension_lower = extension.to_lowercase(); - for entry in fs::read_dir(&path).ok()? { - if let Ok(entry) = entry { - let file_path = entry.path(); - if let Some(file_name) = - file_path.file_name().and_then(|name| name.to_str()) - { - if file_name.to_lowercase().ends_with(&extension_lower) { - return Some(file_path); - } - } - } - } - None -} diff --git a/tests/frame_readers.rs b/tests/frame_readers.rs index 5324360..942f5e5 100644 --- a/tests/frame_readers.rs +++ b/tests/frame_readers.rs @@ -32,7 +32,7 @@ mod tests { tof_indices: (0..10).collect(), intensities: (0..10).map(|x| (x + 1) * 2).collect(), index: 1, - rt: 0.1, + rt_in_seconds: 0.1, ms_level: MSLevel::MS1, quadrupole_settings: Arc::new(QuadrupoleSettings::default()), acquisition_type: AcquisitionType::DDAPASEF, @@ -45,7 +45,7 @@ mod tests { tof_indices: (36..78).collect(), intensities: (36..78).map(|x| (x + 1) * 2).collect(), index: 3, - rt: 0.3, + rt_in_seconds: 0.3, ms_level: MSLevel::MS1, quadrupole_settings: Arc::new(QuadrupoleSettings::default()), acquisition_type: AcquisitionType::DDAPASEF, @@ -80,7 +80,7 @@ mod tests { tof_indices: (10..36).collect(), intensities: (10..36).map(|x| (x + 1) * 2).collect(), index: 2, - rt: 0.2, + rt_in_seconds: 0.2, ms_level: MSLevel::MS2, quadrupole_settings: Arc::new(QuadrupoleSettings::default()), acquisition_type: AcquisitionType::DDAPASEF, @@ -93,7 +93,7 @@ mod tests { tof_indices: (78..136).collect(), intensities: (78..136).map(|x| (x + 1) * 2).collect(), index: 4, - rt: 0.4, + rt_in_seconds: 0.4, ms_level: MSLevel::MS2, quadrupole_settings: Arc::new(QuadrupoleSettings::default()), acquisition_type: AcquisitionType::DDAPASEF,