From afa5f9469baa837fc156a65944640910cdc3e776 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 1 Apr 2025 10:39:54 -0400 Subject: [PATCH 1/5] Upgrade Rust and dependencies. --- Cargo.lock | 40 ++++++++++++++++++++-------------------- rust-toolchain.toml | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6c17202..91aecd8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -34,9 +34,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "heck" @@ -46,9 +46,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "indoc" -version = "2.0.5" +version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" [[package]] name = "itertools" @@ -61,9 +61,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.169" +version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] name = "memchr" @@ -82,21 +82,21 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.20.2" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "portable-atomic" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" [[package]] name = "proc-macro2" -version = "1.0.93" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" dependencies = [ "unicode-ident", ] @@ -166,18 +166,18 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] [[package]] name = "syn" -version = "2.0.98" +version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", @@ -192,12 +192,12 @@ checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" [[package]] name = "unicode-ident" -version = "1.0.16" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unindent" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" diff --git a/rust-toolchain.toml b/rust-toolchain.toml index c7a7250..e60b212 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "1.84.1" +channel = "1.85.1" components = ["rustfmt", "clippy"] \ No newline at end of file From d0d1ee77bcadbda668f0589add5ac1dfc9df9a4f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 1 Apr 2025 10:41:49 -0400 Subject: [PATCH 2/5] cargo fix --edition --- src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 45e16d5..bb647c0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -254,14 +254,14 @@ impl PyAhoCorasick { let py = self_.py(); let matches = get_matches(&self_.ac_impl, haystack.as_bytes(), overlapping)?; let matches = py.allow_threads(|| matches.collect::>().into_iter()); - let result = if let Some(ref patterns) = self_.patterns { + let result = match self_.patterns { Some(ref patterns) => { PyList::new_bound(py, matches.map(|m| patterns[m.pattern()].clone_ref(py))) - } else { + } _ => { PyList::new_bound( py, matches.map(|m| PyString::new_bound(py, &haystack[m.start()..m.end()])), ) - }; + }}; Ok(result.into()) } } From a76086273998136d7bb18f5ff56974801c3be03b Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 1 Apr 2025 10:42:29 -0400 Subject: [PATCH 3/5] Update edition. --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c148f2b..3ba620f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "ahocorasick_rs" version = "0.22.2" -edition = "2021" +edition = "2024" authors = ["G-Research ", "Itamar Turner-Trauring "] description = "Search a string for multiple substrings at once" readme = "README.md" From 9d31a96266e5ebb5dc6d487cb3875a96b6033a48 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 1 Apr 2025 10:42:53 -0400 Subject: [PATCH 4/5] Reformat. --- src/lib.rs | 63 +++++++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index bb647c0..883bf55 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -144,34 +144,37 @@ impl PyAhoCorasick { // Convert the `patterns` iterable into an Iterator over Py: let mut patterns_iter = patterns.iter()?.map_while(|pat| { - pat.and_then(|i| i.downcast_into::().map_err(PyErr::from).map(|i|i.into_py(py))) - .map_or_else( - |e| { - patterns_error.set(Some(e)); - None - }, - Some::>, - ) + pat.and_then(|i| { + i.downcast_into::() + .map_err(PyErr::from) + .map(|i| i.into_py(py)) + }) + .map_or_else( + |e| { + patterns_error.set(Some(e)); + None + }, + Some::>, + ) }); // If store_patterns is None (the default), use a heuristic to decide // whether to store patterns. let mut patterns: Vec> = vec![]; - let store_patterns = store_patterns - .unwrap_or_else(|| { - let mut total = 0; - let mut store_patterns = true; - for s in patterns_iter.by_ref() { - // Highly unlikely that strings will fail to return length, so just expect(). - total += s.bind(py).len().expect("String doesn't have length?"); - patterns.push(s); - if total > 4096 { - store_patterns = false; - break; - } + let store_patterns = store_patterns.unwrap_or_else(|| { + let mut total = 0; + let mut store_patterns = true; + for s in patterns_iter.by_ref() { + // Highly unlikely that strings will fail to return length, so just expect(). + total += s.bind(py).len().expect("String doesn't have length?"); + patterns.push(s); + if total > 4096 { + store_patterns = false; + break; } - store_patterns - }); + } + store_patterns + }); if store_patterns { for s in patterns_iter.by_ref() { @@ -183,7 +186,8 @@ impl PyAhoCorasick { .kind(implementation.map(|i| i.into())) .match_kind(matchkind.into()) .build( - patterns.clone() + patterns + .clone() .into_iter() .chain(patterns_iter) .chunks(10 * 1024) @@ -254,14 +258,15 @@ impl PyAhoCorasick { let py = self_.py(); let matches = get_matches(&self_.ac_impl, haystack.as_bytes(), overlapping)?; let matches = py.allow_threads(|| matches.collect::>().into_iter()); - let result = match self_.patterns { Some(ref patterns) => { - PyList::new_bound(py, matches.map(|m| patterns[m.pattern()].clone_ref(py))) - } _ => { - PyList::new_bound( + let result = match self_.patterns { + Some(ref patterns) => { + PyList::new_bound(py, matches.map(|m| patterns[m.pattern()].clone_ref(py))) + } + _ => PyList::new_bound( py, matches.map(|m| PyString::new_bound(py, &haystack[m.start()..m.end()])), - ) - }}; + ), + }; Ok(result.into()) } } From c7e4b6303a5624207037c7561557d975ccf7fa15 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 1 Apr 2025 10:57:03 -0400 Subject: [PATCH 5/5] Fix PyO3 deprecations. --- src/lib.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 883bf55..06a1311 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -143,11 +143,11 @@ impl PyAhoCorasick { let patterns_error: Cell> = Cell::new(None); // Convert the `patterns` iterable into an Iterator over Py: - let mut patterns_iter = patterns.iter()?.map_while(|pat| { + let mut patterns_iter = patterns.try_iter()?.map_while(|pat| { pat.and_then(|i| { i.downcast_into::() .map_err(PyErr::from) - .map(|i| i.into_py(py)) + .map(|i| i.unbind()) }) .map_or_else( |e| { @@ -250,24 +250,24 @@ impl PyAhoCorasick { /// Return matches as list of patterns (i.e. strings). If ``overlapping`` is /// ``False`` (the default), don't include overlapping results. #[pyo3(signature = (haystack, overlapping = false))] - fn find_matches_as_strings( - self_: PyRef, - haystack: &str, + fn find_matches_as_strings<'py>( + self_: PyRef<'py, Self>, + haystack: &'py str, overlapping: bool, - ) -> PyResult> { + ) -> PyResult> { let py = self_.py(); let matches = get_matches(&self_.ac_impl, haystack.as_bytes(), overlapping)?; let matches = py.allow_threads(|| matches.collect::>().into_iter()); let result = match self_.patterns { Some(ref patterns) => { - PyList::new_bound(py, matches.map(|m| patterns[m.pattern()].clone_ref(py))) + PyList::new(py, matches.map(|m| patterns[m.pattern()].clone_ref(py))) } - _ => PyList::new_bound( + _ => PyList::new( py, - matches.map(|m| PyString::new_bound(py, &haystack[m.start()..m.end()])), + matches.map(|m| PyString::new(py, &haystack[m.start()..m.end()])), ), }; - Ok(result.into()) + result } } @@ -282,7 +282,7 @@ impl<'py> TryFrom> for PyBufferBytes<'py> { // Get a PyBufferBytes from a Python object fn try_from(obj: Bound<'py, PyAny>) -> PyResult { - let buffer = PyBuffer::::get_bound(&obj).map_err(PyErr::from)?; + let buffer = PyBuffer::::get(&obj).map_err(PyErr::from)?; if buffer.dimensions() > 1 { return Err(PyTypeError::new_err( @@ -371,7 +371,7 @@ impl PyBytesAhoCorasick { // Convert the `patterns` iterable into an Iterator over PyBufferBytes let patterns_iter = patterns - .iter()? + .try_iter()? .map_while(|pat| match pat.and_then(PyBufferBytes::try_from) { Ok(pat) => { if pat.as_ref().is_empty() {