Skip to content

Commit 83e97e8

Browse files
committed
deps: remove thread_local dependency
Instead, we copy the 'Pool' implementation from the regex crate. The impetus for this change is that this crate was the last one in ripgrep still using 'thread_local'.
1 parent 65e6ca2 commit 83e97e8

File tree

4 files changed

+876
-32
lines changed

4 files changed

+876
-32
lines changed

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,3 @@ members = ["pcre2-sys"]
1919
libc = "0.2.146"
2020
log = "0.4.19"
2121
pcre2-sys = { version = "0.2.6", path = "pcre2-sys" }
22-
thread_local = "1.1.7"

src/bytes.rs

Lines changed: 57 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
1-
use std::{cell::RefCell, collections::HashMap, sync::Arc};
2-
3-
use {
4-
pcre2_sys::{
5-
PCRE2_CASELESS, PCRE2_DOTALL, PCRE2_EXTENDED, PCRE2_MATCH_INVALID_UTF,
6-
PCRE2_MULTILINE, PCRE2_NEWLINE_ANYCRLF, PCRE2_UCP, PCRE2_UNSET,
7-
PCRE2_UTF,
8-
},
9-
thread_local::ThreadLocal,
1+
use std::{
2+
collections::HashMap,
3+
panic::{RefUnwindSafe, UnwindSafe},
4+
sync::Arc,
5+
};
6+
7+
use pcre2_sys::{
8+
PCRE2_CASELESS, PCRE2_DOTALL, PCRE2_EXTENDED, PCRE2_MATCH_INVALID_UTF,
9+
PCRE2_MULTILINE, PCRE2_NEWLINE_ANYCRLF, PCRE2_UCP, PCRE2_UNSET, PCRE2_UTF,
1010
};
1111

1212
use crate::{
1313
error::Error,
1414
ffi::{Code, CompileContext, MatchConfig, MatchData},
15+
pool::{Pool, PoolGuard},
1516
};
1617

1718
/// Match represents a single match of a regex in a subject string.
@@ -168,13 +169,21 @@ impl RegexBuilder {
168169
idx.insert(name.to_string(), i);
169170
}
170171
}
172+
let code = Arc::new(code);
173+
let match_data = {
174+
let config = self.config.match_config.clone();
175+
let code = Arc::clone(&code);
176+
let create: MatchDataPoolFn =
177+
Box::new(move || MatchData::new(config.clone(), &code));
178+
Pool::new(create)
179+
};
171180
Ok(Regex {
172181
config: Arc::new(self.config.clone()),
173182
pattern: pattern.to_string(),
174-
code: Arc::new(code),
183+
code,
175184
capture_names: Arc::new(capture_names),
176185
capture_names_idx: Arc::new(idx),
177-
match_data: ThreadLocal::new(),
186+
match_data,
178187
})
179188
}
180189

@@ -356,25 +365,26 @@ pub struct Regex {
356365
capture_names: Arc<Vec<Option<String>>>,
357366
/// A map from capture group name to capture group index.
358367
capture_names_idx: Arc<HashMap<String, usize>>,
359-
/// Mutable scratch data used by PCRE2 during matching.
360-
///
361-
/// We use the same strategy as Rust's regex crate here (well, what it
362-
/// used to do, it now has its own pool), such that each thread gets its
363-
/// own match data to support using a Regex object from multiple threads
364-
/// simultaneously. If some match data doesn't exist for a thread, then a
365-
/// new one is created on demand.
366-
match_data: ThreadLocal<RefCell<MatchData>>,
368+
/// A pool of mutable scratch data used by PCRE2 during matching.
369+
match_data: MatchDataPool,
367370
}
368371

369372
impl Clone for Regex {
370373
fn clone(&self) -> Regex {
374+
let match_data = {
375+
let config = self.config.match_config.clone();
376+
let code = Arc::clone(&self.code);
377+
let create: MatchDataPoolFn =
378+
Box::new(move || MatchData::new(config.clone(), &code));
379+
Pool::new(create)
380+
};
371381
Regex {
372382
config: Arc::clone(&self.config),
373383
pattern: self.pattern.clone(),
374384
code: Arc::clone(&self.code),
375385
capture_names: Arc::clone(&self.capture_names),
376386
capture_names_idx: Arc::clone(&self.capture_names_idx),
377-
match_data: ThreadLocal::new(),
387+
match_data,
378388
}
379389
}
380390
}
@@ -601,10 +611,12 @@ impl Regex {
601611
);
602612

603613
let options = 0;
604-
let match_data = self.match_data();
605-
let mut match_data = match_data.borrow_mut();
614+
let mut match_data = self.match_data();
606615
// SAFETY: We don't use any dangerous PCRE2 options.
607-
Ok(unsafe { match_data.find(&self.code, subject, start, options)? })
616+
let res =
617+
unsafe { match_data.find(&self.code, subject, start, options) };
618+
PoolGuard::put(match_data);
619+
res
608620
}
609621

610622
/// Returns the same as find, but starts the search at the given
@@ -618,7 +630,11 @@ impl Regex {
618630
subject: &'s [u8],
619631
start: usize,
620632
) -> Result<Option<Match<'s>>, Error> {
621-
self.find_at_with_match_data(self.match_data(), subject, start)
633+
let mut match_data = self.match_data();
634+
let res =
635+
self.find_at_with_match_data(&mut match_data, subject, start);
636+
PoolGuard::put(match_data);
637+
res
622638
}
623639

624640
/// Like find_at, but accepts match data instead of acquiring one itself.
@@ -628,7 +644,7 @@ impl Regex {
628644
#[inline(always)]
629645
fn find_at_with_match_data<'s>(
630646
&self,
631-
match_data: &RefCell<MatchData>,
647+
match_data: &mut MatchDataPoolGuard<'_>,
632648
subject: &'s [u8],
633649
start: usize,
634650
) -> Result<Option<Match<'s>>, Error> {
@@ -640,7 +656,6 @@ impl Regex {
640656
);
641657

642658
let options = 0;
643-
let mut match_data = match_data.borrow_mut();
644659
// SAFETY: We don't use any dangerous PCRE2 options.
645660
if unsafe { !match_data.find(&self.code, subject, start, options)? } {
646661
return Ok(None);
@@ -737,9 +752,8 @@ impl Regex {
737752
}
738753
}
739754

740-
fn match_data(&self) -> &RefCell<MatchData> {
741-
let create = || RefCell::new(self.new_match_data());
742-
self.match_data.get_or(create)
755+
fn match_data(&self) -> MatchDataPoolGuard<'_> {
756+
self.match_data.get()
743757
}
744758

745759
fn new_match_data(&self) -> MatchData {
@@ -981,7 +995,7 @@ impl<'s, 'i> std::ops::Index<&'i str> for Captures<'s> {
981995
/// lifetime of the subject string.
982996
pub struct Matches<'r, 's> {
983997
re: &'r Regex,
984-
match_data: &'r RefCell<MatchData>,
998+
match_data: MatchDataPoolGuard<'r>,
985999
subject: &'s [u8],
9861000
last_end: usize,
9871001
last_match: Option<usize>,
@@ -995,7 +1009,7 @@ impl<'r, 's> Iterator for Matches<'r, 's> {
9951009
return None;
9961010
}
9971011
let res = self.re.find_at_with_match_data(
998-
self.match_data,
1012+
&mut self.match_data,
9991013
self.subject,
10001014
self.last_end,
10011015
);
@@ -1073,6 +1087,18 @@ impl<'r, 's> Iterator for CaptureMatches<'r, 's> {
10731087
}
10741088
}
10751089

1090+
/// A type alias for our pool of `MatchData` that fixes the type parameters to
1091+
/// what we actually use in practice.
1092+
type MatchDataPool = Pool<MatchData, MatchDataPoolFn>;
1093+
1094+
/// Same as above, but for the guard returned by a pool.
1095+
type MatchDataPoolGuard<'a> = PoolGuard<'a, MatchData, MatchDataPoolFn>;
1096+
1097+
/// The type of the closure we use to create new `MatchData` values. We need to spell out
1098+
/// all of the marker traits or else we risk leaking !MARKER impls.
1099+
type MatchDataPoolFn =
1100+
Box<dyn Fn() -> MatchData + Send + Sync + UnwindSafe + RefUnwindSafe>;
1101+
10761102
#[cfg(test)]
10771103
mod tests {
10781104
use super::{Regex, RegexBuilder};

src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ are welcome to improve this.
1111

1212
#![deny(missing_docs)]
1313

14+
extern crate alloc;
15+
1416
pub use crate::{
1517
error::{Error, ErrorKind},
1618
ffi::{escape, is_jit_available, version},
@@ -22,3 +24,4 @@ PCRE2 regular expressions for matching on arbitrary bytes.
2224
pub mod bytes;
2325
mod error;
2426
mod ffi;
27+
mod pool;

0 commit comments

Comments
 (0)