1
- use std:: { cell :: RefCell , collections :: HashMap , sync :: Arc } ;
2
-
3
- use {
4
- pcre2_sys :: {
5
- PCRE2_CASELESS , PCRE2_DOTALL , PCRE2_EXTENDED , PCRE2_MATCH_INVALID_UTF ,
6
- PCRE2_MULTILINE , PCRE2_NEWLINE_ANYCRLF , PCRE2_UCP , PCRE2_UNSET ,
7
- PCRE2_UTF ,
8
- } ,
9
- thread_local :: ThreadLocal ,
1
+ use std:: {
2
+ collections :: HashMap ,
3
+ panic :: { RefUnwindSafe , UnwindSafe } ,
4
+ sync :: Arc ,
5
+ } ;
6
+
7
+ use pcre2_sys :: {
8
+ PCRE2_CASELESS , PCRE2_DOTALL , PCRE2_EXTENDED , PCRE2_MATCH_INVALID_UTF ,
9
+ PCRE2_MULTILINE , PCRE2_NEWLINE_ANYCRLF , PCRE2_UCP , PCRE2_UNSET , PCRE2_UTF ,
10
10
} ;
11
11
12
12
use crate :: {
13
13
error:: Error ,
14
14
ffi:: { Code , CompileContext , MatchConfig , MatchData } ,
15
+ pool:: { Pool , PoolGuard } ,
15
16
} ;
16
17
17
18
/// Match represents a single match of a regex in a subject string.
@@ -168,13 +169,21 @@ impl RegexBuilder {
168
169
idx. insert ( name. to_string ( ) , i) ;
169
170
}
170
171
}
172
+ let code = Arc :: new ( code) ;
173
+ let match_data = {
174
+ let config = self . config . match_config . clone ( ) ;
175
+ let code = Arc :: clone ( & code) ;
176
+ let create: MatchDataPoolFn =
177
+ Box :: new ( move || MatchData :: new ( config. clone ( ) , & code) ) ;
178
+ Pool :: new ( create)
179
+ } ;
171
180
Ok ( Regex {
172
181
config : Arc :: new ( self . config . clone ( ) ) ,
173
182
pattern : pattern. to_string ( ) ,
174
- code : Arc :: new ( code ) ,
183
+ code,
175
184
capture_names : Arc :: new ( capture_names) ,
176
185
capture_names_idx : Arc :: new ( idx) ,
177
- match_data : ThreadLocal :: new ( ) ,
186
+ match_data,
178
187
} )
179
188
}
180
189
@@ -356,25 +365,26 @@ pub struct Regex {
356
365
capture_names : Arc < Vec < Option < String > > > ,
357
366
/// A map from capture group name to capture group index.
358
367
capture_names_idx : Arc < HashMap < String , usize > > ,
359
- /// Mutable scratch data used by PCRE2 during matching.
360
- ///
361
- /// We use the same strategy as Rust's regex crate here (well, what it
362
- /// used to do, it now has its own pool), such that each thread gets its
363
- /// own match data to support using a Regex object from multiple threads
364
- /// simultaneously. If some match data doesn't exist for a thread, then a
365
- /// new one is created on demand.
366
- match_data : ThreadLocal < RefCell < MatchData > > ,
368
+ /// A pool of mutable scratch data used by PCRE2 during matching.
369
+ match_data : MatchDataPool ,
367
370
}
368
371
369
372
// Cloning a Regex: the config, compiled code and both capture-name tables are
// shared with the original through Arc::clone, the pattern string is copied,
// and the match-data storage is constructed anew for the clone.
impl Clone for Regex {
370
373
fn clone ( & self ) -> Regex {
374
// (added side) Build a separate pool for the clone. The creation closure
// owns a copy of the match config and an Arc handle to the shared code.
+ let match_data = {
375
+ let config = self . config . match_config . clone ( ) ;
376
+ let code = Arc :: clone ( & self . code ) ;
377
+ let create: MatchDataPoolFn =
378
// Each call of the closure passes a fresh copy of the config and a
// borrow of the shared code to MatchData::new.
+ Box :: new ( move || MatchData :: new ( config. clone ( ) , & code) ) ;
379
+ Pool :: new ( create)
380
+ } ;
371
381
Regex {
372
382
config : Arc :: clone ( & self . config ) ,
373
383
pattern : self . pattern . clone ( ) ,
374
384
code : Arc :: clone ( & self . code ) ,
375
385
capture_names : Arc :: clone ( & self . capture_names ) ,
376
386
capture_names_idx : Arc :: clone ( & self . capture_names_idx ) ,
377
// (removed side) The clone used to receive a new ThreadLocal here.
- match_data : ThreadLocal :: new ( ) ,
387
// (added side) The clone now receives the pool built above.
+ match_data,
378
388
}
379
389
}
380
390
}
@@ -601,10 +611,12 @@ impl Regex {
601
611
) ;
602
612
603
613
let options = 0 ;
604
- let match_data = self . match_data ( ) ;
605
- let mut match_data = match_data. borrow_mut ( ) ;
614
+ let mut match_data = self . match_data ( ) ;
606
615
// SAFETY: We don't use any dangerous PCRE2 options.
607
- Ok ( unsafe { match_data. find ( & self . code , subject, start, options) ? } )
616
+ let res =
617
+ unsafe { match_data. find ( & self . code , subject, start, options) } ;
618
+ PoolGuard :: put ( match_data) ;
619
+ res
608
620
}
609
621
610
622
/// Returns the same as find, but starts the search at the given
@@ -618,7 +630,11 @@ impl Regex {
618
630
subject : & ' s [ u8 ] ,
619
631
start : usize ,
620
632
) -> Result < Option < Match < ' s > > , Error > {
621
- self . find_at_with_match_data ( self . match_data ( ) , subject, start)
633
+ let mut match_data = self . match_data ( ) ;
634
+ let res =
635
+ self . find_at_with_match_data ( & mut match_data, subject, start) ;
636
+ PoolGuard :: put ( match_data) ;
637
+ res
622
638
}
623
639
624
640
/// Like find_at, but accepts match data instead of acquiring one itself.
@@ -628,7 +644,7 @@ impl Regex {
628
644
#[ inline( always) ]
629
645
fn find_at_with_match_data < ' s > (
630
646
& self ,
631
- match_data : & RefCell < MatchData > ,
647
+ match_data : & mut MatchDataPoolGuard < ' _ > ,
632
648
subject : & ' s [ u8 ] ,
633
649
start : usize ,
634
650
) -> Result < Option < Match < ' s > > , Error > {
@@ -640,7 +656,6 @@ impl Regex {
640
656
) ;
641
657
642
658
let options = 0 ;
643
- let mut match_data = match_data. borrow_mut ( ) ;
644
659
// SAFETY: We don't use any dangerous PCRE2 options.
645
660
if unsafe { !match_data. find ( & self . code , subject, start, options) ? } {
646
661
return Ok ( None ) ;
@@ -737,9 +752,8 @@ impl Regex {
737
752
}
738
753
}
739
754
740
// Internal accessor for the scratch MatchData used during matching; both the
// removed and the added version of the method are shown.
// (removed side) Returned a reference to a RefCell stored in the ThreadLocal,
// using get_or with a closure that wraps new_match_data() in a RefCell.
- fn match_data ( & self ) -> & RefCell < MatchData > {
741
- let create = || RefCell :: new ( self . new_match_data ( ) ) ;
742
- self . match_data . get_or ( create)
// (added side) Returns the guard produced by the pool's get(). Callers such as
// find_at hand the guard back explicitly with PoolGuard::put after matching.
// NOTE(review): presumably get() falls back to the pool's creation closure when
// no value is available — confirm in crate::pool.
755
+ fn match_data ( & self ) -> MatchDataPoolGuard < ' _ > {
756
+ self . match_data . get ( )
743
757
}
744
758
745
759
fn new_match_data ( & self ) -> MatchData {
@@ -981,7 +995,7 @@ impl<'s, 'i> std::ops::Index<&'i str> for Captures<'s> {
981
995
/// lifetime of the subject string.
982
996
pub struct Matches < ' r , ' s > {
983
997
re : & ' r Regex ,
984
- match_data : & ' r RefCell < MatchData > ,
998
+ match_data : MatchDataPoolGuard < ' r > ,
985
999
subject : & ' s [ u8 ] ,
986
1000
last_end : usize ,
987
1001
last_match : Option < usize > ,
@@ -995,7 +1009,7 @@ impl<'r, 's> Iterator for Matches<'r, 's> {
995
1009
return None ;
996
1010
}
997
1011
let res = self . re . find_at_with_match_data (
998
- self . match_data ,
1012
+ & mut self . match_data ,
999
1013
self . subject ,
1000
1014
self . last_end ,
1001
1015
) ;
@@ -1073,6 +1087,18 @@ impl<'r, 's> Iterator for CaptureMatches<'r, 's> {
1073
1087
}
1074
1088
}
1075
1089
1090
+ /// A type alias for our pool of `MatchData` that fixes the type parameters to
1091
+ /// what we actually use in practice.
1092
+ type MatchDataPool = Pool < MatchData , MatchDataPoolFn > ;
1093
+
1094
+ /// The guard type handed out by a `MatchDataPool`, with the same type parameters fixed.
1095
+ type MatchDataPoolGuard < ' a > = PoolGuard < ' a , MatchData , MatchDataPoolFn > ;
1096
+
1097
+ /// The type of the closure we use to create new `MatchData` values. We need to spell out
1098
+ /// all of the marker traits (a `dyn Fn` trait object only has the auto traits named in its type) or else we risk leaking !MARKER impls.
1099
+ type MatchDataPoolFn =
1100
+ Box < dyn Fn ( ) -> MatchData + Send + Sync + UnwindSafe + RefUnwindSafe > ;
1101
+
1076
1102
#[ cfg( test) ]
1077
1103
mod tests {
1078
1104
use super :: { Regex , RegexBuilder } ;
0 commit comments