@@ -8,11 +8,12 @@ use std::{
8
8
sync:: LazyLock ,
9
9
} ;
10
10
11
+ use aho_corasick:: { AhoCorasick , MatchKind } ;
11
12
use derive_more:: Debug ;
12
13
pub use drive:: * ;
13
14
use once_cell:: sync:: OnceCell ;
14
15
use rayon:: prelude:: * ;
15
- use regex:: { Captures , Regex } ;
16
+ use regex:: Regex ;
16
17
use rspack_core:: {
17
18
rspack_sources:: { BoxSource , RawStringSource , SourceExt } ,
18
19
AssetInfo , BindingCell , Compilation , CompilationId , CompilationProcessAssets , Logger , Plugin ,
@@ -105,16 +106,18 @@ async fn inner_impl(compilation: &mut Compilation) -> Result<()> {
105
106
return Ok ( ( ) ) ;
106
107
}
107
108
let start = logger. time ( "create hash regexp" ) ;
108
- let mut hash_list = hash_to_asset_names
109
+ let hash_list = hash_to_asset_names
109
110
. keys ( )
110
111
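// xx\xx{xx?xx.xx -> xx\\xx\{xx\?xx\.xx regex-style escaping of each hash (NOTE(review): the patterns now go to AhoCorasick, which presumably matches them literally rather than via Regex::new; confirm this escaping is still wanted)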
111
112
. map ( |hash| QUOTE_META . replace_all ( hash, "\\ $0" ) )
112
113
. collect :: < Vec < Cow < str > > > ( ) ;
113
- // long hash should sort before short hash to make sure match long hash first in hash_regexp matching
114
+ // use LeftmostLongest so the longest hash wins regardless of pattern order; with order-dependent (leftmost-first) matching:
114
115
// e.g. 4afc|4afcbe match xxx.4afcbe-4afc.js -> xxx.[4afc]be-[4afc].js
115
116
// 4afcbe|4afc match xxx.4afcbe-4afc.js -> xxx.[4afcbe]-[4afc].js
116
- hash_list. par_sort_by ( |a, b| b. len ( ) . cmp ( & a. len ( ) ) ) ;
117
- let hash_regexp = Regex :: new ( & hash_list. join ( "|" ) ) . expect ( "Invalid regex" ) ;
117
+ let hash_ac = AhoCorasick :: builder ( )
118
+ . match_kind ( MatchKind :: LeftmostLongest )
119
+ . build ( hash_list. iter ( ) . map ( |s| s. as_bytes ( ) ) )
120
+ . expect ( "Invalid patterns" ) ;
118
121
logger. time_end ( start) ;
119
122
120
123
let start = logger. time ( "create ordered hashes" ) ;
@@ -125,7 +128,7 @@ async fn inner_impl(compilation: &mut Compilation) -> Result<()> {
125
128
asset. get_source ( ) . map ( |source| {
126
129
(
127
130
name. as_str ( ) ,
128
- AssetData :: new ( source. clone ( ) , asset. get_info ( ) , & hash_regexp ) ,
131
+ AssetData :: new ( source. clone ( ) , asset. get_info ( ) , & hash_ac ) ,
129
132
)
130
133
} )
131
134
} )
@@ -184,11 +187,8 @@ async fn inner_impl(compilation: &mut Compilation) -> Result<()> {
184
187
let batch_sources = batch_source_tasks
185
188
. into_par_iter ( )
186
189
. map ( |( hash, name, data) | {
187
- let new_source = data. compute_new_source (
188
- data. own_hashes . contains ( hash) ,
189
- & hash_to_new_hash,
190
- & hash_regexp,
191
- ) ;
190
+ let new_source =
191
+ data. compute_new_source ( data. own_hashes . contains ( hash) , & hash_to_new_hash, & hash_ac) ;
192
192
( ( hash, name) , new_source)
193
193
} )
194
194
. collect :: < HashMap < _ , _ > > ( ) ;
@@ -254,18 +254,15 @@ async fn inner_impl(compilation: &mut Compilation) -> Result<()> {
254
254
let updates: Vec < _ > = assets_data
255
255
. into_par_iter ( )
256
256
. filter_map ( |( name, data) | {
257
- let new_source = data. compute_new_source ( false , & hash_to_new_hash, & hash_regexp) ;
258
- let new_name = hash_regexp
259
- . replace_all ( name, |c : & Captures | {
260
- let hash = c
261
- . get ( 0 )
262
- . expect ( "RealContentHashPlugin: should have match" )
263
- . as_str ( ) ;
264
- hash_to_new_hash
265
- . get ( hash)
266
- . expect ( "RealContentHashPlugin: should have new hash" )
267
- } )
268
- . into_owned ( ) ;
257
+ let new_source = data. compute_new_source ( false , & hash_to_new_hash, & hash_ac) ;
258
+ let mut new_name = String :: with_capacity ( name. len ( ) ) ;
259
+ hash_ac. replace_all_with ( name, & mut new_name, |_, hash, dst| {
260
+ let replace_to = hash_to_new_hash
261
+ . get ( hash)
262
+ . expect ( "RealContentHashPlugin: should have new hash" ) ;
263
+ dst. push_str ( replace_to) ;
264
+ true
265
+ } ) ;
269
266
let new_name = ( name != new_name) . then_some ( new_name) ;
270
267
Some ( ( name. to_owned ( ) , new_source, new_name) )
271
268
} )
@@ -349,17 +346,18 @@ enum AssetDataContent {
349
346
}
350
347
351
348
impl AssetData {
352
- pub fn new ( source : BoxSource , info : & AssetInfo , hash_regexp : & Regex ) -> Self {
349
+ pub fn new ( source : BoxSource , info : & AssetInfo , hash_ac : & AhoCorasick ) -> Self {
353
350
let mut own_hashes = HashSet :: default ( ) ;
354
351
let mut referenced_hashes = HashSet :: default ( ) ;
355
352
// TODO(ahabhgk): source.is_buffer() instead of String::from_utf8().is_ok()
356
353
let content = if let Ok ( content) = String :: from_utf8 ( source. buffer ( ) . to_vec ( ) ) {
357
- for hash in hash_regexp. find_iter ( & content) {
358
- if info. content_hash . contains ( hash. as_str ( ) ) {
359
- own_hashes. insert ( hash. as_str ( ) . to_string ( ) ) ;
354
+ for hash in hash_ac. find_iter ( & content) {
355
+ let hash = & content[ hash. range ( ) ] ;
356
+ if info. content_hash . contains ( hash) {
357
+ own_hashes. insert ( hash. to_string ( ) ) ;
360
358
continue ;
361
359
}
362
- referenced_hashes. insert ( hash. as_str ( ) . to_string ( ) ) ;
360
+ referenced_hashes. insert ( hash. to_string ( ) ) ;
363
361
}
364
362
AssetDataContent :: String ( content)
365
363
} else {
@@ -380,7 +378,7 @@ impl AssetData {
380
378
& self ,
381
379
without_own : bool ,
382
380
hash_to_new_hash : & HashMap < String , String > ,
383
- hash_regexp : & Regex ,
381
+ hash_ac : & AhoCorasick ,
384
382
) -> BoxSource {
385
383
( if without_own {
386
384
& self . new_source_without_own
@@ -395,19 +393,19 @@ impl AssetData {
395
393
. iter ( )
396
394
. any ( |hash| matches ! ( hash_to_new_hash. get( hash. as_str( ) ) , Some ( h) if h != hash) ) )
397
395
{
398
- let new_content = hash_regexp . replace_all ( content, | c : & Captures | {
399
- let hash = c
400
- . get ( 0 )
401
- . expect ( "RealContentHashPlugin: should have matched" )
402
- . as_str ( ) ;
403
- if without_own && self . own_hashes . contains ( hash ) {
404
- return "" ;
405
- }
406
- hash_to_new_hash
407
- . get ( hash )
408
- . expect ( "RealContentHashPlugin: should have new hash" )
396
+ let mut new_content = String :: with_capacity ( content. len ( ) ) ;
397
+ hash_ac . replace_all_with ( content , & mut new_content , |_ , hash, dst| {
398
+ let replace_to = if without_own && self . own_hashes . contains ( hash ) {
399
+ ""
400
+ } else {
401
+ hash_to_new_hash
402
+ . get ( hash )
403
+ . expect ( "RealContentHashPlugin: should have new hash" )
404
+ } ;
405
+ dst . push_str ( replace_to ) ;
406
+ true
409
407
} ) ;
410
- return RawStringSource :: from ( new_content. into_owned ( ) ) . boxed ( ) ;
408
+ return RawStringSource :: from ( new_content) . boxed ( ) ;
411
409
}
412
410
self . old_source . clone ( )
413
411
} )
0 commit comments