Skip to content

Commit 4e2081b

Browse files
authored
fix: use bigger size limit for real content hash (#10907)
1 parent 528c6e1 commit 4e2081b

File tree

4 files changed

+42
-41
lines changed

4 files changed

+42
-41
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ version = "0.2.0"
1515
[workspace.metadata.cargo-shear]
1616
ignored = ["swc", "rspack"]
1717
[workspace.dependencies]
18+
aho-corasick = { version = "1.1.3" }
1819
anyhow = { version = "1.0.95", features = ["backtrace"] }
1920
anymap = { package = "anymap3", version = "1.0.1" }
2021
async-recursion = { version = "1.1.1" }

crates/rspack_plugin_real_content_hash/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ version.workspace = true
88
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
99

1010
[dependencies]
11+
aho-corasick = { workspace = true }
1112
dashmap = { workspace = true }
1213
derive_more = { workspace = true, features = ["debug"] }
1314
indexmap = { workspace = true }

crates/rspack_plugin_real_content_hash/src/lib.rs

Lines changed: 39 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,12 @@ use std::{
88
sync::LazyLock,
99
};
1010

11+
use aho_corasick::{AhoCorasick, MatchKind};
1112
use derive_more::Debug;
1213
pub use drive::*;
1314
use once_cell::sync::OnceCell;
1415
use rayon::prelude::*;
15-
use regex::{Captures, Regex};
16+
use regex::Regex;
1617
use rspack_core::{
1718
rspack_sources::{BoxSource, RawStringSource, SourceExt},
1819
AssetInfo, BindingCell, Compilation, CompilationId, CompilationProcessAssets, Logger, Plugin,
@@ -105,16 +106,18 @@ async fn inner_impl(compilation: &mut Compilation) -> Result<()> {
105106
return Ok(());
106107
}
107108
let start = logger.time("create hash regexp");
108-
let mut hash_list = hash_to_asset_names
109+
let hash_list = hash_to_asset_names
109110
.keys()
110111
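// xx\xx{xx?xx.xx -> xx\\xx\{xx\?xx\.xx regex-style escape (originally for Regex::new). NOTE(review): the list now feeds AhoCorasick, which matches patterns literally - confirm this escaping is still wanted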
111112
.map(|hash| QUOTE_META.replace_all(hash, "\\$0"))
112113
.collect::<Vec<Cow<str>>>();
113-
// long hash should sort before short hash to make sure match long hash first in hash_regexp matching
114+
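// use LeftmostLongest so that, when one hash is a prefix of another, the longer hash is matched first regardless of pattern order: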
114115
// e.g. 4afc|4afcbe match xxx.4afcbe-4afc.js -> xxx.[4afc]be-[4afc].js
115116
// 4afcbe|4afc match xxx.4afcbe-4afc.js -> xxx.[4afcbe]-[4afc].js
116-
hash_list.par_sort_by(|a, b| b.len().cmp(&a.len()));
117-
let hash_regexp = Regex::new(&hash_list.join("|")).expect("Invalid regex");
117+
let hash_ac = AhoCorasick::builder()
118+
.match_kind(MatchKind::LeftmostLongest)
119+
.build(hash_list.iter().map(|s| s.as_bytes()))
120+
.expect("Invalid patterns");
118121
logger.time_end(start);
119122

120123
let start = logger.time("create ordered hashes");
@@ -125,7 +128,7 @@ async fn inner_impl(compilation: &mut Compilation) -> Result<()> {
125128
asset.get_source().map(|source| {
126129
(
127130
name.as_str(),
128-
AssetData::new(source.clone(), asset.get_info(), &hash_regexp),
131+
AssetData::new(source.clone(), asset.get_info(), &hash_ac),
129132
)
130133
})
131134
})
@@ -184,11 +187,8 @@ async fn inner_impl(compilation: &mut Compilation) -> Result<()> {
184187
let batch_sources = batch_source_tasks
185188
.into_par_iter()
186189
.map(|(hash, name, data)| {
187-
let new_source = data.compute_new_source(
188-
data.own_hashes.contains(hash),
189-
&hash_to_new_hash,
190-
&hash_regexp,
191-
);
190+
let new_source =
191+
data.compute_new_source(data.own_hashes.contains(hash), &hash_to_new_hash, &hash_ac);
192192
((hash, name), new_source)
193193
})
194194
.collect::<HashMap<_, _>>();
@@ -254,18 +254,15 @@ async fn inner_impl(compilation: &mut Compilation) -> Result<()> {
254254
let updates: Vec<_> = assets_data
255255
.into_par_iter()
256256
.filter_map(|(name, data)| {
257-
let new_source = data.compute_new_source(false, &hash_to_new_hash, &hash_regexp);
258-
let new_name = hash_regexp
259-
.replace_all(name, |c: &Captures| {
260-
let hash = c
261-
.get(0)
262-
.expect("RealContentHashPlugin: should have match")
263-
.as_str();
264-
hash_to_new_hash
265-
.get(hash)
266-
.expect("RealContentHashPlugin: should have new hash")
267-
})
268-
.into_owned();
257+
let new_source = data.compute_new_source(false, &hash_to_new_hash, &hash_ac);
258+
let mut new_name = String::with_capacity(name.len());
259+
hash_ac.replace_all_with(name, &mut new_name, |_, hash, dst| {
260+
let replace_to = hash_to_new_hash
261+
.get(hash)
262+
.expect("RealContentHashPlugin: should have new hash");
263+
dst.push_str(replace_to);
264+
true
265+
});
269266
let new_name = (name != new_name).then_some(new_name);
270267
Some((name.to_owned(), new_source, new_name))
271268
})
@@ -349,17 +346,18 @@ enum AssetDataContent {
349346
}
350347

351348
impl AssetData {
352-
pub fn new(source: BoxSource, info: &AssetInfo, hash_regexp: &Regex) -> Self {
349+
pub fn new(source: BoxSource, info: &AssetInfo, hash_ac: &AhoCorasick) -> Self {
353350
let mut own_hashes = HashSet::default();
354351
let mut referenced_hashes = HashSet::default();
355352
// TODO(ahabhgk): source.is_buffer() instead of String::from_utf8().is_ok()
356353
let content = if let Ok(content) = String::from_utf8(source.buffer().to_vec()) {
357-
for hash in hash_regexp.find_iter(&content) {
358-
if info.content_hash.contains(hash.as_str()) {
359-
own_hashes.insert(hash.as_str().to_string());
354+
for hash in hash_ac.find_iter(&content) {
355+
let hash = &content[hash.range()];
356+
if info.content_hash.contains(hash) {
357+
own_hashes.insert(hash.to_string());
360358
continue;
361359
}
362-
referenced_hashes.insert(hash.as_str().to_string());
360+
referenced_hashes.insert(hash.to_string());
363361
}
364362
AssetDataContent::String(content)
365363
} else {
@@ -380,7 +378,7 @@ impl AssetData {
380378
&self,
381379
without_own: bool,
382380
hash_to_new_hash: &HashMap<String, String>,
383-
hash_regexp: &Regex,
381+
hash_ac: &AhoCorasick,
384382
) -> BoxSource {
385383
(if without_own {
386384
&self.new_source_without_own
@@ -395,19 +393,19 @@ impl AssetData {
395393
.iter()
396394
.any(|hash| matches!(hash_to_new_hash.get(hash.as_str()), Some(h) if h != hash)))
397395
{
398-
let new_content = hash_regexp.replace_all(content, |c: &Captures| {
399-
let hash = c
400-
.get(0)
401-
.expect("RealContentHashPlugin: should have matched")
402-
.as_str();
403-
if without_own && self.own_hashes.contains(hash) {
404-
return "";
405-
}
406-
hash_to_new_hash
407-
.get(hash)
408-
.expect("RealContentHashPlugin: should have new hash")
396+
let mut new_content = String::with_capacity(content.len());
397+
hash_ac.replace_all_with(content, &mut new_content, |_, hash, dst| {
398+
let replace_to = if without_own && self.own_hashes.contains(hash) {
399+
""
400+
} else {
401+
hash_to_new_hash
402+
.get(hash)
403+
.expect("RealContentHashPlugin: should have new hash")
404+
};
405+
dst.push_str(replace_to);
406+
true
409407
});
410-
return RawStringSource::from(new_content.into_owned()).boxed();
408+
return RawStringSource::from(new_content).boxed();
411409
}
412410
self.old_source.clone()
413411
})

0 commit comments

Comments
 (0)