Skip to content

Commit e4a99a0

Browse files
committed
Disable Unicode mode in network filter regexes (compile them as byte-based regexes)
1 parent 319f88b commit e4a99a0

File tree

3 files changed

+19
-13
lines changed

3 files changed

+19
-13
lines changed

src/engine.rs

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -768,14 +768,15 @@ mod tests {
768768
"script").unwrap();
769769
assert!(engine.check_network_request(&request).matched);
770770
}*/
771-
{
771+
// fails - unicode not supported in network filter
772+
/*{
772773
let engine = Engine::from_rules_debug([r#"/tesT߶/$domain=example.com"#], Default::default());
773774
let request = Request::new("https://example.com/tesT߶",
774775
"https://example.com",
775776
"script").unwrap();
776777
assert!(engine.check_network_request(&request).matched);
777-
}
778-
// fails - punycoded domain
778+
}*/
779+
// fails - unicode not supported in network filter
779780
/*{
780781
let engine = Engine::from_rules_debug([r#"/tesT߶/$domain=example.com"#], Default::default());
781782
let request = Request::new("https://example-tesT߶.com/tesT",

src/filters/network.rs

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,10 @@
33
44
use memchr::{memchr as find_char, memmem, memrchr as find_char_reverse};
55
use once_cell::sync::Lazy;
6-
use regex::{Regex, RegexSet};
6+
use regex::{
7+
bytes::Regex as BytesRegex, bytes::RegexBuilder as BytesRegexBuilder,
8+
bytes::RegexSet as BytesRegexSet, bytes::RegexSetBuilder as BytesRegexSetBuilder, Regex,
9+
};
710
use serde::{Deserialize, Serialize};
811
use thiserror::Error;
912

@@ -180,8 +183,8 @@ impl From<&request::RequestType> for NetworkFilterMask {
180183

181184
#[derive(Debug, Clone)]
182185
pub enum CompiledRegex {
183-
Compiled(Regex),
184-
CompiledSet(RegexSet),
186+
Compiled(BytesRegex),
187+
CompiledSet(BytesRegexSet),
185188
MatchAll,
186189
RegexParsingError(regex::Error),
187190
}
@@ -191,11 +194,11 @@ impl CompiledRegex {
191194
match &self {
192195
CompiledRegex::MatchAll => true, // simple case for matching everything, e.g. for empty filter
193196
CompiledRegex::RegexParsingError(_e) => false, // no match if regex didn't even compile
194-
CompiledRegex::Compiled(r) => r.is_match(pattern),
197+
CompiledRegex::Compiled(r) => r.is_match(pattern.as_bytes()),
195198
CompiledRegex::CompiledSet(r) => {
196199
// let matches: Vec<_> = r.matches(pattern).into_iter().collect();
197200
// println!("Matching {} against RegexSet: {:?}", pattern, matches);
198-
r.is_match(pattern)
201+
r.is_match(pattern.as_bytes())
199202
}
200203
}
201204
}
@@ -1225,15 +1228,15 @@ pub fn compile_regex(
12251228
CompiledRegex::MatchAll
12261229
} else if escaped_patterns.len() == 1 {
12271230
let pattern = &escaped_patterns[0];
1228-
match Regex::new(pattern) {
1231+
match BytesRegexBuilder::new(pattern).unicode(false).build() {
12291232
Ok(compiled) => CompiledRegex::Compiled(compiled),
12301233
Err(e) => {
12311234
// println!("Regex parsing failed ({:?})", e);
12321235
CompiledRegex::RegexParsingError(e)
12331236
}
12341237
}
12351238
} else {
1236-
match RegexSet::new(escaped_patterns) {
1239+
match BytesRegexSetBuilder::new(escaped_patterns).unicode(false).build() {
12371240
Ok(compiled) => CompiledRegex::CompiledSet(compiled),
12381241
Err(e) => CompiledRegex::RegexParsingError(e),
12391242
}

src/optimizer.rs

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,7 @@ mod optimization_tests_pattern_group {
209209
use crate::lists;
210210
use crate::regex_manager::RegexManager;
211211
use crate::request::Request;
212-
use regex::RegexSet;
212+
use regex::bytes::RegexSetBuilder as BytesRegexSetBuilder;
213213

214214
fn check_regex_match(regex: &CompiledRegex, pattern: &str, matches: bool) {
215215
let is_match = regex.is_match(pattern);
@@ -244,13 +244,15 @@ mod optimization_tests_pattern_group {
244244

245245
#[test]
246246
fn regex_set_works() {
247-
let regex_set = RegexSet::new(&[
247+
let regex_set = BytesRegexSetBuilder::new(&[
248248
r"/static/ad\.",
249249
"/static/ad-",
250250
"/static/ad/.*",
251251
"/static/ads/.*",
252252
"/static/adv/.*",
253-
]);
253+
])
254+
.unicode(false)
255+
.build();
254256

255257
let fused_regex = CompiledRegex::CompiledSet(regex_set.unwrap());
256258
assert!(matches!(fused_regex, CompiledRegex::CompiledSet(_)));

0 commit comments

Comments
 (0)