Skip to content

Commit

Permalink
Merge pull request #393 from Ambyjkl/regex-optimization
Browse files (browse the repository at this point in the history)
Disable unicode in network filter regex
  • Loading branch information
antonok-edm authored Oct 3, 2024
2 parents 7261566 + e4a99a0 commit 71bdd7e
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 13 deletions.
7 changes: 4 additions & 3 deletions src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -768,14 +768,15 @@ mod tests {
"script").unwrap();
assert!(engine.check_network_request(&request).matched);
}*/
{
// fails - unicode not supported in network filter
/*{
let engine = Engine::from_rules_debug([r#"/tesT߶/$domain=example.com"#], Default::default());
let request = Request::new("https://example.com/tesT߶",
"https://example.com",
"script").unwrap();
assert!(engine.check_network_request(&request).matched);
}
// fails - punycoded domain
}*/
// fails - unicode not supported in network filter
/*{
let engine = Engine::from_rules_debug([r#"/tesT߶/$domain=example.com"#], Default::default());
let request = Request::new("https://example-tesT߶.com/tesT",
Expand Down
17 changes: 10 additions & 7 deletions src/filters/network.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
use memchr::{memchr as find_char, memmem, memrchr as find_char_reverse};
use once_cell::sync::Lazy;
use regex::{Regex, RegexSet};
use regex::{
bytes::Regex as BytesRegex, bytes::RegexBuilder as BytesRegexBuilder,
bytes::RegexSet as BytesRegexSet, bytes::RegexSetBuilder as BytesRegexSetBuilder, Regex,
};
use serde::{Deserialize, Serialize};
use thiserror::Error;

Expand Down Expand Up @@ -180,8 +183,8 @@ impl From<&request::RequestType> for NetworkFilterMask {

#[derive(Debug, Clone)]
pub enum CompiledRegex {
Compiled(Regex),
CompiledSet(RegexSet),
Compiled(BytesRegex),
CompiledSet(BytesRegexSet),
MatchAll,
RegexParsingError(regex::Error),
}
Expand All @@ -191,11 +194,11 @@ impl CompiledRegex {
match &self {
CompiledRegex::MatchAll => true, // simple case for matching everything, e.g. for empty filter
CompiledRegex::RegexParsingError(_e) => false, // no match if regex didn't even compile
CompiledRegex::Compiled(r) => r.is_match(pattern),
CompiledRegex::Compiled(r) => r.is_match(pattern.as_bytes()),
CompiledRegex::CompiledSet(r) => {
// let matches: Vec<_> = r.matches(pattern).into_iter().collect();
// println!("Matching {} against RegexSet: {:?}", pattern, matches);
r.is_match(pattern)
r.is_match(pattern.as_bytes())
}
}
}
Expand Down Expand Up @@ -1235,15 +1238,15 @@ pub(crate) fn compile_regex(
CompiledRegex::MatchAll
} else if escaped_patterns.len() == 1 {
let pattern = &escaped_patterns[0];
match Regex::new(pattern) {
match BytesRegexBuilder::new(pattern).unicode(false).build() {
Ok(compiled) => CompiledRegex::Compiled(compiled),
Err(e) => {
// println!("Regex parsing failed ({:?})", e);
CompiledRegex::RegexParsingError(e)
}
}
} else {
match RegexSet::new(escaped_patterns) {
match BytesRegexSetBuilder::new(escaped_patterns).unicode(false).build() {
Ok(compiled) => CompiledRegex::CompiledSet(compiled),
Err(e) => CompiledRegex::RegexParsingError(e),
}
Expand Down
8 changes: 5 additions & 3 deletions src/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ mod optimization_tests_pattern_group {
use crate::lists;
use crate::regex_manager::RegexManager;
use crate::request::Request;
use regex::RegexSet;
use regex::bytes::RegexSetBuilder as BytesRegexSetBuilder;

fn check_regex_match(regex: &CompiledRegex, pattern: &str, matches: bool) {
let is_match = regex.is_match(pattern);
Expand Down Expand Up @@ -244,13 +244,15 @@ mod optimization_tests_pattern_group {

#[test]
fn regex_set_works() {
let regex_set = RegexSet::new(&[
let regex_set = BytesRegexSetBuilder::new(&[
r"/static/ad\.",
"/static/ad-",
"/static/ad/.*",
"/static/ads/.*",
"/static/adv/.*",
]);
])
.unicode(false)
.build();

let fused_regex = CompiledRegex::CompiledSet(regex_set.unwrap());
assert!(matches!(fused_regex, CompiledRegex::CompiledSet(_)));
Expand Down

0 comments on commit 71bdd7e

Please sign in to comment.