-
-
Notifications
You must be signed in to change notification settings - Fork 36
Partial match support #13
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ use log::debug; | |
use pcre2_sys::{ | ||
PCRE2_CASELESS, PCRE2_DOTALL, PCRE2_EXTENDED, PCRE2_MULTILINE, | ||
PCRE2_UCP, PCRE2_UTF, PCRE2_NO_UTF_CHECK, PCRE2_UNSET, | ||
PCRE2_NEWLINE_ANYCRLF, | ||
PCRE2_NEWLINE_ANYCRLF, PCRE2_PARTIAL_HARD | ||
}; | ||
use thread_local::CachedThreadLocal; | ||
|
||
|
@@ -427,6 +427,25 @@ impl Regex { | |
self.is_match_at(subject, 0) | ||
} | ||
|
||
/// Returns true if and only if the regex fully or partially matches the subject string given. | ||
/// A partial match occurs when there is a match up to the end of a subject string, | ||
/// but more characters are needed to match the entire pattern. | ||
/// | ||
/// # Example | ||
/// | ||
/// Test if given string can be a beginning of a valid telephone number: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add a blank line before the code block. |
||
/// ```rust | ||
/// # fn example() -> Result<(), ::pcre2::Error> { | ||
/// use pcre2::bytes::Regex; | ||
/// | ||
/// let text = b"123-456-"; | ||
/// assert!(Regex::new(r"^\d{3}-\d{3}-\d{3}")?.is_partial_match(text)?); | ||
/// # Ok(()) }; example().unwrap() | ||
/// ``` | ||
pub fn is_partial_match(&self, subject: &[u8]) -> Result<bool, Error> { | ||
self.is_partial_match_at(subject, 0) | ||
} | ||
|
||
/// Returns the start and end byte range of the leftmost-first match in | ||
/// `subject`. If no match exists, then `None` is returned. | ||
/// | ||
|
@@ -628,6 +647,39 @@ impl Regex { | |
Ok(unsafe { match_data.find(&self.code, subject, start, options)? }) | ||
} | ||
|
||
/// Returns the same as is_partial_match, but starts the search at the given | ||
/// offset. | ||
/// | ||
/// The significance of the starting point is that it takes the surrounding | ||
/// context into consideration. For example, the `\A` anchor can only | ||
/// match when `start == 0`. | ||
pub fn is_partial_match_at( | ||
&self, | ||
subject: &[u8], | ||
start: usize, | ||
) -> Result<bool, Error> { | ||
assert!( | ||
start <= subject.len(), | ||
"start ({}) must be <= subject.len() ({})", | ||
start, | ||
subject.len() | ||
); | ||
|
||
let mut options = PCRE2_PARTIAL_HARD; | ||
if !self.config.utf_check { | ||
options |= PCRE2_NO_UTF_CHECK; | ||
} | ||
|
||
let match_data = self.match_data(); | ||
let mut match_data = match_data.borrow_mut(); | ||
// SAFETY: The only unsafe PCRE2 option we potentially use here is | ||
// PCRE2_NO_UTF_CHECK, and that only occurs if the caller executes the | ||
// `disable_utf_check` method, which propagates the safety contract to | ||
// the caller. | ||
Ok(unsafe { match_data.find(&self.code, subject, start, options)? }) | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks to me like this code might benefit from a slight refactor. Namely, So I'd say, create a private There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, will do. |
||
|
||
|
||
/// Returns the same as find, but starts the search at the given | ||
/// offset. | ||
/// | ||
|
@@ -1150,6 +1202,18 @@ mod tests { | |
assert!(re.is_match(b("Β")).unwrap()); | ||
} | ||
|
||
#[test] | ||
fn partial() { | ||
let re = RegexBuilder::new() | ||
.build("ab$") | ||
.unwrap(); | ||
|
||
assert!(re.is_partial_match(b("a")).unwrap()); | ||
assert!(re.is_partial_match(b("ab")).unwrap()); | ||
assert!(!re.is_partial_match(b("abc")).unwrap()); | ||
assert!(!re.is_partial_match(b("b")).unwrap()); | ||
} | ||
|
||
#[test] | ||
fn crlf() { | ||
let re = RegexBuilder::new() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -93,7 +93,7 @@ impl Code { | |
/// an error. | ||
pub fn jit_compile(&mut self) -> Result<(), Error> { | ||
let error_code = unsafe { | ||
pcre2_jit_compile_8(self.code, PCRE2_JIT_COMPLETE) | ||
pcre2_jit_compile_8(self.code, PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this enable partial matching unconditionally when using the jit? If not, please add a comment and a test verifying that it doesn't. If it does, then we'll need to rethink how we do this because it implies that partial matching isn't just a search-time option, but also a compile-time option. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The documentation says:
So I read it, as indeed, this would cause more compile-time overhead, and it is good to give users control over which modes they want to enable. What about changing this to: pub fn jit_compile(&mut self, &JitOptions options) but then this would break backward compatibility :( |
||
}; | ||
if error_code == 0 { | ||
self.compiled_jit = true; | ||
|
@@ -427,6 +427,9 @@ impl MatchData { | |
); | ||
if rc == PCRE2_ERROR_NOMATCH { | ||
Ok(false) | ||
} else if rc == PCRE2_ERROR_PARTIAL && | ||
options & (PCRE2_PARTIAL_HARD | PCRE2_PARTIAL_SOFT) != 0 { | ||
Ok(true) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suppose this behavior should be documented in this function's contract. |
||
} else if rc > 0 { | ||
Ok(true) | ||
} else { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Code and comments should be wrapped to 79 columns (inclusive).