Skip to content

Commit e15c59a

Browse files
committed
Partial match support
New methods: Regex::is_partial_match and Regex::is_partial_match_at
1 parent 61687cd commit e15c59a

File tree

2 files changed

+69
-2
lines changed

2 files changed

+69
-2
lines changed

src/bytes.rs

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use log::debug;
88
use pcre2_sys::{
99
PCRE2_CASELESS, PCRE2_DOTALL, PCRE2_EXTENDED, PCRE2_MULTILINE,
1010
PCRE2_UCP, PCRE2_UTF, PCRE2_NO_UTF_CHECK, PCRE2_UNSET,
11-
PCRE2_NEWLINE_ANYCRLF,
11+
PCRE2_NEWLINE_ANYCRLF, PCRE2_PARTIAL_HARD
1212
};
1313
use thread_local::CachedThreadLocal;
1414

@@ -427,6 +427,25 @@ impl Regex {
427427
self.is_match_at(subject, 0)
428428
}
429429

430+
/// Returns true if and only if the regex fully or partially matches the given subject string.
431+
/// A partial match occurs when there is a match up to the end of the subject string,
432+
/// but more characters are needed to match the entire pattern.
433+
///
434+
/// # Example
435+
///
436+
/// Test whether the given string could be the beginning of a valid telephone number:
437+
/// ```rust
438+
/// # fn example() -> Result<(), ::pcre2::Error> {
439+
/// use pcre2::bytes::Regex;
440+
///
441+
/// let text = b"123-456-";
442+
/// assert!(Regex::new(r"^\d{3}-\d{3}-\d{3}")?.is_partial_match(text)?);
443+
/// # Ok(()) }; example().unwrap()
444+
/// ```
445+
pub fn is_partial_match(&self, subject: &[u8]) -> Result<bool, Error> {
446+
self.is_partial_match_at(subject, 0)
447+
}
448+
430449
/// Returns the start and end byte range of the leftmost-first match in
431450
/// `subject`. If no match exists, then `None` is returned.
432451
///
@@ -628,6 +647,39 @@ impl Regex {
628647
Ok(unsafe { match_data.find(&self.code, subject, start, options)? })
629648
}
630649

650+
/// Returns the same as `is_partial_match`, but starts the search at the given
651+
/// offset.
652+
///
653+
/// The significance of the starting point is that it takes the surrounding
654+
/// context into consideration. For example, the `\A` anchor can only
655+
/// match when `start == 0`.
///
/// # Panics
///
/// Panics if `start > subject.len()`.
656+
pub fn is_partial_match_at(
657+
&self,
658+
subject: &[u8],
659+
start: usize,
660+
) -> Result<bool, Error> {
661+
assert!(
662+
start <= subject.len(),
663+
"start ({}) must be <= subject.len() ({})",
664+
start,
665+
subject.len()
666+
);
667+
668+
// Always request hard partial matching; additionally skip the UTF check
// only when this regex was configured without it.
let mut options = PCRE2_PARTIAL_HARD;
669+
if !self.config.utf_check {
670+
options |= PCRE2_NO_UTF_CHECK;
671+
}
672+
673+
let match_data = self.match_data();
674+
let mut match_data = match_data.borrow_mut();
675+
// SAFETY: The only unsafe PCRE2 option we potentially use here is
676+
// PCRE2_NO_UTF_CHECK, and that only occurs if the caller executes the
677+
// `disable_utf_check` method, which propagates the safety contract to
678+
// the caller.
679+
Ok(unsafe { match_data.find(&self.code, subject, start, options)? })
680+
}
681+
682+
631683
/// Returns the same as find, but starts the search at the given
632684
/// offset.
633685
///
@@ -1150,6 +1202,18 @@ mod tests {
11501202
assert!(re.is_match(b("Β")).unwrap());
11511203
}
11521204

1205+
// Exercises `is_partial_match` against the end-anchored pattern `ab$`.
#[test]
1206+
fn partial() {
1207+
let re = RegexBuilder::new()
1208+
.build("ab$")
1209+
.unwrap();
1210+
1211+
// "a" is a proper prefix of a possible match, so more input could complete it.
assert!(re.is_partial_match(b("a")).unwrap());
1212+
// "ab" satisfies the pattern right up to the end of the subject.
assert!(re.is_partial_match(b("ab")).unwrap());
1213+
// The trailing "c" means `$` cannot match after "ab", and no later start works.
assert!(!re.is_partial_match(b("abc")).unwrap());
1214+
// There is no "a" to begin a match from.
assert!(!re.is_partial_match(b("b")).unwrap());
1215+
}
1216+
11531217
#[test]
11541218
fn crlf() {
11551219
let re = RegexBuilder::new()

src/ffi.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ impl Code {
9393
/// an error.
9494
pub fn jit_compile(&mut self) -> Result<(), Error> {
9595
let error_code = unsafe {
96-
pcre2_jit_compile_8(self.code, PCRE2_JIT_COMPLETE)
96+
pcre2_jit_compile_8(self.code, PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD)
9797
};
9898
if error_code == 0 {
9999
self.compiled_jit = true;
@@ -427,6 +427,9 @@ impl MatchData {
427427
);
428428
if rc == PCRE2_ERROR_NOMATCH {
429429
Ok(false)
430+
} else if rc == PCRE2_ERROR_PARTIAL &&
431+
options & (PCRE2_PARTIAL_HARD | PCRE2_PARTIAL_SOFT) != 0 {
432+
Ok(true)
430433
} else if rc > 0 {
431434
Ok(true)
432435
} else {

0 commit comments

Comments
 (0)