Skip to content

Commit 73ca1d3

Browse files
Googler, copybara-github
authored and committed
Port the linear-time string globbing algorithm of https://research.swtch.com/glob
This supports '*' and '?' wildcard characters using the algorithm described in https://research.swtch.com/glob; this will be used for expressing test name filters for the upcoming `TESTBRIDGE_TEST_ONLY` feature support, to help match with C++ gtest behavior. PiperOrigin-RevId: 733788071
1 parent b5f3a93 commit 73ca1d3

File tree

2 files changed

+313
-0
lines changed

2 files changed

+313
-0
lines changed

googletest/src/internal/glob.rs

Lines changed: 312 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,312 @@
1+
//! Pattern matches using glob wildcards `*` and `?` with the
2+
//! linear-time algorithm <https://research.swtch.com/glob>.
3+
4+
use std::iter::Peekable;
5+
use std::str::Chars;
6+
7+
pub struct Pattern(String);
8+
9+
impl Pattern {
10+
/// Creates a new pattern matcher. Each pattern consists of
11+
/// regular characters, single-character wildcards `'?'`, and
12+
/// multi-character wildcards `'*'`.
13+
pub fn new(pattern: String) -> Self {
14+
Self(pattern)
15+
}
16+
17+
/// Returns true if and only if the wildcard pattern matches the
18+
/// string.
19+
pub fn matches(&self, string: &str) -> bool {
20+
let processor = Processor {
21+
pattern: self.0.chars().peekable(),
22+
string: string.chars().peekable(),
23+
restart: None,
24+
};
25+
26+
processor.process()
27+
}
28+
}
29+
30+
/// Character cursor with one character of lookahead.
type PeekableChars<'a> = Peekable<Chars<'a>>;

/// Saved cursors used to resume matching from the most recently seen star
/// wildcard (`*`).
struct Restart<'a> {
    /// Pattern cursor still positioned on the `*` itself.
    pattern_next: PeekableChars<'a>,
    /// String cursor one character beyond where the `*` was last tried.
    string_next: PeekableChars<'a>,
}

/// Working state of one glob match: both cursors plus the optional
/// restart point.
struct Processor<'a> {
    pattern: PeekableChars<'a>,
    string: PeekableChars<'a>,
    restart: Option<Restart<'a>>,
}

/// Tells the driver loop what to do once a step has run.
enum StepOutcome {
    /// The step succeeded; keep going from the current cursors.
    Proceed,
    /// The step failed; fall back to the saved restart point, if any.
    TryRestart,
}

impl Processor<'_> {
    /// Drives the match to completion.
    ///
    /// Returns `true` once both the pattern and the string are fully
    /// consumed, and `false` as soon as a step fails with no restart point
    /// to fall back to.
    fn process(mut self) -> bool {
        loop {
            if !self.is_unfinished() {
                return true;
            }
            match self.step() {
                StepOutcome::Proceed => {}
                StepOutcome::TryRestart => {
                    if !self.try_restart() {
                        return false;
                    }
                }
            }
        }
    }

    /// True while the pattern or the string still has characters left.
    fn is_unfinished(&mut self) -> bool {
        !(self.pattern.peek().is_none() && self.string.peek().is_none())
    }

    /// Performs one step, dispatching on the next pattern character. An
    /// exhausted pattern (with string characters remaining) counts as a
    /// failed step.
    fn step(&mut self) -> StepOutcome {
        match self.pattern.peek().copied() {
            None => StepOutcome::TryRestart,
            Some('?') => self.step_question_wildcard(),
            Some('*') => self.step_star_wildcard(),
            Some(literal) => self.step_ordinary_character(literal),
        }
    }

    /// Handles `?`: consumes one character from both cursors, or fails if
    /// the string has nothing left.
    fn step_question_wildcard(&mut self) -> StepOutcome {
        if self.string.peek().is_none() {
            return StepOutcome::TryRestart;
        }
        self.pattern.next();
        self.string.next();
        StepOutcome::Proceed
    }

    /// Handles `*`: first tries to match zero characters by stepping past
    /// the wildcard. The restart point recorded here lets a later failure
    /// retry with the star absorbing one more character of the string.
    fn step_star_wildcard(&mut self) -> StepOutcome {
        let mut string_next = self.string.clone();
        self.restart = match string_next.next() {
            // The restart keeps the pattern cursor on the `*` so the star is
            // processed again after the string cursor has moved forward.
            Some(_) => Some(Restart { pattern_next: self.pattern.clone(), string_next }),
            // Subtle: with the string already exhausted there is nothing
            // further the star could absorb, so restarting is disabled.
            None => None,
        };

        self.pattern.next();
        StepOutcome::Proceed
    }

    /// Handles an ordinary (non-wildcard) pattern character, which must
    /// equal the next character of the string.
    fn step_ordinary_character(&mut self, pattern_ch: char) -> StepOutcome {
        if self.string.peek().copied() != Some(pattern_ch) {
            return StepOutcome::TryRestart;
        }
        self.pattern.next();
        self.string.next();
        StepOutcome::Proceed
    }

    /// Rewinds both cursors to the saved restart point. Returns `false`
    /// when no restart point is available.
    fn try_restart(&mut self) -> bool {
        match self.restart.as_ref() {
            None => false,
            Some(saved) => {
                self.pattern = saved.pattern_next.clone();
                self.string = saved.string_next.clone();
                true
            }
        }
    }
}
140+
141+
/// Reports whether `s` contains at least one glob wildcard character,
/// i.e. `?` or `*`.
pub fn is_glob_pattern(s: &str) -> bool {
    s.chars().any(|ch| matches!(ch, '?' | '*'))
}
145+
146+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::prelude::*;

    /// Builds a [`Pattern`] from a string literal.
    fn glob(pattern: &str) -> Pattern {
        Pattern::new(pattern.to_string())
    }

    // ---- Ordinary characters ----

    #[test]
    fn simple_character_match() -> Result<()> {
        verify_that!(glob("a").matches("a"), is_true())
    }

    #[test]
    fn simple_character_mismatch() -> Result<()> {
        verify_that!(glob("b").matches("a"), is_false())
    }

    #[test]
    fn simple_word_mismatch() -> Result<()> {
        verify_that!(glob("judgement").matches("judgment"), is_false())
    }

    // ---- Single-character wildcard `?` ----

    #[test]
    fn question_match() -> Result<()> {
        verify_that!(glob("?").matches("a"), is_true())
    }

    #[test]
    fn simple_word_question_match() -> Result<()> {
        let judgement = glob("judg?ment");
        verify_that!(judgement.matches("judgment"), is_false())?;
        verify_that!(judgement.matches("judgement"), is_true())?;
        verify_that!(judgement.matches("judge ment"), is_false())?;
        Ok(())
    }

    #[test]
    fn question_mismatch() -> Result<()> {
        let any_one = glob("?");
        verify_that!(any_one.matches(""), is_false())?;
        verify_that!(any_one.matches("aa"), is_false())?;
        Ok(())
    }

    // ---- Multi-character wildcard `*` ----

    #[test]
    fn glob_on_empty() -> Result<()> {
        verify_that!(glob("*").matches(""), is_true())?;
        verify_that!(glob("**").matches(""), is_true())?;
        Ok(())
    }

    #[test]
    fn glob_prefix() -> Result<()> {
        let ends_with_a = glob("*a");
        verify_that!(ends_with_a.matches(""), is_false())?;
        verify_that!(ends_with_a.matches("a"), is_true())?;
        verify_that!(ends_with_a.matches("ba"), is_true())?;
        verify_that!(ends_with_a.matches("bba"), is_true())?;
        verify_that!(ends_with_a.matches("bbab"), is_false())?;
        Ok(())
    }

    #[test]
    fn glob_within() -> Result<()> {
        let b_then_a = glob("b*a");
        verify_that!(b_then_a.matches(""), is_false())?;
        verify_that!(b_then_a.matches("b"), is_false())?;
        verify_that!(b_then_a.matches("bb"), is_false())?;
        verify_that!(b_then_a.matches("ba"), is_true())?;
        verify_that!(b_then_a.matches("bbbba"), is_true())?;
        verify_that!(b_then_a.matches("baa"), is_true())?;
        Ok(())
    }

    #[test]
    fn glob_suffix() -> Result<()> {
        let starts_with_ba = glob("ba*");
        verify_that!(starts_with_ba.matches(""), is_false())?;
        verify_that!(starts_with_ba.matches("b"), is_false())?;
        verify_that!(starts_with_ba.matches("bb"), is_false())?;
        verify_that!(starts_with_ba.matches("ba"), is_true())?;
        verify_that!(starts_with_ba.matches("baa"), is_true())?;
        verify_that!(starts_with_ba.matches("bab"), is_true())?;
        verify_that!(starts_with_ba.matches("bba"), is_false())?;
        verify_that!(starts_with_ba.matches("bbbba"), is_false())?;
        Ok(())
    }

    #[test]
    fn redundant_stars() -> Result<()> {
        let double_star_a = glob("**a");
        verify_that!(double_star_a.matches(""), is_false())?;
        verify_that!(double_star_a.matches("a"), is_true())?;
        verify_that!(double_star_a.matches("ba"), is_true())?;
        verify_that!(double_star_a.matches("bba"), is_true())?;
        verify_that!(double_star_a.matches("bbab"), is_false())?;
        verify_that!(double_star_a.matches("bbaba"), is_true())?;
        Ok(())
    }

    // ---- Mixed wildcards ----

    #[test]
    fn star_question_star_case() -> Result<()> {
        let at_least_one = glob("*?*");
        verify_that!(at_least_one.matches(""), is_false())?;
        verify_that!(at_least_one.matches("a"), is_true())?;
        verify_that!(at_least_one.matches("aa"), is_true())?;
        verify_that!(at_least_one.matches("aaa"), is_true())?;
        Ok(())
    }

    #[test]
    fn another_case_finding_two_separated_a() -> Result<()> {
        let two_separated_a = glob("*a?a*");
        verify_that!(two_separated_a.matches(""), is_false())?;
        verify_that!(two_separated_a.matches("a"), is_false())?;
        verify_that!(two_separated_a.matches("aa"), is_false())?;
        verify_that!(two_separated_a.matches("aaa"), is_true())?;
        verify_that!(two_separated_a.matches("aba"), is_true())?;
        verify_that!(two_separated_a.matches("baba"), is_true())?;
        verify_that!(two_separated_a.matches("abab"), is_true())?;
        verify_that!(two_separated_a.matches("babab"), is_true())?;
        Ok(())
    }

    #[test]
    fn banana() -> Result<()> {
        let six_letters = glob("b?n???");
        verify_that!(six_letters.matches("banana"), is_true())?;
        verify_that!(six_letters.matches("binary"), is_true())?;
        verify_that!(six_letters.matches("bundle"), is_true())?;
        verify_that!(six_letters.matches("bindir"), is_true())?;

        verify_that!(six_letters.matches("bananas"), is_false())?;
        verify_that!(six_letters.matches("bucket"), is_false())?;
        verify_that!(six_letters.matches("budget"), is_false())?;
        verify_that!(six_letters.matches("bazzar"), is_false())?;
        verify_that!(six_letters.matches("burger"), is_false())?;
        Ok(())
    }

    #[test]
    fn glob_word() -> Result<()> {
        let contains_word = glob("*word*");
        verify_that!(contains_word.matches("bird"), is_false())?;
        verify_that!(contains_word.matches("This is a wordy sentence"), is_true())?;
        verify_that!(contains_word.matches("word soup"), is_true())?;
        verify_that!(contains_word.matches("bird is the word"), is_true())?;
        verify_that!(contains_word.matches("word"), is_true())?;
        Ok(())
    }

    // ---- Degenerate patterns ----

    #[test]
    fn degenerate_glob() -> Result<()> {
        let many_stars = glob("************************************************.*");
        verify_that!(many_stars.matches("this is a test.com"), is_true())
    }

    #[test]
    fn degenerate_glob_2_mismatch() -> Result<()> {
        // The first example from https://research.swtch.com/glob, with N=1000.
        const N: usize = 1000;
        let long_pattern = "a*".repeat(N) + "b";
        let long_string = "a".repeat(N);
        verify_that!(Pattern::new(long_pattern).matches(&long_string), is_false())
    }
}

googletest/src/internal/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@
1515
#![doc(hidden)]
1616

1717
pub(crate) mod description_renderer;
18+
pub mod glob;
1819
pub mod test_outcome;
1920
pub mod test_sharding;

0 commit comments

Comments
 (0)