|
| 1 | +//! Pattern matches using glob wildcards `*` and `?` with the |
| 2 | +//! linear-time algorithm <https://research.swtch.com/glob>. |
| 3 | +
|
| 4 | +use std::iter::Peekable; |
| 5 | +use std::str::Chars; |
| 6 | + |
/// A glob pattern made of ordinary characters, the single-character
/// wildcard `'?'`, and the multi-character wildcard `'*'`.
///
/// The pattern text is stored as an owned `String`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Pattern(String);
| 8 | + |
| 9 | +impl Pattern { |
| 10 | + /// Creates a new pattern matcher. Each pattern consists of |
| 11 | + /// regular characters, single-character wildcards `'?'`, and |
| 12 | + /// multi-character wildcards `'*'`. |
| 13 | + pub fn new(pattern: String) -> Self { |
| 14 | + Self(pattern) |
| 15 | + } |
| 16 | + |
| 17 | + /// Returns true if and only if the wildcard pattern matches the |
| 18 | + /// string. |
| 19 | + pub fn matches(&self, string: &str) -> bool { |
| 20 | + let processor = Processor { |
| 21 | + pattern: self.0.chars().peekable(), |
| 22 | + string: string.chars().peekable(), |
| 23 | + restart: None, |
| 24 | + }; |
| 25 | + |
| 26 | + processor.process() |
| 27 | + } |
| 28 | +} |
| 29 | + |
/// A character iterator over a string slice with one-item lookahead.
type PeekableChars<'a> = std::iter::Peekable<std::str::Chars<'a>>;
| 31 | + |
| 32 | +/// Represents the state we need to restart search from a star wildcard (`*`). |
| 33 | +struct Restart<'a> { |
| 34 | + pattern_next: PeekableChars<'a>, |
| 35 | + string_next: PeekableChars<'a>, |
| 36 | +} |
| 37 | + |
| 38 | +/// The runtime state for glob matching. |
| 39 | +struct Processor<'a> { |
| 40 | + pattern: PeekableChars<'a>, |
| 41 | + string: PeekableChars<'a>, |
| 42 | + restart: Option<Restart<'a>>, |
| 43 | +} |
| 44 | + |
/// Result of one matching step, telling the driver loop what to do next.
enum StepOutcome {
    /// The step succeeded; keep stepping.
    Proceed,
    /// The step could not advance; fall back to the saved restart point, if
    /// there is one.
    TryRestart,
}
| 50 | + |
| 51 | +impl Processor<'_> { |
| 52 | + /// Runs the pattern matching until we find an unrecoverable |
| 53 | + /// mismatch, or the input is consumed. |
| 54 | + fn process(mut self) -> bool { |
| 55 | + while self.is_unfinished() { |
| 56 | + let outcome = self.step(); |
| 57 | + if let StepOutcome::TryRestart = outcome { |
| 58 | + let restarted = self.try_restart(); |
| 59 | + if !restarted { |
| 60 | + return false; |
| 61 | + } |
| 62 | + } |
| 63 | + } |
| 64 | + |
| 65 | + true |
| 66 | + } |
| 67 | + |
| 68 | + fn is_unfinished(&mut self) -> bool { |
| 69 | + self.pattern.peek().is_some() || self.string.peek().is_some() |
| 70 | + } |
| 71 | + |
| 72 | + /// Takes a single step forward, and returns whether to proceed or try to |
| 73 | + /// restart. |
| 74 | + fn step(&mut self) -> StepOutcome { |
| 75 | + match self.pattern.peek() { |
| 76 | + Some('?') => self.step_question_wildcard(), |
| 77 | + Some('*') => self.step_star_wildcard(), |
| 78 | + Some(pattern_ch) => { |
| 79 | + let pattern_ch = *pattern_ch; |
| 80 | + self.step_ordinary_character(pattern_ch) |
| 81 | + } |
| 82 | + None => StepOutcome::TryRestart, |
| 83 | + } |
| 84 | + } |
| 85 | + |
| 86 | + /// Match any single character. |
| 87 | + fn step_question_wildcard(&mut self) -> StepOutcome { |
| 88 | + if self.string.peek().is_some() { |
| 89 | + _ = self.pattern.next(); |
| 90 | + _ = self.string.next(); |
| 91 | + StepOutcome::Proceed |
| 92 | + } else { |
| 93 | + StepOutcome::TryRestart |
| 94 | + } |
| 95 | + } |
| 96 | + |
| 97 | + /// Match zero or more characters. Start by skipping over the |
| 98 | + /// wildcard and matching zero characters from string. If that |
| 99 | + /// fails, restart and match one more character than the last |
| 100 | + /// attempt. |
| 101 | + fn step_star_wildcard(&mut self) -> StepOutcome { |
| 102 | + self.restart = if self.string.peek().is_none() { |
| 103 | + // Subtle: if the string is already exhausted, we mark |
| 104 | + // that we can't restart. |
| 105 | + None |
| 106 | + } else { |
| 107 | + let pattern_next = self.pattern.clone(); |
| 108 | + let mut string_next = self.string.clone(); |
| 109 | + string_next.next(); |
| 110 | + Some(Restart { pattern_next, string_next }) |
| 111 | + }; |
| 112 | + |
| 113 | + _ = self.pattern.next(); |
| 114 | + StepOutcome::Proceed |
| 115 | + } |
| 116 | + |
| 117 | + /// Match an ordinary (non-wildcard) character. |
| 118 | + fn step_ordinary_character(&mut self, pattern_ch: char) -> StepOutcome { |
| 119 | + if self.string.peek() == Some(&pattern_ch) { |
| 120 | + _ = self.pattern.next(); |
| 121 | + _ = self.string.next(); |
| 122 | + StepOutcome::Proceed |
| 123 | + } else { |
| 124 | + StepOutcome::TryRestart |
| 125 | + } |
| 126 | + } |
| 127 | + |
| 128 | + /// Try to restart from failing to match a character. If true, the |
| 129 | + /// matching can restart. |
| 130 | + fn try_restart(&mut self) -> bool { |
| 131 | + if let Some(Restart { pattern_next, string_next }) = &self.restart { |
| 132 | + self.pattern = pattern_next.clone(); |
| 133 | + self.string = string_next.clone(); |
| 134 | + true |
| 135 | + } else { |
| 136 | + false |
| 137 | + } |
| 138 | + } |
| 139 | +} |
| 140 | + |
/// Reports whether `s` contains at least one glob wildcard (`?` or `*`).
pub fn is_glob_pattern(s: &str) -> bool {
    s.chars().any(|ch| matches!(ch, '?' | '*'))
}
| 145 | + |
#[cfg(test)]
mod tests {
    use super::*;
    use crate::prelude::*;

    // --- Ordinary characters -------------------------------------------

    #[test]
    fn simple_character_match() -> Result<()> {
        let glob = Pattern::new(String::from("a"));
        verify_that!(glob.matches("a"), is_true())
    }

    #[test]
    fn simple_character_mismatch() -> Result<()> {
        let glob = Pattern::new(String::from("b"));
        verify_that!(glob.matches("a"), is_false())
    }

    #[test]
    fn simple_word_mismatch() -> Result<()> {
        let glob = Pattern::new(String::from("judgement"));
        verify_that!(glob.matches("judgment"), is_false())
    }

    // --- Single-character wildcard `?` ---------------------------------

    #[test]
    fn question_match() -> Result<()> {
        let glob = Pattern::new(String::from("?"));
        verify_that!(glob.matches("a"), is_true())
    }

    #[test]
    fn simple_word_question_match() -> Result<()> {
        let glob = Pattern::new(String::from("judg?ment"));
        verify_that!(glob.matches("judgment"), is_false())?;
        verify_that!(glob.matches("judgement"), is_true())?;
        verify_that!(glob.matches("judge ment"), is_false())?;
        Ok(())
    }

    #[test]
    fn question_mismatch() -> Result<()> {
        let glob = Pattern::new(String::from("?"));
        verify_that!(glob.matches(""), is_false())?;
        verify_that!(glob.matches("aa"), is_false())?;
        Ok(())
    }

    // --- Multi-character wildcard `*` ----------------------------------

    #[test]
    fn glob_on_empty() -> Result<()> {
        let single_star = Pattern::new(String::from("*"));
        let double_star = Pattern::new(String::from("**"));
        verify_that!(single_star.matches(""), is_true())?;
        verify_that!(double_star.matches(""), is_true())?;
        Ok(())
    }

    #[test]
    fn glob_prefix() -> Result<()> {
        let glob = Pattern::new(String::from("*a"));
        verify_that!(glob.matches(""), is_false())?;
        verify_that!(glob.matches("a"), is_true())?;
        verify_that!(glob.matches("ba"), is_true())?;
        verify_that!(glob.matches("bba"), is_true())?;
        verify_that!(glob.matches("bbab"), is_false())?;
        Ok(())
    }

    #[test]
    fn glob_within() -> Result<()> {
        let glob = Pattern::new(String::from("b*a"));
        verify_that!(glob.matches(""), is_false())?;
        verify_that!(glob.matches("b"), is_false())?;
        verify_that!(glob.matches("bb"), is_false())?;
        verify_that!(glob.matches("ba"), is_true())?;
        verify_that!(glob.matches("bbbba"), is_true())?;
        verify_that!(glob.matches("baa"), is_true())?;
        Ok(())
    }

    #[test]
    fn glob_suffix() -> Result<()> {
        let glob = Pattern::new(String::from("ba*"));
        verify_that!(glob.matches(""), is_false())?;
        verify_that!(glob.matches("b"), is_false())?;
        verify_that!(glob.matches("bb"), is_false())?;
        verify_that!(glob.matches("ba"), is_true())?;
        verify_that!(glob.matches("baa"), is_true())?;
        verify_that!(glob.matches("bab"), is_true())?;
        verify_that!(glob.matches("bba"), is_false())?;
        verify_that!(glob.matches("bbbba"), is_false())?;
        Ok(())
    }

    #[test]
    fn redundant_stars() -> Result<()> {
        let glob = Pattern::new(String::from("**a"));
        verify_that!(glob.matches(""), is_false())?;
        verify_that!(glob.matches("a"), is_true())?;
        verify_that!(glob.matches("ba"), is_true())?;
        verify_that!(glob.matches("bba"), is_true())?;
        verify_that!(glob.matches("bbab"), is_false())?;
        verify_that!(glob.matches("bbaba"), is_true())?;
        Ok(())
    }

    // --- Mixed wildcards -----------------------------------------------

    #[test]
    fn star_question_star_case() -> Result<()> {
        let glob = Pattern::new(String::from("*?*"));
        verify_that!(glob.matches(""), is_false())?;
        verify_that!(glob.matches("a"), is_true())?;
        verify_that!(glob.matches("aa"), is_true())?;
        verify_that!(glob.matches("aaa"), is_true())?;
        Ok(())
    }

    #[test]
    fn another_case_finding_two_separated_a() -> Result<()> {
        let glob = Pattern::new(String::from("*a?a*"));
        verify_that!(glob.matches(""), is_false())?;
        verify_that!(glob.matches("a"), is_false())?;
        verify_that!(glob.matches("aa"), is_false())?;
        verify_that!(glob.matches("aaa"), is_true())?;
        verify_that!(glob.matches("aba"), is_true())?;
        verify_that!(glob.matches("baba"), is_true())?;
        verify_that!(glob.matches("abab"), is_true())?;
        verify_that!(glob.matches("babab"), is_true())?;
        Ok(())
    }

    #[test]
    fn banana() -> Result<()> {
        let glob = Pattern::new(String::from("b?n???"));

        // Six letters with `b` first and `n` third.
        verify_that!(glob.matches("banana"), is_true())?;
        verify_that!(glob.matches("binary"), is_true())?;
        verify_that!(glob.matches("bundle"), is_true())?;
        verify_that!(glob.matches("bindir"), is_true())?;

        // Wrong length, or no `n` in third position.
        verify_that!(glob.matches("bananas"), is_false())?;
        verify_that!(glob.matches("bucket"), is_false())?;
        verify_that!(glob.matches("budget"), is_false())?;
        verify_that!(glob.matches("bazzar"), is_false())?;
        verify_that!(glob.matches("burger"), is_false())?;
        Ok(())
    }

    #[test]
    fn glob_word() -> Result<()> {
        let contains_word = Pattern::new(String::from("*word*"));
        verify_that!(contains_word.matches("bird"), is_false())?;
        verify_that!(contains_word.matches("This is a wordy sentence"), is_true())?;
        verify_that!(contains_word.matches("word soup"), is_true())?;
        verify_that!(contains_word.matches("bird is the word"), is_true())?;
        verify_that!(contains_word.matches("word"), is_true())?;
        Ok(())
    }

    // --- Degenerate patterns -------------------------------------------

    #[test]
    fn degenerate_glob() -> Result<()> {
        let many_stars =
            Pattern::new(String::from("************************************************.*"));
        verify_that!(many_stars.matches("this is a test.com"), is_true())
    }

    #[test]
    fn degenerate_glob_2_mismatch() -> Result<()> {
        // The first example from https://research.swtch.com/glob, with N=1000.
        const N: usize = 1000;
        let long_pattern = "a*".repeat(N) + "b";
        let long_string = "a".repeat(N);
        verify_that!(Pattern::new(long_pattern).matches(&long_string), is_false())
    }
}
0 commit comments