Skip to content

Commit 177cdde

Browse files
committed
Support char classes
1 parent ab3c439 commit 177cdde

File tree

2 files changed

+82
-23
lines changed

2 files changed

+82
-23
lines changed

src/Util/FileMatcher.php

Lines changed: 39 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
use function array_key_last;
1313
use function array_pop;
1414
use function count;
15+
use function ctype_alpha;
1516
use function preg_match;
1617
use function preg_quote;
1718
use function sprintf;
@@ -39,6 +40,8 @@
3940
private const T_GREEDY_GLOBSTAR = 'greedy_globstar';
4041
private const T_QUERY = 'query';
4142
private const T_GLOBSTAR = 'globstar';
43+
private const T_COLON = 'colon';
44+
private const T_CHAR_CLASS = 'char_class';
4245

4346
public static function match(string $path, FileMatcherPattern $pattern): bool
4447
{
@@ -80,6 +83,7 @@ public static function toRegEx($glob, $flags = 0): string
8083
self::T_BRACKET_OPEN => '[',
8184
self::T_BRACKET_CLOSE => ']',
8285
self::T_HYPHEN => '-',
86+
self::T_CHAR_CLASS => '[:' . $token[1] . ':]',
8387
default => '',
8488
};
8589
}
@@ -121,6 +125,7 @@ private static function tokenize(string $glob): array
121125
'*' => [self::T_ASTERIX, $c],
122126
'/' => [self::T_SLASH, $c],
123127
'\\' => [self::T_BACKSLASH, $c],
128+
':' => [self::T_COLON, $c],
124129
default => [self::T_CHAR, $c],
125130
};
126131
}
@@ -135,13 +140,14 @@ private static function tokenize(string $glob): array
135140
*/
136141
private static function processTokens(array $tokens): array
137142
{
138-
$resolved = [];
139-
$escaped = false;
143+
$resolved = [];
144+
$escaped = false;
140145
$bracketOpen = false;
141-
$brackets = [];
146+
$brackets = [];
142147

143148
for ($offset = 0; $offset < count($tokens); $offset++) {
144149
[$type, $char] = $tokens[$offset];
150+
$nextType = $tokens[$offset + 1][0] ?? null;
145151

146152
if ($type === self::T_BACKSLASH && false === $escaped) {
147153
$escaped = true;
@@ -205,27 +211,50 @@ private static function processTokens(array $tokens): array
205211
continue;
206212
}
207213

208-
if ($type === self::T_BRACKET_OPEN && $tokens[$offset + 1][0] === self::T_BRACKET_CLOSE) {
214+
if ($type === self::T_BRACKET_OPEN && $nextType === self::T_BRACKET_CLOSE) {
209215
$bracketOpen = true;
210-
$resolved[] = [self::T_BRACKET_OPEN, '['];
211-
$brackets[] = array_key_last($resolved);
212-
$resolved[] = [self::T_CHAR, ']'];
213-
$offset += 1;
216+
$resolved[] = [self::T_BRACKET_OPEN, '['];
217+
$brackets[] = array_key_last($resolved);
218+
$resolved[] = [self::T_CHAR, ']'];
219+
$offset++;
214220

215221
continue;
216222
}
217223

224+
if ($bracketOpen && $type === self::T_BRACKET_OPEN && $nextType === self::T_COLON) {
225+
// this looks like a named [:character:] class
226+
$class = '';
227+
$offset += 2;
228+
229+
// parse the character class name
230+
while (ctype_alpha($tokens[$offset][1])) {
231+
$class .= $tokens[$offset++][1];
232+
}
233+
234+
// if followed by a `:` then it's a character class
235+
if ($tokens[$offset][0] === self::T_COLON) {
236+
$offset++;
237+
$resolved[] = [self::T_CHAR_CLASS, $class];
238+
239+
continue;
240+
}
241+
242+
// otherwise it's a harmless literal
243+
$resolved[] = [self::T_CHAR, ':' . $class];
244+
}
245+
218246
if ($bracketOpen === true && $type === self::T_BRACKET_OPEN) {
219247
// if bracket is already open, interpret everything as a
220248
// literal char
221249
$resolved[] = [self::T_CHAR, $char];
250+
222251
continue;
223252
}
224253

225254
if ($bracketOpen === false && $type === self::T_BRACKET_OPEN) {
226255
$bracketOpen = true;
227-
$resolved[] = [$type, $char];
228-
$brackets[] = array_key_last($resolved);
256+
$resolved[] = [$type, $char];
257+
$brackets[] = array_key_last($resolved);
229258

230259
continue;
231260
}

tests/unit/Util/FileMatcherTest.php

Lines changed: 43 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -302,7 +302,7 @@ public static function provideCharacterGroup(): Generator
302302
'/A' => true,
303303
'/B' => true,
304304

305-
'/Z' => false,
305+
'/Z' => false,
306306
'/[c' => false,
307307
],
308308
];
@@ -503,17 +503,47 @@ public static function provideCharacterGroup(): Generator
503503
// [:alnum:] [:alpha:] [:blank:] [:cntrl:]
504504
// [:digit:] [:graph:] [:lower:] [:print:]
505505
// [:punct:] [:space:] [:upper:] [:xdigit:]
506-
yield 'character class...' => [
507-
new FileMatcherPattern('/a/[:alnum:]/c'),
506+
yield 'character class [:alnum:]' => [
507+
new FileMatcherPattern('/a/[[:alnum:]]/c'),
508508
[
509509
'/a/1/c' => true,
510510
'/a/2/c' => true,
511511
'/b/!/c' => false,
512512
],
513-
'Named character classes',
514513
];
515514

516-
// TODO: all of these?
515+
yield 'character class [:digit:]' => [
516+
new FileMatcherPattern('/a/[[:digit:]]/c'),
517+
[
518+
'/a/1/c' => true,
519+
'/a/2/c' => true,
520+
'/b/!/c' => false,
521+
'/b/b/c' => false,
522+
],
523+
];
524+
525+
yield 'multiple character classes' => [
526+
new FileMatcherPattern('/a/[[:digit:][:lower:]]/c'),
527+
[
528+
'/a/1/c' => true,
529+
'/a/2/c' => true,
530+
'/b/!/c' => false,
531+
'/a/b/c' => true,
532+
],
533+
];
534+
535+
yield 'multiple character classes and range' => [
536+
new FileMatcherPattern('/a/[@[:upper:][:lower:]5-7]/c'),
537+
[
538+
'/a/b/c' => true,
539+
'/a/B/c' => true,
540+
'/a/5/c' => true,
541+
'/a/7/c' => true,
542+
'/a/@/c' => true,
543+
],
544+
];
545+
546+
// TODO: ...
517547
// Collating symbols, like "[.ch.]" or "[.a-acute.]", where the
518548
// string between "[." and ".]" is a collating element defined for
519549
// the current locale. Note that this may be a multicharacter
@@ -527,7 +557,7 @@ public static function provideCharacterGroup(): Generator
527557
'Collating symbols',
528558
];
529559

530-
// TODO: all of these?
560+
// TODO: ...
531561
// Equivalence class expressions, like "[=a=]", where the string
532562
// between "[=" and "=]" is any collating element from its
533563
// equivalence class, as defined for the current locale. For
@@ -560,13 +590,6 @@ public static function provideRelativePathSegments(): Generator
560590
];
561591
}
562592

563-
public function testExceptionIfPathIsNotAbsolute(): void
564-
{
565-
$this->expectException(RuntimeException::class);
566-
$this->expectExceptionMessage('Path "foo/bar" must be absolute');
567-
FileMatcher::match('foo/bar', new FileMatcherPattern(''));
568-
}
569-
570593
/**
571594
* @param array<FileMatcherPattern,bool> $matchMap
572595
*/
@@ -585,6 +608,13 @@ public function testMatch(FileMatcherPattern $pattern, array $matchMap, ?string
585608
self::assertMap($pattern, $matchMap);
586609
}
587610

611+
public function testExceptionIfPathIsNotAbsolute(): void
612+
{
613+
$this->expectException(RuntimeException::class);
614+
$this->expectExceptionMessage('Path "foo/bar" must be absolute');
615+
FileMatcher::match('foo/bar', new FileMatcherPattern(''));
616+
}
617+
588618
/**
589619
* @param array<FileMatcherPattern,bool> $matchMap
590620
*/

0 commit comments

Comments
 (0)