12
12
use function array_key_last ;
13
13
use function array_pop ;
14
14
use function count ;
15
+ use function ctype_alpha ;
15
16
use function preg_match ;
16
17
use function preg_quote ;
17
18
use function sprintf ;
39
40
private const T_GREEDY_GLOBSTAR = 'greedy_globstar ' ;
40
41
private const T_QUERY = 'query ' ;
41
42
private const T_GLOBSTAR = 'globstar ' ;
43
+ private const T_COLON = 'colon ' ;
44
+ private const T_CHAR_CLASS = 'char_class ' ;
42
45
43
46
public static function match (string $ path , FileMatcherPattern $ pattern ): bool
44
47
{
@@ -80,6 +83,7 @@ public static function toRegEx($glob, $flags = 0): string
80
83
self ::T_BRACKET_OPEN => '[ ' ,
81
84
self ::T_BRACKET_CLOSE => '] ' ,
82
85
self ::T_HYPHEN => '- ' ,
86
+ self ::T_CHAR_CLASS => '[: ' . $ token [1 ] . ':] ' ,
83
87
default => '' ,
84
88
};
85
89
}
@@ -121,6 +125,7 @@ private static function tokenize(string $glob): array
121
125
'* ' => [self ::T_ASTERIX , $ c ],
122
126
'/ ' => [self ::T_SLASH , $ c ],
123
127
'\\' => [self ::T_BACKSLASH , $ c ],
128
+ ': ' => [self ::T_COLON , $ c ],
124
129
default => [self ::T_CHAR , $ c ],
125
130
};
126
131
}
@@ -135,13 +140,14 @@ private static function tokenize(string $glob): array
135
140
*/
136
141
private static function processTokens (array $ tokens ): array
137
142
{
138
- $ resolved = [];
139
- $ escaped = false ;
143
+ $ resolved = [];
144
+ $ escaped = false ;
140
145
$ bracketOpen = false ;
141
- $ brackets = [];
146
+ $ brackets = [];
142
147
143
148
for ($ offset = 0 ; $ offset < count ($ tokens ); $ offset ++) {
144
149
[$ type , $ char ] = $ tokens [$ offset ];
150
+ $ nextType = $ tokens [$ offset + 1 ][0 ] ?? null ;
145
151
146
152
if ($ type === self ::T_BACKSLASH && false === $ escaped ) {
147
153
$ escaped = true ;
@@ -205,27 +211,50 @@ private static function processTokens(array $tokens): array
205
211
continue ;
206
212
}
207
213
208
- if ($ type === self ::T_BRACKET_OPEN && $ tokens [ $ offset + 1 ][ 0 ] === self ::T_BRACKET_CLOSE ) {
214
+ if ($ type === self ::T_BRACKET_OPEN && $ nextType === self ::T_BRACKET_CLOSE ) {
209
215
$ bracketOpen = true ;
210
- $ resolved [] = [self ::T_BRACKET_OPEN , '[ ' ];
211
- $ brackets [] = array_key_last ($ resolved );
212
- $ resolved [] = [self ::T_CHAR , '] ' ];
213
- $ offset += 1 ;
216
+ $ resolved [] = [self ::T_BRACKET_OPEN , '[ ' ];
217
+ $ brackets [] = array_key_last ($ resolved );
218
+ $ resolved [] = [self ::T_CHAR , '] ' ];
219
+ $ offset++ ;
214
220
215
221
continue ;
216
222
}
217
223
224
+ if ($ bracketOpen && $ type === self ::T_BRACKET_OPEN && $ nextType === self ::T_COLON ) {
225
+ // this looks like a named [:character:] class
226
+ $ class = '' ;
227
+ $ offset += 2 ;
228
+
229
+ // parse the character class name
230
+ while (ctype_alpha ($ tokens [$ offset ][1 ])) {
231
+ $ class .= $ tokens [$ offset ++][1 ];
232
+ }
233
+
234
+ // if followed by a `:` then it's a character class
235
+ if ($ tokens [$ offset ][0 ] === self ::T_COLON ) {
236
+ $ offset ++;
237
+ $ resolved [] = [self ::T_CHAR_CLASS , $ class ];
238
+
239
+ continue ;
240
+ }
241
+
242
+ // otherwise it's a harmless literal
243
+ $ resolved [] = [self ::T_CHAR , ': ' . $ class ];
244
+ }
245
+
218
246
if ($ bracketOpen === true && $ type === self ::T_BRACKET_OPEN ) {
219
247
// if bracket is already open, interpret everything as a
220
248
// literal char
221
249
$ resolved [] = [self ::T_CHAR , $ char ];
250
+
222
251
continue ;
223
252
}
224
253
225
254
if ($ bracketOpen === false && $ type === self ::T_BRACKET_OPEN ) {
226
255
$ bracketOpen = true ;
227
- $ resolved [] = [$ type , $ char ];
228
- $ brackets [] = array_key_last ($ resolved );
256
+ $ resolved [] = [$ type , $ char ];
257
+ $ brackets [] = array_key_last ($ resolved );
229
258
230
259
continue ;
231
260
}
0 commit comments