60
60
import org .truffleruby .core .string .TStringWithEncoding ;
61
61
import org .truffleruby .core .string .StringSupport ;
62
62
import org .truffleruby .core .string .StringUtils ;
63
- import org .truffleruby .language .backtrace .BacktraceFormatter ;
64
63
import org .truffleruby .language .control .DeferredRaiseException ;
65
64
import org .truffleruby .language .control .RaiseException ;
66
65
@@ -85,13 +84,24 @@ public static Regex makeRegexp(RubyDeferredWarnings rubyDeferredWarnings,
85
84
? new RegexWarnCallback ()
86
85
: new RegexWarnDeferredCallback (rubyDeferredWarnings ));
87
86
} catch (Exception e ) {
88
- String errorMessage = getRegexErrorMessage (source , e , options );
87
+ String errorMessage = getRegexErrorMessageForException (source , e , options );
89
88
throw new DeferredRaiseException (c -> c .getCoreExceptions ().regexpError (errorMessage , currentNode ));
90
89
}
91
90
}
92
91
93
- public static String getRegexErrorMessage (AbstractTruffleString source , Exception e , RegexpOptions options ) {
94
- return BacktraceFormatter .formatJavaThrowableMessage (e ) + ": /" + source + "/" + options .toOptionsString ();
92
+ private static String getRegexErrorMessageForException (AbstractTruffleString source , Exception e ,
93
+ RegexpOptions options ) {
94
+ String message = e .getMessage ();
95
+
96
+ if (message == null ) {
97
+ message = "<no message>" ;
98
+ }
99
+
100
+ return formatRegexErrorMessage (message , source , options .toOptionsString ());
101
+ }
102
+
103
+ private static String formatRegexErrorMessage (String error , AbstractTruffleString source , String options ) {
104
+ return error + ": /" + source + "/" + options ;
95
105
}
96
106
97
107
@ TruffleBoundary
@@ -112,7 +122,7 @@ private static boolean unescapeNonAscii(TStringBuilder to, TStringWithEncoding s
112
122
while (p < end ) {
113
123
final int cl = strInEnc .characterLength (p - offset );
114
124
if (cl <= 0 ) {
115
- raisePreprocessError ("invalid multibyte character" , mode );
125
+ raisePreprocessError ("invalid multibyte character" , str , mode );
116
126
}
117
127
if (cl > 1 || (bytes [p ] & 0x80 ) != 0 ) {
118
128
if (to != null ) {
@@ -122,15 +132,15 @@ private static boolean unescapeNonAscii(TStringBuilder to, TStringWithEncoding s
122
132
if (encp [0 ] == null ) {
123
133
encp [0 ] = enc ;
124
134
} else if (encp [0 ] != enc ) {
125
- raisePreprocessError ("non ASCII character in UTF-8 regexp" , mode );
135
+ raisePreprocessError ("non ASCII character in UTF-8 regexp" , str , mode );
126
136
}
127
137
continue ;
128
138
}
129
139
int c ;
130
140
switch (c = bytes [p ++] & 0xff ) {
131
141
case '\\' :
132
142
if (p == end ) {
133
- raisePreprocessError ("too short escape sequence" , mode );
143
+ raisePreprocessError ("too short escape sequence" , str , mode );
134
144
}
135
145
136
146
switch (c = bytes [p ++] & 0xff ) {
@@ -160,7 +170,7 @@ private static boolean unescapeNonAscii(TStringBuilder to, TStringWithEncoding s
160
170
buf = new byte [1 ];
161
171
}
162
172
int pbeg = p ;
163
- p = readEscapedByte (buf , 0 , bytes , p , end , mode );
173
+ p = readEscapedByte (buf , 0 , bytes , p , end , str , mode );
164
174
c = buf [0 ];
165
175
if (c == -1 ) {
166
176
return false ;
@@ -169,22 +179,22 @@ private static boolean unescapeNonAscii(TStringBuilder to, TStringWithEncoding s
169
179
to .append (bytes , pbeg , p - pbeg );
170
180
}
171
181
} else {
172
- p = unescapeEscapedNonAscii (to , bytes , p , end , enc , encp , mode );
182
+ p = unescapeEscapedNonAscii (to , bytes , p , end , enc , encp , str , mode );
173
183
}
174
184
break ;
175
185
176
186
case 'u' :
177
187
if (p == end ) {
178
- raisePreprocessError ("too short escape sequence" , mode );
188
+ raisePreprocessError ("too short escape sequence" , str , mode );
179
189
}
180
190
if (bytes [p ] == (byte ) '{' ) { /* \\u{H HH HHH HHHH HHHHH HHHHHH ...} */
181
191
p ++;
182
- p = unescapeUnicodeList (to , bytes , p , end , encp , mode );
192
+ p = unescapeUnicodeList (to , bytes , p , end , encp , str , mode );
183
193
if (p == end || bytes [p ++] != (byte ) '}' ) {
184
- raisePreprocessError ("invalid Unicode list" , mode );
194
+ raisePreprocessError ("invalid Unicode list" , str , mode );
185
195
}
186
196
} else { /* \\uHHHH */
187
- p = unescapeUnicodeBmp (to , bytes , p , end , encp , mode );
197
+ p = unescapeUnicodeBmp (to , bytes , p , end , encp , str , mode );
188
198
}
189
199
break ;
190
200
case 'p' : /* \p{Hiragana} */
@@ -217,21 +227,23 @@ private static boolean unescapeNonAscii(TStringBuilder to, TStringWithEncoding s
217
227
}
218
228
219
229
private static int unescapeUnicodeBmp (TStringBuilder to , byte [] bytes , int p , int end ,
220
- RubyEncoding [] encp , RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
230
+ RubyEncoding [] encp , TStringWithEncoding source , RegexpSupport .ErrorMode mode )
231
+ throws DeferredRaiseException {
221
232
if (p + 4 > end ) {
222
- raisePreprocessError ("invalid Unicode escape" , mode );
233
+ raisePreprocessError ("invalid Unicode escape" , source , mode );
223
234
}
224
235
int code = StringSupport .scanHex (bytes , p , 4 );
225
236
int len = StringSupport .hexLength (bytes , p , 4 );
226
237
if (len != 4 ) {
227
- raisePreprocessError ("invalid Unicode escape" , mode );
238
+ raisePreprocessError ("invalid Unicode escape" , source , mode );
228
239
}
229
- appendUtf8 (to , code , encp , mode );
240
+ appendUtf8 (to , code , encp , source , mode );
230
241
return p + 4 ;
231
242
}
232
243
233
244
private static int unescapeUnicodeList (TStringBuilder to , byte [] bytes , int p , int end ,
234
- RubyEncoding [] encp , RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
245
+ RubyEncoding [] encp , TStringWithEncoding source , RegexpSupport .ErrorMode mode )
246
+ throws DeferredRaiseException {
235
247
while (p < end && StringSupport .isAsciiSpace (bytes [p ] & 0xff )) {
236
248
p ++;
237
249
}
@@ -244,11 +256,11 @@ private static int unescapeUnicodeList(TStringBuilder to, byte[] bytes, int p, i
244
256
break ;
245
257
}
246
258
if (len > 6 ) {
247
- raisePreprocessError ("invalid Unicode range" , mode );
259
+ raisePreprocessError ("invalid Unicode range" , source , mode );
248
260
}
249
261
p += len ;
250
262
if (to != null ) {
251
- appendUtf8 (to , code , encp , mode );
263
+ appendUtf8 (to , code , encp , source , mode );
252
264
}
253
265
hasUnicode = true ;
254
266
while (p < end && StringSupport .isAsciiSpace (bytes [p ] & 0xff )) {
@@ -257,14 +269,14 @@ private static int unescapeUnicodeList(TStringBuilder to, byte[] bytes, int p, i
257
269
}
258
270
259
271
if (!hasUnicode ) {
260
- raisePreprocessError ("invalid Unicode list" , mode );
272
+ raisePreprocessError ("invalid Unicode list" , source , mode );
261
273
}
262
274
return p ;
263
275
}
264
276
265
277
private static void appendUtf8 (TStringBuilder to , int code , RubyEncoding [] enc ,
266
- RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
267
- checkUnicodeRange (code , mode );
278
+ TStringWithEncoding source , RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
279
+ checkUnicodeRange (code , source , mode );
268
280
269
281
if (code < 0x80 ) {
270
282
if (to != null ) {
@@ -278,7 +290,7 @@ private static void appendUtf8(TStringBuilder to, int code, RubyEncoding[] enc,
278
290
if (enc [0 ] == null ) {
279
291
enc [0 ] = Encodings .UTF_8 ;
280
292
} else if (enc [0 ] != Encodings .UTF_8 ) {
281
- raisePreprocessError ("UTF-8 character in non UTF-8 regexp" , mode );
293
+ raisePreprocessError ("UTF-8 character in non UTF-8 regexp" , source , mode );
282
294
}
283
295
}
284
296
}
@@ -320,29 +332,29 @@ public static int utf8Decode(byte[] to, int p, int code) {
320
332
}
321
333
}
322
334
323
- private static void checkUnicodeRange (int code , RegexpSupport .ErrorMode mode )
335
+ private static void checkUnicodeRange (int code , TStringWithEncoding source , RegexpSupport .ErrorMode mode )
324
336
throws DeferredRaiseException {
325
337
// A Unicode code point is at most 21 bits long, so an int is enough
326
338
if ((0xd800 <= code && code <= 0xdfff ) /* Surrogates */ || 0x10ffff < code ) {
327
- raisePreprocessError ("invalid Unicode range" , mode );
339
+ raisePreprocessError ("invalid Unicode range" , source , mode );
328
340
}
329
341
}
330
342
331
343
private static int unescapeEscapedNonAscii (TStringBuilder to , byte [] bytes , int p , int end ,
332
- RubyEncoding enc , RubyEncoding [] encp , RegexpSupport .ErrorMode mode )
344
+ RubyEncoding enc , RubyEncoding [] encp , TStringWithEncoding source , RegexpSupport .ErrorMode mode )
333
345
throws DeferredRaiseException {
334
346
byte [] chBuf = new byte [enc .jcoding .maxLength ()];
335
347
int chLen = 0 ;
336
348
337
- p = readEscapedByte (chBuf , chLen ++, bytes , p , end , mode );
349
+ p = readEscapedByte (chBuf , chLen ++, bytes , p , end , source , mode );
338
350
while (chLen < enc .jcoding .maxLength () &&
339
351
StringSupport .MBCLEN_NEEDMORE_P (StringSupport .characterLength (enc , chBuf , 0 , chLen ))) {
340
- p = readEscapedByte (chBuf , chLen ++, bytes , p , end , mode );
352
+ p = readEscapedByte (chBuf , chLen ++, bytes , p , end , source , mode );
341
353
}
342
354
343
355
int cl = StringSupport .characterLength (enc , chBuf , 0 , chLen );
344
356
if (cl == -1 ) {
345
- raisePreprocessError ("invalid multibyte escape" , mode ); // MBCLEN_INVALID_P
357
+ raisePreprocessError ("invalid multibyte escape" , source , mode ); // MBCLEN_INVALID_P
346
358
}
347
359
348
360
if (chLen > 1 || (chBuf [0 ] & 0x80 ) != 0 ) {
@@ -353,7 +365,7 @@ private static int unescapeEscapedNonAscii(TStringBuilder to, byte[] bytes, int
353
365
if (encp [0 ] == null ) {
354
366
encp [0 ] = enc ;
355
367
} else if (encp [0 ] != enc ) {
356
- raisePreprocessError ("escaped non ASCII character in UTF-8 regexp" , mode );
368
+ raisePreprocessError ("escaped non ASCII character in UTF-8 regexp" , source , mode );
357
369
}
358
370
} else {
359
371
if (to != null ) {
@@ -363,11 +375,12 @@ private static int unescapeEscapedNonAscii(TStringBuilder to, byte[] bytes, int
363
375
return p ;
364
376
}
365
377
366
- public static int raisePreprocessError (String err , RegexpSupport .ErrorMode mode )
378
+ public static int raisePreprocessError (String err , TStringWithEncoding source , RegexpSupport .ErrorMode mode )
367
379
throws DeferredRaiseException {
368
380
switch (mode ) {
369
381
case RAISE :
370
- throw new DeferredRaiseException (context -> context .getCoreExceptions ().regexpError (err , null ));
382
+ final String message = formatRegexErrorMessage (err , source .tstring , "" );
383
+ throw new DeferredRaiseException (context -> context .getCoreExceptions ().regexpError (message , null ));
371
384
case PREPROCESS :
372
385
throw new DeferredRaiseException (context -> context
373
386
.getCoreExceptions ()
@@ -381,16 +394,16 @@ public static int raisePreprocessError(String err, RegexpSupport.ErrorMode mode)
381
394
@ SuppressWarnings ("fallthrough" )
382
395
@ SuppressFBWarnings ("SF" )
383
396
public static int readEscapedByte (byte [] to , int toP , byte [] bytes , int p , int end ,
384
- RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
397
+ TStringWithEncoding source , RegexpSupport .ErrorMode mode ) throws DeferredRaiseException {
385
398
if (p == end || bytes [p ++] != (byte ) '\\' ) {
386
- raisePreprocessError ("too short escaped multibyte character" , mode );
399
+ raisePreprocessError ("too short escaped multibyte character" , source , mode );
387
400
}
388
401
389
402
boolean metaPrefix = false , ctrlPrefix = false ;
390
403
int code = 0 ;
391
404
while (true ) {
392
405
if (p == end ) {
393
- raisePreprocessError ("too short escape sequence" , mode );
406
+ raisePreprocessError ("too short escape sequence" , source , mode );
394
407
}
395
408
396
409
switch (bytes [p ++]) {
@@ -439,14 +452,14 @@ public static int readEscapedByte(byte[] to, int toP, byte[] bytes, int p, int e
439
452
code = StringSupport .scanHex (bytes , p , hlen );
440
453
int len = StringSupport .hexLength (bytes , p , hlen );
441
454
if (len < 1 ) {
442
- raisePreprocessError ("invalid hex escape" , mode );
455
+ raisePreprocessError ("invalid hex escape" , source , mode );
443
456
}
444
457
p += len ;
445
458
break ;
446
459
447
460
case 'M' : /* \M-X, \M-\C-X, \M-\cX */
448
461
if (metaPrefix ) {
449
- raisePreprocessError ("duplicate meta escape" , mode );
462
+ raisePreprocessError ("duplicate meta escape" , source , mode );
450
463
}
451
464
metaPrefix = true ;
452
465
if (p + 1 < end && bytes [p ++] == (byte ) '-' && (bytes [p ] & 0x80 ) == 0 ) {
@@ -458,16 +471,16 @@ public static int readEscapedByte(byte[] to, int toP, byte[] bytes, int p, int e
458
471
break ;
459
472
}
460
473
}
461
- raisePreprocessError ("too short meta escape" , mode );
474
+ raisePreprocessError ("too short meta escape" , source , mode );
462
475
463
476
case 'C' : /* \C-X, \C-\M-X */
464
477
if (p == end || bytes [p ++] != (byte ) '-' ) {
465
- raisePreprocessError ("too short control escape" , mode );
478
+ raisePreprocessError ("too short control escape" , source , mode );
466
479
}
467
480
468
481
case 'c' : /* \cX, \c\M-X */
469
482
if (ctrlPrefix ) {
470
- raisePreprocessError ("duplicate control escape" , mode );
483
+ raisePreprocessError ("duplicate control escape" , source , mode );
471
484
}
472
485
ctrlPrefix = true ;
473
486
if (p < end && (bytes [p ] & 0x80 ) == 0 ) {
@@ -479,13 +492,13 @@ public static int readEscapedByte(byte[] to, int toP, byte[] bytes, int p, int e
479
492
break ;
480
493
}
481
494
}
482
- raisePreprocessError ("too short control escape" , mode );
495
+ raisePreprocessError ("too short control escape" , source , mode );
483
496
default :
484
- raisePreprocessError ("unexpected escape sequence" , mode );
497
+ raisePreprocessError ("unexpected escape sequence" , source , mode );
485
498
} // switch
486
499
487
500
if (code < 0 || code > 0xff ) {
488
- raisePreprocessError ("invalid escape code" , mode );
501
+ raisePreprocessError ("invalid escape code" , source , mode );
489
502
}
490
503
491
504
if (ctrlPrefix ) {
0 commit comments