Skip to content

Commit 7d3fead

Browse files
committed
[GR-18163] Fix RegexpError messages to match CRuby and remove Java error class names
PullRequest: truffleruby/4294
2 parents 5603e8b + 78306b3 commit 7d3fead

File tree

3 files changed

+64
-50
lines changed

3 files changed

+64
-50
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ Compatibility:
2828
* Set `$!` when a `Kernel#at_exit` hook raises an exception (#3535, @andrykonchin).
2929
* Support `:buffer` keyword argument to `Array#pack` (#3559, @andrykonchin).
3030
* Set `RbConfig::CONFIG['host_cpu']` to `arm64` on darwin platform (#3571, @andrykonchin).
31+
* Fix `RegexpError` messages to match CRuby better (#3398, @andrykonchin).
3132

3233
Performance:
3334

spec/ruby/core/regexp/shared/new.rb

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def obj.to_str() [] end
5656
end
5757

5858
it "raises a RegexpError when passed an incorrect regexp" do
59-
-> { Regexp.send(@method, "^[$", 0) }.should raise_error(RegexpError)
59+
-> { Regexp.send(@method, "^[$", 0) }.should raise_error(RegexpError, Regexp.new(Regexp.escape("premature end of char-class: /^[$/")))
6060
end
6161

6262
it "does not set Regexp options if only given one argument" do
@@ -261,7 +261,7 @@ def obj.to_int() ScratchPad.record(:called) end
261261

262262
describe "with escaped characters" do
263263
it "raises a Regexp error if there is a trailing backslash" do
264-
-> { Regexp.send(@method, "\\") }.should raise_error(RegexpError)
264+
-> { Regexp.send(@method, "\\") }.should raise_error(RegexpError, Regexp.new(Regexp.escape("too short escape sequence: /\\/")))
265265
end
266266

267267
it "does not raise a Regexp error if there is an escaped trailing backslash" do
@@ -293,7 +293,7 @@ def obj.to_int() ScratchPad.record(:called) end
293293
end
294294

295295
it "raises a RegexpError if \\x is not followed by any hexadecimal digits" do
296-
-> { Regexp.send(@method, "\\" + "xn") }.should raise_error(RegexpError)
296+
-> { Regexp.send(@method, "\\" + "xn") }.should raise_error(RegexpError, Regexp.new(Regexp.escape("invalid hex escape: /\\xn/")))
297297
end
298298

299299
it "accepts an escaped string interpolation" do
@@ -453,15 +453,15 @@ def obj.to_int() ScratchPad.record(:called) end
453453
end
454454

455455
it "raises a RegexpError if less than four digits are given for \\uHHHH" do
456-
-> { Regexp.send(@method, "\\" + "u304") }.should raise_error(RegexpError)
456+
-> { Regexp.send(@method, "\\" + "u304") }.should raise_error(RegexpError, Regexp.new(Regexp.escape("invalid Unicode escape: /\\u304/")))
457457
end
458458

459459
it "raises a RegexpError if the \\u{} escape is empty" do
460-
-> { Regexp.send(@method, "\\" + "u{}") }.should raise_error(RegexpError)
460+
-> { Regexp.send(@method, "\\" + "u{}") }.should raise_error(RegexpError, Regexp.new(Regexp.escape("invalid Unicode list: /\\u{}/")))
461461
end
462462

463463
it "raises a RegexpError if more than six hexadecimal digits are given" do
464-
-> { Regexp.send(@method, "\\" + "u{0ffffff}") }.should raise_error(RegexpError)
464+
-> { Regexp.send(@method, "\\" + "u{0ffffff}") }.should raise_error(RegexpError, Regexp.new(Regexp.escape("invalid Unicode range: /\\u{0ffffff}/")))
465465
end
466466

467467
it "returns a Regexp with US-ASCII encoding if only 7-bit ASCII characters are present regardless of the input String's encoding" do

src/main/java/org/truffleruby/core/regexp/ClassicRegexp.java

Lines changed: 57 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@
6060
import org.truffleruby.core.string.TStringWithEncoding;
6161
import org.truffleruby.core.string.StringSupport;
6262
import org.truffleruby.core.string.StringUtils;
63-
import org.truffleruby.language.backtrace.BacktraceFormatter;
6463
import org.truffleruby.language.control.DeferredRaiseException;
6564
import org.truffleruby.language.control.RaiseException;
6665

@@ -85,13 +84,24 @@ public static Regex makeRegexp(RubyDeferredWarnings rubyDeferredWarnings,
8584
? new RegexWarnCallback()
8685
: new RegexWarnDeferredCallback(rubyDeferredWarnings));
8786
} catch (Exception e) {
88-
String errorMessage = getRegexErrorMessage(source, e, options);
87+
String errorMessage = getRegexErrorMessageForException(source, e, options);
8988
throw new DeferredRaiseException(c -> c.getCoreExceptions().regexpError(errorMessage, currentNode));
9089
}
9190
}
9291

93-
public static String getRegexErrorMessage(AbstractTruffleString source, Exception e, RegexpOptions options) {
94-
return BacktraceFormatter.formatJavaThrowableMessage(e) + ": /" + source + "/" + options.toOptionsString();
92+
private static String getRegexErrorMessageForException(AbstractTruffleString source, Exception e,
93+
RegexpOptions options) {
94+
String message = e.getMessage();
95+
96+
if (message == null) {
97+
message = "<no message>";
98+
}
99+
100+
return formatRegexErrorMessage(message, source, options.toOptionsString());
101+
}
102+
103+
private static String formatRegexErrorMessage(String error, AbstractTruffleString source, String options) {
104+
return error + ": /" + source + "/" + options;
95105
}
96106

97107
@TruffleBoundary
@@ -112,7 +122,7 @@ private static boolean unescapeNonAscii(TStringBuilder to, TStringWithEncoding s
112122
while (p < end) {
113123
final int cl = strInEnc.characterLength(p - offset);
114124
if (cl <= 0) {
115-
raisePreprocessError("invalid multibyte character", mode);
125+
raisePreprocessError("invalid multibyte character", str, mode);
116126
}
117127
if (cl > 1 || (bytes[p] & 0x80) != 0) {
118128
if (to != null) {
@@ -122,15 +132,15 @@ private static boolean unescapeNonAscii(TStringBuilder to, TStringWithEncoding s
122132
if (encp[0] == null) {
123133
encp[0] = enc;
124134
} else if (encp[0] != enc) {
125-
raisePreprocessError("non ASCII character in UTF-8 regexp", mode);
135+
raisePreprocessError("non ASCII character in UTF-8 regexp", str, mode);
126136
}
127137
continue;
128138
}
129139
int c;
130140
switch (c = bytes[p++] & 0xff) {
131141
case '\\':
132142
if (p == end) {
133-
raisePreprocessError("too short escape sequence", mode);
143+
raisePreprocessError("too short escape sequence", str, mode);
134144
}
135145

136146
switch (c = bytes[p++] & 0xff) {
@@ -160,7 +170,7 @@ private static boolean unescapeNonAscii(TStringBuilder to, TStringWithEncoding s
160170
buf = new byte[1];
161171
}
162172
int pbeg = p;
163-
p = readEscapedByte(buf, 0, bytes, p, end, mode);
173+
p = readEscapedByte(buf, 0, bytes, p, end, str, mode);
164174
c = buf[0];
165175
if (c == -1) {
166176
return false;
@@ -169,22 +179,22 @@ private static boolean unescapeNonAscii(TStringBuilder to, TStringWithEncoding s
169179
to.append(bytes, pbeg, p - pbeg);
170180
}
171181
} else {
172-
p = unescapeEscapedNonAscii(to, bytes, p, end, enc, encp, mode);
182+
p = unescapeEscapedNonAscii(to, bytes, p, end, enc, encp, str, mode);
173183
}
174184
break;
175185

176186
case 'u':
177187
if (p == end) {
178-
raisePreprocessError("too short escape sequence", mode);
188+
raisePreprocessError("too short escape sequence", str, mode);
179189
}
180190
if (bytes[p] == (byte) '{') { /* \\u{H HH HHH HHHH HHHHH HHHHHH ...} */
181191
p++;
182-
p = unescapeUnicodeList(to, bytes, p, end, encp, mode);
192+
p = unescapeUnicodeList(to, bytes, p, end, encp, str, mode);
183193
if (p == end || bytes[p++] != (byte) '}') {
184-
raisePreprocessError("invalid Unicode list", mode);
194+
raisePreprocessError("invalid Unicode list", str, mode);
185195
}
186196
} else { /* \\uHHHH */
187-
p = unescapeUnicodeBmp(to, bytes, p, end, encp, mode);
197+
p = unescapeUnicodeBmp(to, bytes, p, end, encp, str, mode);
188198
}
189199
break;
190200
case 'p': /* \p{Hiragana} */
@@ -217,21 +227,23 @@ private static boolean unescapeNonAscii(TStringBuilder to, TStringWithEncoding s
217227
}
218228

219229
private static int unescapeUnicodeBmp(TStringBuilder to, byte[] bytes, int p, int end,
220-
RubyEncoding[] encp, RegexpSupport.ErrorMode mode) throws DeferredRaiseException {
230+
RubyEncoding[] encp, TStringWithEncoding source, RegexpSupport.ErrorMode mode)
231+
throws DeferredRaiseException {
221232
if (p + 4 > end) {
222-
raisePreprocessError("invalid Unicode escape", mode);
233+
raisePreprocessError("invalid Unicode escape", source, mode);
223234
}
224235
int code = StringSupport.scanHex(bytes, p, 4);
225236
int len = StringSupport.hexLength(bytes, p, 4);
226237
if (len != 4) {
227-
raisePreprocessError("invalid Unicode escape", mode);
238+
raisePreprocessError("invalid Unicode escape", source, mode);
228239
}
229-
appendUtf8(to, code, encp, mode);
240+
appendUtf8(to, code, encp, source, mode);
230241
return p + 4;
231242
}
232243

233244
private static int unescapeUnicodeList(TStringBuilder to, byte[] bytes, int p, int end,
234-
RubyEncoding[] encp, RegexpSupport.ErrorMode mode) throws DeferredRaiseException {
245+
RubyEncoding[] encp, TStringWithEncoding source, RegexpSupport.ErrorMode mode)
246+
throws DeferredRaiseException {
235247
while (p < end && StringSupport.isAsciiSpace(bytes[p] & 0xff)) {
236248
p++;
237249
}
@@ -244,11 +256,11 @@ private static int unescapeUnicodeList(TStringBuilder to, byte[] bytes, int p, i
244256
break;
245257
}
246258
if (len > 6) {
247-
raisePreprocessError("invalid Unicode range", mode);
259+
raisePreprocessError("invalid Unicode range", source, mode);
248260
}
249261
p += len;
250262
if (to != null) {
251-
appendUtf8(to, code, encp, mode);
263+
appendUtf8(to, code, encp, source, mode);
252264
}
253265
hasUnicode = true;
254266
while (p < end && StringSupport.isAsciiSpace(bytes[p] & 0xff)) {
@@ -257,14 +269,14 @@ private static int unescapeUnicodeList(TStringBuilder to, byte[] bytes, int p, i
257269
}
258270

259271
if (!hasUnicode) {
260-
raisePreprocessError("invalid Unicode list", mode);
272+
raisePreprocessError("invalid Unicode list", source, mode);
261273
}
262274
return p;
263275
}
264276

265277
private static void appendUtf8(TStringBuilder to, int code, RubyEncoding[] enc,
266-
RegexpSupport.ErrorMode mode) throws DeferredRaiseException {
267-
checkUnicodeRange(code, mode);
278+
TStringWithEncoding source, RegexpSupport.ErrorMode mode) throws DeferredRaiseException {
279+
checkUnicodeRange(code, source, mode);
268280

269281
if (code < 0x80) {
270282
if (to != null) {
@@ -278,7 +290,7 @@ private static void appendUtf8(TStringBuilder to, int code, RubyEncoding[] enc,
278290
if (enc[0] == null) {
279291
enc[0] = Encodings.UTF_8;
280292
} else if (enc[0] != Encodings.UTF_8) {
281-
raisePreprocessError("UTF-8 character in non UTF-8 regexp", mode);
293+
raisePreprocessError("UTF-8 character in non UTF-8 regexp", source, mode);
282294
}
283295
}
284296
}
@@ -320,29 +332,29 @@ public static int utf8Decode(byte[] to, int p, int code) {
320332
}
321333
}
322334

323-
private static void checkUnicodeRange(int code, RegexpSupport.ErrorMode mode)
335+
private static void checkUnicodeRange(int code, TStringWithEncoding source, RegexpSupport.ErrorMode mode)
324336
throws DeferredRaiseException {
325337
// Unicode code points can be at most 21 bits long, so an int is enough
326338
if ((0xd800 <= code && code <= 0xdfff) /* Surrogates */ || 0x10ffff < code) {
327-
raisePreprocessError("invalid Unicode range", mode);
339+
raisePreprocessError("invalid Unicode range", source, mode);
328340
}
329341
}
330342

331343
private static int unescapeEscapedNonAscii(TStringBuilder to, byte[] bytes, int p, int end,
332-
RubyEncoding enc, RubyEncoding[] encp, RegexpSupport.ErrorMode mode)
344+
RubyEncoding enc, RubyEncoding[] encp, TStringWithEncoding source, RegexpSupport.ErrorMode mode)
333345
throws DeferredRaiseException {
334346
byte[] chBuf = new byte[enc.jcoding.maxLength()];
335347
int chLen = 0;
336348

337-
p = readEscapedByte(chBuf, chLen++, bytes, p, end, mode);
349+
p = readEscapedByte(chBuf, chLen++, bytes, p, end, source, mode);
338350
while (chLen < enc.jcoding.maxLength() &&
339351
StringSupport.MBCLEN_NEEDMORE_P(StringSupport.characterLength(enc, chBuf, 0, chLen))) {
340-
p = readEscapedByte(chBuf, chLen++, bytes, p, end, mode);
352+
p = readEscapedByte(chBuf, chLen++, bytes, p, end, source, mode);
341353
}
342354

343355
int cl = StringSupport.characterLength(enc, chBuf, 0, chLen);
344356
if (cl == -1) {
345-
raisePreprocessError("invalid multibyte escape", mode); // MBCLEN_INVALID_P
357+
raisePreprocessError("invalid multibyte escape", source, mode); // MBCLEN_INVALID_P
346358
}
347359

348360
if (chLen > 1 || (chBuf[0] & 0x80) != 0) {
@@ -353,7 +365,7 @@ private static int unescapeEscapedNonAscii(TStringBuilder to, byte[] bytes, int
353365
if (encp[0] == null) {
354366
encp[0] = enc;
355367
} else if (encp[0] != enc) {
356-
raisePreprocessError("escaped non ASCII character in UTF-8 regexp", mode);
368+
raisePreprocessError("escaped non ASCII character in UTF-8 regexp", source, mode);
357369
}
358370
} else {
359371
if (to != null) {
@@ -363,11 +375,12 @@ private static int unescapeEscapedNonAscii(TStringBuilder to, byte[] bytes, int
363375
return p;
364376
}
365377

366-
public static int raisePreprocessError(String err, RegexpSupport.ErrorMode mode)
378+
public static int raisePreprocessError(String err, TStringWithEncoding source, RegexpSupport.ErrorMode mode)
367379
throws DeferredRaiseException {
368380
switch (mode) {
369381
case RAISE:
370-
throw new DeferredRaiseException(context -> context.getCoreExceptions().regexpError(err, null));
382+
final String message = formatRegexErrorMessage(err, source.tstring, "");
383+
throw new DeferredRaiseException(context -> context.getCoreExceptions().regexpError(message, null));
371384
case PREPROCESS:
372385
throw new DeferredRaiseException(context -> context
373386
.getCoreExceptions()
@@ -381,16 +394,16 @@ public static int raisePreprocessError(String err, RegexpSupport.ErrorMode mode)
381394
@SuppressWarnings("fallthrough")
382395
@SuppressFBWarnings("SF")
383396
public static int readEscapedByte(byte[] to, int toP, byte[] bytes, int p, int end,
384-
RegexpSupport.ErrorMode mode) throws DeferredRaiseException {
397+
TStringWithEncoding source, RegexpSupport.ErrorMode mode) throws DeferredRaiseException {
385398
if (p == end || bytes[p++] != (byte) '\\') {
386-
raisePreprocessError("too short escaped multibyte character", mode);
399+
raisePreprocessError("too short escaped multibyte character", source, mode);
387400
}
388401

389402
boolean metaPrefix = false, ctrlPrefix = false;
390403
int code = 0;
391404
while (true) {
392405
if (p == end) {
393-
raisePreprocessError("too short escape sequence", mode);
406+
raisePreprocessError("too short escape sequence", source, mode);
394407
}
395408

396409
switch (bytes[p++]) {
@@ -439,14 +452,14 @@ public static int readEscapedByte(byte[] to, int toP, byte[] bytes, int p, int e
439452
code = StringSupport.scanHex(bytes, p, hlen);
440453
int len = StringSupport.hexLength(bytes, p, hlen);
441454
if (len < 1) {
442-
raisePreprocessError("invalid hex escape", mode);
455+
raisePreprocessError("invalid hex escape", source, mode);
443456
}
444457
p += len;
445458
break;
446459

447460
case 'M': /* \M-X, \M-\C-X, \M-\cX */
448461
if (metaPrefix) {
449-
raisePreprocessError("duplicate meta escape", mode);
462+
raisePreprocessError("duplicate meta escape", source, mode);
450463
}
451464
metaPrefix = true;
452465
if (p + 1 < end && bytes[p++] == (byte) '-' && (bytes[p] & 0x80) == 0) {
@@ -458,16 +471,16 @@ public static int readEscapedByte(byte[] to, int toP, byte[] bytes, int p, int e
458471
break;
459472
}
460473
}
461-
raisePreprocessError("too short meta escape", mode);
474+
raisePreprocessError("too short meta escape", source, mode);
462475

463476
case 'C': /* \C-X, \C-\M-X */
464477
if (p == end || bytes[p++] != (byte) '-') {
465-
raisePreprocessError("too short control escape", mode);
478+
raisePreprocessError("too short control escape", source, mode);
466479
}
467480

468481
case 'c': /* \cX, \c\M-X */
469482
if (ctrlPrefix) {
470-
raisePreprocessError("duplicate control escape", mode);
483+
raisePreprocessError("duplicate control escape", source, mode);
471484
}
472485
ctrlPrefix = true;
473486
if (p < end && (bytes[p] & 0x80) == 0) {
@@ -479,13 +492,13 @@ public static int readEscapedByte(byte[] to, int toP, byte[] bytes, int p, int e
479492
break;
480493
}
481494
}
482-
raisePreprocessError("too short control escape", mode);
495+
raisePreprocessError("too short control escape", source, mode);
483496
default:
484-
raisePreprocessError("unexpected escape sequence", mode);
497+
raisePreprocessError("unexpected escape sequence", source, mode);
485498
} // switch
486499

487500
if (code < 0 || code > 0xff) {
488-
raisePreprocessError("invalid escape code", mode);
501+
raisePreprocessError("invalid escape code", source, mode);
489502
}
490503

491504
if (ctrlPrefix) {

0 commit comments

Comments
 (0)