|
75 | 75 | @CoreModule("Truffle::RegexpOperations")
|
76 | 76 | public class TruffleRegexpNodes {
|
77 | 77 |
|
78 |
| - public static Encoding checkEncoding(RubyRegexp regexp, Encoding strEnc, CodeRange codeRange) { |
79 |
| - final Encoding regexEnc = regexp.regex.getEncoding(); |
| 78 | + // rb_reg_prepare_enc ... mostly. Some of the error checks are performed by callers of this method. |
| 79 | + public abstract static class CheckEncodingNode extends RubyContextNode { |
80 | 80 |
|
81 |
| - if (strEnc == regexEnc) { |
82 |
| - return regexEnc; |
83 |
| - } else if (regexEnc == USASCIIEncoding.INSTANCE && codeRange == CodeRange.CR_7BIT) { |
84 |
| - return regexEnc; |
85 |
| - } else if (strEnc.isAsciiCompatible() && regexp.options.isFixed()) { |
86 |
| - return regexEnc; |
| 81 | + @Child RopeNodes.CodeRangeNode codeRangeNode = RopeNodes.CodeRangeNode.create(); |
| 82 | + @Child RubyStringLibrary stringLibrary = RubyStringLibrary.getFactory().createDispatched(2); |
| 83 | + |
| 84 | + public static CheckEncodingNode create() { |
| 85 | + return TruffleRegexpNodesFactory.CheckEncodingNodeGen.create(); |
| 86 | + } |
| 87 | + |
| 88 | + public abstract Encoding executeCheckEncoding(RubyRegexp regexp, Object str); |
| 89 | + |
| 90 | + @Specialization(guards = { |
| 91 | + "!isSameEncoding(regexp, string)", |
| 92 | + "isUSASCII(regexp, string)" |
| 93 | + }) |
| 94 | + protected Encoding checkEncodingAsciiOnly(RubyRegexp regexp, Object string) { |
| 95 | + return USASCIIEncoding.INSTANCE; |
| 96 | + } |
| 97 | + |
| 98 | + @Specialization(guards = { |
| 99 | + "isSameEncoding(regexp, string)" |
| 100 | + }) |
| 101 | + protected Encoding checkEncodingSameEncoding(RubyRegexp regexp, Object string) { |
| 102 | + return regexp.regex.getEncoding(); |
| 103 | + } |
| 104 | + |
| 105 | + @Specialization(guards = { |
| 106 | + "!isSameEncoding(regexp, string)", |
| 107 | + "!isUSASCII(regexp, string)", |
| 108 | + "isFixedEncoding(regexp, string)", |
| 109 | + }) |
| 110 | + protected Encoding checkEncodingFixedEncoding(RubyRegexp regexp, Object string) { |
| 111 | + return regexp.regex.getEncoding(); |
| 112 | + } |
| 113 | + |
| 114 | + @Specialization(guards = { |
| 115 | + "!isSameEncoding(regexp, string)", |
| 116 | + "!isUSASCII(regexp, string)", |
| 117 | + "!isFixedEncoding(regexp, string)" |
| 118 | + }) |
| 119 | + protected Encoding fallback(RubyRegexp regexp, Object string) { |
| 120 | + return stringEncoding(string); |
| 121 | + } |
| 122 | + |
| 123 | + protected boolean isUSASCII(RubyRegexp regexp, Object string) { |
| 124 | + return regexp.regex.getEncoding() == USASCIIEncoding.INSTANCE && |
| 125 | + codeRangeNode.execute(stringLibrary.getRope(string)) == CodeRange.CR_7BIT; |
| 126 | + } |
| 127 | + |
| 128 | + protected boolean isFixedEncoding(RubyRegexp regexp, Object string) { |
| 129 | + return regexp.options.isFixed() && stringLibrary.getRope(string).getEncoding().isAsciiCompatible(); |
| 130 | + } |
| 131 | + |
| 132 | + protected boolean isSameEncoding(RubyRegexp regexp, Object string) { |
| 133 | + return regexp.regex.getEncoding() == stringLibrary.getRope(string).getEncoding(); |
| 134 | + } |
| 135 | + |
| 136 | + protected Encoding stringEncoding(Object string) { |
| 137 | + return stringLibrary.getRope(string).getEncoding(); |
87 | 138 | }
|
88 |
| - return strEnc; |
89 | 139 | }
|
90 | 140 |
|
91 | 141 | @TruffleBoundary
|
@@ -191,19 +241,20 @@ public RubyRegexp createRegexp(Rope pattern) throws DeferredRaiseException {
|
191 | 241 | @CoreMethod(names = "select_encoding", onSingleton = true, required = 3)
|
192 | 242 | public abstract static class SelectEncodingNode extends CoreMethodArrayArgumentsNode {
|
193 | 243 |
|
194 |
| - @Child RopeNodes.CodeRangeNode codeRangeNode; |
| 244 | + @Child CheckEncodingNode checkEncodingNode; |
195 | 245 |
|
196 | 246 | @Specialization(guards = "libString.isRubyString(str)")
|
197 | 247 | protected RubyEncoding selectEncoding(RubyRegexp re, Object str, boolean encodingConversion,
|
| 248 | + @Cached TruffleRegexpNodes.CheckEncodingNode checkEncodingNode, |
198 | 249 | @CachedLibrary(limit = "2") RubyStringLibrary libString) {
|
199 | 250 | Encoding encoding;
|
200 | 251 | if (encodingConversion) {
|
201 |
| - Rope stringRope = libString.getRope(str); |
202 |
| - if (codeRangeNode == null) { |
| 252 | + if (checkEncodingNode == null) { |
203 | 253 | CompilerDirectives.transferToInterpreterAndInvalidate();
|
204 |
| - codeRangeNode = insert(RopeNodes.CodeRangeNode.create()); |
| 254 | + checkEncodingNode = insert(CheckEncodingNode.create()); |
205 | 255 | }
|
206 |
| - encoding = checkEncoding(re, stringRope.getEncoding(), codeRangeNode.execute(stringRope)); |
| 256 | + |
| 257 | + encoding = checkEncodingNode.executeCheckEncoding(re, str); |
207 | 258 | } else {
|
208 | 259 | encoding = re.regex.getEncoding();
|
209 | 260 | }
|
@@ -374,17 +425,18 @@ protected Object matchInRegion(
|
374 | 425 | RubyRegexp regexp, Object string, int fromPos, int toPos, boolean atStart, int startPos,
|
375 | 426 | @Cached RopeNodes.BytesNode bytesNode,
|
376 | 427 | @Cached TruffleRegexpNodes.MatchNode matchNode,
|
| 428 | + @Cached TruffleRegexpNodes.CheckEncodingNode checkEncodingNode, |
377 | 429 | @CachedLibrary(limit = "2") RubyStringLibrary libString) {
|
378 |
| - Rope rope = libString.getRope(string); |
379 |
| - final Encoding enc = checkEncoding(regexp, rope.getEncoding(), rope.getCodeRange()); |
| 430 | + final Rope rope = libString.getRope(string); |
| 431 | + final Encoding enc = checkEncodingNode.executeCheckEncoding(regexp, string); |
380 | 432 | Regex regex = regexp.regex;
|
381 | 433 |
|
382 | 434 | if (regex.getEncoding() != enc) {
|
383 |
| - EncodingCache encodingCache = regexp.cachedEncodings; |
| 435 | + final EncodingCache encodingCache = regexp.cachedEncodings; |
384 | 436 | regex = encodingCache.getOrCreate(enc, e -> makeRegexpForEncoding(getContext(), regexp, e, this));
|
385 | 437 | }
|
386 | 438 |
|
387 |
| - Matcher matcher = getMatcher(regex, bytesNode.execute(rope), startPos); |
| 439 | + final Matcher matcher = getMatcher(regex, bytesNode.execute(rope), startPos); |
388 | 440 | return matchNode.execute(regexp, string, matcher, fromPos, toPos, atStart);
|
389 | 441 | }
|
390 | 442 | }
|
|
0 commit comments