Skip to content

Commit a63ba65

Browse files
wildmaples and nirvdrum committed
Turn TruffleRegexpNodes.checkEncoding into a Node
Co-authored-by: Kevin Menard <kevin.menard@shopify.com>
1 parent b230e45 commit a63ba65

File tree

1 file changed

+70
-18
lines changed

1 file changed

+70
-18
lines changed

src/main/java/org/truffleruby/core/regexp/TruffleRegexpNodes.java

Lines changed: 70 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -75,17 +75,67 @@
7575
@CoreModule("Truffle::RegexpOperations")
7676
public class TruffleRegexpNodes {
7777

78-
public static Encoding checkEncoding(RubyRegexp regexp, Encoding strEnc, CodeRange codeRange) {
79-
final Encoding regexEnc = regexp.regex.getEncoding();
78+
// rb_reg_prepare_enc ... mostly. Some of the error checks are performed by callers of this method.
79+
public abstract static class CheckEncodingNode extends RubyContextNode {
8080

81-
if (strEnc == regexEnc) {
82-
return regexEnc;
83-
} else if (regexEnc == USASCIIEncoding.INSTANCE && codeRange == CodeRange.CR_7BIT) {
84-
return regexEnc;
85-
} else if (strEnc.isAsciiCompatible() && regexp.options.isFixed()) {
86-
return regexEnc;
81+
@Child RopeNodes.CodeRangeNode codeRangeNode = RopeNodes.CodeRangeNode.create();
82+
@Child RubyStringLibrary stringLibrary = RubyStringLibrary.getFactory().createDispatched(2);
83+
84+
public static CheckEncodingNode create() {
85+
return TruffleRegexpNodesFactory.CheckEncodingNodeGen.create();
86+
}
87+
88+
public abstract Encoding executeCheckEncoding(RubyRegexp regexp, Object str);
89+
90+
@Specialization(guards = {
91+
"!isSameEncoding(regexp, string)",
92+
"isUSASCII(regexp, string)"
93+
})
94+
protected Encoding checkEncodingAsciiOnly(RubyRegexp regexp, Object string) {
95+
return USASCIIEncoding.INSTANCE;
96+
}
97+
98+
@Specialization(guards = {
99+
"isSameEncoding(regexp, string)"
100+
})
101+
protected Encoding checkEncodingSameEncoding(RubyRegexp regexp, Object string) {
102+
return regexp.regex.getEncoding();
103+
}
104+
105+
@Specialization(guards = {
106+
"!isSameEncoding(regexp, string)",
107+
"!isUSASCII(regexp, string)",
108+
"isFixedEncoding(regexp, string)",
109+
})
110+
protected Encoding checkEncodingFixedEncoding(RubyRegexp regexp, Object string) {
111+
return regexp.regex.getEncoding();
112+
}
113+
114+
@Specialization(guards = {
115+
"!isSameEncoding(regexp, string)",
116+
"!isUSASCII(regexp, string)",
117+
"!isFixedEncoding(regexp, string)"
118+
})
119+
protected Encoding fallback(RubyRegexp regexp, Object string) {
120+
return stringEncoding(string);
121+
}
122+
123+
protected boolean isUSASCII(RubyRegexp regexp, Object string) {
124+
return regexp.regex.getEncoding() == USASCIIEncoding.INSTANCE &&
125+
codeRangeNode.execute(stringLibrary.getRope(string)) == CodeRange.CR_7BIT;
126+
}
127+
128+
protected boolean isFixedEncoding(RubyRegexp regexp, Object string) {
129+
return regexp.options.isFixed() && stringLibrary.getRope(string).getEncoding().isAsciiCompatible();
130+
}
131+
132+
protected boolean isSameEncoding(RubyRegexp regexp, Object string) {
133+
return regexp.regex.getEncoding() == stringLibrary.getRope(string).getEncoding();
134+
}
135+
136+
protected Encoding stringEncoding(Object string) {
137+
return stringLibrary.getRope(string).getEncoding();
87138
}
88-
return strEnc;
89139
}
90140

91141
@TruffleBoundary
@@ -191,19 +241,20 @@ public RubyRegexp createRegexp(Rope pattern) throws DeferredRaiseException {
191241
@CoreMethod(names = "select_encoding", onSingleton = true, required = 3)
192242
public abstract static class SelectEncodingNode extends CoreMethodArrayArgumentsNode {
193243

194-
@Child RopeNodes.CodeRangeNode codeRangeNode;
244+
@Child CheckEncodingNode checkEncodingNode;
195245

196246
@Specialization(guards = "libString.isRubyString(str)")
197247
protected RubyEncoding selectEncoding(RubyRegexp re, Object str, boolean encodingConversion,
248+
@Cached TruffleRegexpNodes.CheckEncodingNode checkEncodingNode,
198249
@CachedLibrary(limit = "2") RubyStringLibrary libString) {
199250
Encoding encoding;
200251
if (encodingConversion) {
201-
Rope stringRope = libString.getRope(str);
202-
if (codeRangeNode == null) {
252+
if (checkEncodingNode == null) {
203253
CompilerDirectives.transferToInterpreterAndInvalidate();
204-
codeRangeNode = insert(RopeNodes.CodeRangeNode.create());
254+
checkEncodingNode = insert(CheckEncodingNode.create());
205255
}
206-
encoding = checkEncoding(re, stringRope.getEncoding(), codeRangeNode.execute(stringRope));
256+
257+
encoding = checkEncodingNode.executeCheckEncoding(re, str);
207258
} else {
208259
encoding = re.regex.getEncoding();
209260
}
@@ -374,17 +425,18 @@ protected Object matchInRegion(
374425
RubyRegexp regexp, Object string, int fromPos, int toPos, boolean atStart, int startPos,
375426
@Cached RopeNodes.BytesNode bytesNode,
376427
@Cached TruffleRegexpNodes.MatchNode matchNode,
428+
@Cached TruffleRegexpNodes.CheckEncodingNode checkEncodingNode,
377429
@CachedLibrary(limit = "2") RubyStringLibrary libString) {
378-
Rope rope = libString.getRope(string);
379-
final Encoding enc = checkEncoding(regexp, rope.getEncoding(), rope.getCodeRange());
430+
final Rope rope = libString.getRope(string);
431+
final Encoding enc = checkEncodingNode.executeCheckEncoding(regexp, string);
380432
Regex regex = regexp.regex;
381433

382434
if (regex.getEncoding() != enc) {
383-
EncodingCache encodingCache = regexp.cachedEncodings;
435+
final EncodingCache encodingCache = regexp.cachedEncodings;
384436
regex = encodingCache.getOrCreate(enc, e -> makeRegexpForEncoding(getContext(), regexp, e, this));
385437
}
386438

387-
Matcher matcher = getMatcher(regex, bytesNode.execute(rope), startPos);
439+
final Matcher matcher = getMatcher(regex, bytesNode.execute(rope), startPos);
388440
return matchNode.execute(regexp, string, matcher, fromPos, toPos, atStart);
389441
}
390442
}

0 commit comments

Comments
 (0)