Skip to content

Commit c9213bc

Browse files
committed
[GR-45043] Cleanup ClassicRegexp to optimize YARPTranslator
PullRequest: truffleruby/4164
2 parents e5f0486 + f1f7bff commit c9213bc

File tree

7 files changed

+108
-149
lines changed

7 files changed

+108
-149
lines changed

src/main/java/org/truffleruby/core/regexp/ClassicRegexp.java

Lines changed: 27 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,8 @@
3636
***** END LICENSE BLOCK *****/
3737
package org.truffleruby.core.regexp;
3838

39-
import static org.truffleruby.core.string.StringUtils.EMPTY_STRING_ARRAY;
40-
4139
import java.nio.charset.StandardCharsets;
4240
import java.util.Arrays;
43-
import java.util.Iterator;
4441

4542
import com.oracle.truffle.api.strings.AbstractTruffleString;
4643
import com.oracle.truffle.api.strings.TruffleStringBuilder;
@@ -49,7 +46,6 @@
4946
import org.jcodings.specific.SJISEncoding;
5047
import org.jcodings.specific.USASCIIEncoding;
5148
import org.jcodings.specific.UTF8Encoding;
52-
import org.joni.NameEntry;
5349
import org.joni.Option;
5450
import org.joni.Regex;
5551
import org.joni.Syntax;
@@ -59,7 +55,6 @@
5955
import org.truffleruby.collections.ByteArrayBuilder;
6056
import org.truffleruby.core.encoding.Encodings;
6157
import org.truffleruby.core.encoding.RubyEncoding;
62-
import org.truffleruby.core.encoding.TStringUtils;
6358
import org.truffleruby.core.string.ATStringWithEncoding;
6459
import org.truffleruby.core.string.TStringBuilder;
6560
import org.truffleruby.core.string.TStringWithEncoding;
@@ -68,25 +63,12 @@
6863
import org.truffleruby.language.backtrace.BacktraceFormatter;
6964
import org.truffleruby.language.control.DeferredRaiseException;
7065
import org.truffleruby.language.control.RaiseException;
71-
import org.truffleruby.parser.ReOptions;
7266

7367
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
7468
import com.oracle.truffle.api.nodes.Node;
7569
import org.truffleruby.parser.RubyDeferredWarnings;
7670

77-
public final class ClassicRegexp implements ReOptions {
78-
79-
private final Regex pattern;
80-
private final TStringWithEncoding str;
81-
private RegexpOptions options;
82-
83-
public void setLiteral() {
84-
options = options.setLiteral(true);
85-
}
86-
87-
public Encoding getEncoding() {
88-
return pattern.getEncoding();
89-
}
71+
public final class ClassicRegexp {
9072

9173
public static Regex makeRegexp(RubyDeferredWarnings rubyDeferredWarnings,
9274
TStringBuilder processedSource, RegexpOptions options,
@@ -112,34 +94,6 @@ public static String getRegexErrorMessage(AbstractTruffleString source, Exceptio
11294
return BacktraceFormatter.formatJavaThrowableMessage(e) + ": /" + source + "/" + options.toOptionsString();
11395
}
11496

115-
private static Regex getRegexpFromCache(TStringBuilder bytes, RubyEncoding encoding, RegexpOptions options,
116-
AbstractTruffleString source) throws DeferredRaiseException {
117-
final Regex newRegex = makeRegexp(null, bytes, options, encoding, source, null);
118-
newRegex.setUserObject(bytes);
119-
return newRegex;
120-
}
121-
122-
public ClassicRegexp(TStringWithEncoding strEnc, RegexpOptions originalOptions)
123-
throws DeferredRaiseException {
124-
this.options = originalOptions;
125-
126-
if (strEnc.encoding.isDummy) {
127-
throw new UnsupportedOperationException("can't make regexp with dummy encoding");
128-
}
129-
130-
RegexpOptions[] optionsArray = new RegexpOptions[]{ originalOptions };
131-
RubyEncoding[] fixedEnc = new RubyEncoding[]{ null };
132-
TStringBuilder unescaped = preprocess(strEnc, strEnc.encoding, fixedEnc, RegexpSupport.ErrorMode.RAISE);
133-
final RubyEncoding computedEnc = computeRegexpEncoding(optionsArray, strEnc.encoding, fixedEnc);
134-
this.pattern = getRegexpFromCache(
135-
unescaped,
136-
computedEnc,
137-
options,
138-
strEnc.forceEncoding(computedEnc).tstring);
139-
this.options = optionsArray[0];
140-
this.str = strEnc;
141-
}
142-
14397
@TruffleBoundary
14498
@SuppressWarnings("fallthrough")
14599
private static boolean unescapeNonAscii(TStringBuilder to, TStringWithEncoding str, RubyEncoding enc,
@@ -849,34 +803,33 @@ public static void appendOptions(TStringBuilder to, RegexpOptions options) {
849803
}
850804

851805
@SuppressWarnings("unused")
852-
public ByteArrayBuilder toByteArrayBuilder() {
806+
public static TStringWithEncoding toS(TStringWithEncoding source, RegexpOptions options) {
853807
RegexpOptions newOptions = (RegexpOptions) options.clone();
854-
var byteArray = str.getInternalByteArray();
808+
var byteArray = source.getInternalByteArray();
855809
int p = 0;
856810
int len = byteArray.getLength();
857811

858812
TStringBuilder result = TStringBuilder.create(len);
859813
result.append((byte) '(');
860814
result.append((byte) '?');
861815

862-
again: do {
816+
do {
863817
if (len >= 4 && byteArray.get(p) == '(' && byteArray.get(p + 1) == '?') {
864-
boolean err = true;
865818
p += 2;
866-
if ((len -= 2) > 0) {
867-
do {
868-
if (byteArray.get(p) == 'm') {
869-
newOptions = newOptions.setMultiline(true);
870-
} else if (byteArray.get(p) == 'i') {
871-
newOptions = newOptions.setIgnorecase(true);
872-
} else if (byteArray.get(p) == 'x') {
873-
newOptions = newOptions.setExtended(true);
874-
} else {
875-
break;
876-
}
877-
p++;
878-
} while (--len > 0);
879-
}
819+
len -= 2;
820+
do {
821+
if (byteArray.get(p) == 'm') {
822+
newOptions = newOptions.setMultiline(true);
823+
} else if (byteArray.get(p) == 'i') {
824+
newOptions = newOptions.setIgnorecase(true);
825+
} else if (byteArray.get(p) == 'x') {
826+
newOptions = newOptions.setExtended(true);
827+
} else {
828+
break;
829+
}
830+
p++;
831+
} while (--len > 0);
832+
880833
if (len > 1 && byteArray.get(p) == '-') {
881834
++p;
882835
--len;
@@ -897,9 +850,10 @@ public ByteArrayBuilder toByteArrayBuilder() {
897850
if (byteArray.get(p) == ')') {
898851
--len;
899852
++p;
900-
continue again;
853+
continue;
901854
}
902855

856+
boolean err = true;
903857
if (byteArray.get(p) == ':' && byteArray.get(p + len - 1) == ')') {
904858
p++;
905859
try {
@@ -908,7 +862,7 @@ public ByteArrayBuilder toByteArrayBuilder() {
908862
p + byteArray.getOffset(),
909863
p + byteArray.getOffset() + (len -= 2),
910864
Option.DEFAULT,
911-
str.encoding.jcoding,
865+
source.encoding.jcoding,
912866
Syntax.DEFAULT,
913867
new RegexWarnCallback());
914868
err = false;
@@ -920,7 +874,7 @@ public ByteArrayBuilder toByteArrayBuilder() {
920874
if (err) {
921875
newOptions = options;
922876
p = 0;
923-
len = str.byteLength();
877+
len = source.byteLength();
924878
}
925879
}
926880

@@ -939,17 +893,16 @@ public ByteArrayBuilder toByteArrayBuilder() {
939893
}
940894
}
941895
result.append((byte) ':');
942-
appendRegexpString(result, str, p, len);
896+
appendRegexpString(result, source, p, len);
943897

944898
result.append((byte) ')');
945-
result.setEncoding(Encodings.getBuiltInEncoding(getEncoding()));
946-
return result;
947-
//return RubyString.newString(getRuntime(), result, getEncoding()).infectBy(this);
899+
result.setEncoding(source.encoding);
900+
return result.toTStringWithEnc();
948901
} while (true);
949902
}
950903

951904
@TruffleBoundary
952-
public void appendRegexpString(TStringBuilder to, TStringWithEncoding fullStr, int start, int len) {
905+
public static void appendRegexpString(TStringBuilder to, TStringWithEncoding fullStr, int start, int len) {
953906
var str = fullStr.substring(start, len);
954907

955908
final var enc = str.encoding.jcoding;
@@ -995,45 +948,11 @@ public void appendRegexpString(TStringBuilder to, TStringWithEncoding fullStr, i
995948
}
996949
}
997950

998-
public String[] getNames() {
999-
int nameLength = pattern.numberOfNames();
1000-
if (nameLength == 0) {
1001-
return EMPTY_STRING_ARRAY;
1002-
}
1003-
1004-
RubyEncoding encoding = Encodings.getBuiltInEncoding(pattern.getEncoding());
1005-
String[] names = new String[nameLength];
1006-
int j = 0;
1007-
for (Iterator<NameEntry> i = pattern.namedBackrefIterator(); i.hasNext();) {
1008-
NameEntry e = i.next();
1009-
// intern() to improve footprint
1010-
names[j++] = TStringUtils.bytesToJavaStringOrThrow(e.name, e.nameP, e.nameEnd - e.nameP, encoding).intern();
1011-
}
1012-
1013-
return names;
1014-
}
1015-
1016951
// Code that used to be in ParserSupport but was copied here, as ParserSupport is coupled with the JRuby lexer & parser.
1017952
// Needed until https://github.com/ruby/prism/issues/1997 is fixed.
1018953

1019-
// From ParserSupport#newRegexpNode
1020-
public static TStringWithEncoding findEncodingForRegexpLiteral(TStringWithEncoding regexp, RegexpOptions options,
1021-
RubyEncoding lexerEncoding, Node currentNode) throws DeferredRaiseException {
1022-
TStringWithEncoding meat = regexpFragmentCheck(regexp, options, lexerEncoding, currentNode);
1023-
checkRegexpSyntax(meat, options.withoutOnce());
1024-
return meat;
1025-
}
1026-
1027-
// MRI: reg_fragment_check
1028-
public static TStringWithEncoding regexpFragmentCheck(TStringWithEncoding value, RegexpOptions options,
1029-
RubyEncoding lexerEncoding, Node currentNode) throws DeferredRaiseException {
1030-
final TStringWithEncoding strEnc = setRegexpEncoding(value, options, lexerEncoding, currentNode);
1031-
ClassicRegexp.preprocessCheck(strEnc);
1032-
return strEnc;
1033-
}
1034-
1035954
// MRI: reg_fragment_setenc_gen
1036-
private static TStringWithEncoding setRegexpEncoding(TStringWithEncoding value, RegexpOptions options,
955+
public static TStringWithEncoding setRegexpEncoding(TStringWithEncoding value, RegexpOptions options,
1037956
RubyEncoding lexerEncoding, Node currentNode) throws DeferredRaiseException {
1038957
options = options.setup();
1039958
final RubyEncoding optionsEncoding = options.getEncoding() == null
@@ -1067,12 +986,6 @@ private static TStringWithEncoding setRegexpEncoding(TStringWithEncoding value,
1067986
return value;
1068987
}
1069988

1070-
private static ClassicRegexp checkRegexpSyntax(TStringWithEncoding value, RegexpOptions options)
1071-
throws DeferredRaiseException {
1072-
// This is only for syntax checking but this will as a side effect create an entry in the regexp cache.
1073-
return new ClassicRegexp(value, options);
1074-
}
1075-
1076989
private static char optionsEncodingChar(Encoding optionEncoding) {
1077990
if (optionEncoding == USASCIIEncoding.INSTANCE) {
1078991
return 'n';

src/main/java/org/truffleruby/core/regexp/RegexpNodes.java

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -122,17 +122,8 @@ RubyString toS(RubyRegexp regexp) {
122122

123123
@TruffleBoundary
124124
protected TStringWithEncoding createTString(RubyRegexp regexp) {
125-
final ClassicRegexp classicRegexp;
126-
127-
try {
128-
classicRegexp = new ClassicRegexp(
129-
new TStringWithEncoding(regexp.source, regexp.encoding),
130-
RegexpOptions.fromEmbeddedOptions(regexp.regex.getOptions()));
131-
} catch (DeferredRaiseException dre) {
132-
throw dre.getException(getContext());
133-
}
134-
135-
return classicRegexp.toByteArrayBuilder().toTStringWithEnc(regexp.encoding);
125+
var sourceEnc = new TStringWithEncoding(regexp.source, regexp.encoding);
126+
return ClassicRegexp.toS(sourceEnc, regexp.options);
136127
}
137128
}
138129

src/main/java/org/truffleruby/core/regexp/RubyRegexp.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ private RubyRegexp(Regex regex, RegexpOptions options) {
8181
final TStringWithEncoding tstringWithEncoding = (TStringWithEncoding) regex.getUserObject();
8282
this.source = tstringWithEncoding.tstring;
8383
this.encoding = tstringWithEncoding.getEncoding();
84+
assert encoding.jcoding == regex.getEncoding();
8485
this.options = options;
8586
this.cachedEncodings = new EncodingCache();
8687
this.tregexCache = new TRegexCache();

src/main/java/org/truffleruby/core/string/FrozenStringLiterals.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ public FrozenStringLiterals(TStringCache tStringCache) {
3838

3939
@TruffleBoundary
4040
public ImmutableRubyString getFrozenStringLiteral(TruffleString tstring, RubyEncoding encoding) {
41-
return getFrozenStringLiteral(tstring.getInternalByteArrayUncached(encoding.tencoding),
41+
return getFrozenStringLiteral(
42+
tstring.getInternalByteArrayUncached(encoding.tencoding),
4243
TStringUtils.hasImmutableInternalByteArray(tstring),
4344
encoding);
4445
}
@@ -58,6 +59,20 @@ public ImmutableRubyString getFrozenStringLiteral(InternalByteArray byteArray, b
5859
}
5960
}
6061

62+
@TruffleBoundary
63+
public ImmutableRubyString getFrozenStringLiteral(byte[] bytes, RubyEncoding encoding) {
64+
// Ensure all ImmutableRubyString have a TruffleString from the TStringCache
65+
var cachedTString = tstringCache.getTString(bytes, encoding);
66+
var tstringWithEncoding = new TStringWithEncoding(cachedTString, encoding);
67+
68+
final ImmutableRubyString string = values.get(tstringWithEncoding);
69+
if (string != null) {
70+
return string;
71+
} else {
72+
return values.addInCacheIfAbsent(tstringWithEncoding, new ImmutableRubyString(cachedTString, encoding));
73+
}
74+
}
75+
6176
public static ImmutableRubyString createStringAndCacheLater(TruffleString name,
6277
RubyEncoding encoding) {
6378
final ImmutableRubyString string = new ImmutableRubyString(name, encoding);

src/main/java/org/truffleruby/debug/TruffleDebugNodes.java

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import com.oracle.truffle.api.source.SourceSection;
3535
import com.oracle.truffle.api.strings.TruffleString;
3636
import org.graalvm.collections.Pair;
37+
import org.prism.ParseResult;
3738
import org.truffleruby.Layouts;
3839
import org.truffleruby.RubyLanguage;
3940
import org.truffleruby.annotations.CoreMethod;
@@ -256,19 +257,48 @@ Object ast(Object code,
256257
@Cached TruffleString.FromJavaStringNode fromJavaStringNode) {
257258
var codeString = new TStringWithEncoding(RubyGuards.asTruffleStringUncached(code),
258259
RubyStringLibrary.getUncached().getEncoding(code));
260+
261+
var rubySource = createRubySource(codeString);
262+
var parseResult = getParseResult(getLanguage(), rubySource);
263+
var ast = parseResult.value;
264+
265+
return createString(fromJavaStringNode, ast.toString(), Encodings.UTF_8);
266+
}
267+
268+
private static RubySource createRubySource(TStringWithEncoding code) {
259269
String name = "<parse_ast>";
260-
var source = Source.newBuilder("ruby", new ByteBasedCharSequence(codeString), name).build();
261-
var rubySource = new RubySource(source, name);
270+
var source = Source.newBuilder("ruby", new ByteBasedCharSequence(code), name).build();
271+
return new RubySource(source, name);
272+
}
262273

263-
var language = getLanguage();
274+
private static ParseResult getParseResult(RubyLanguage language, RubySource rubySource) {
264275
var yarpSource = YARPTranslatorDriver.createYARPSource(rubySource.getBytes());
265276
String sourcePath = rubySource.getSourcePath(language).intern();
266277

267-
var parseResult = YARPTranslatorDriver.parseToYARPAST(rubySource, sourcePath, yarpSource,
278+
return YARPTranslatorDriver.parseToYARPAST(rubySource, sourcePath, yarpSource,
268279
Collections.emptyList(), language.options.FROZEN_STRING_LITERALS);
269-
var ast = parseResult.value;
280+
}
281+
}
270282

271-
return createString(fromJavaStringNode, ast.toString(), Encodings.UTF_8);
283+
@CoreMethod(names = "profile_translator", onSingleton = true, required = 2, lowerFixnum = 2)
284+
public abstract static class ProfileTranslatorNode extends CoreMethodArrayArgumentsNode {
285+
@TruffleBoundary
286+
@Specialization
287+
Object profileTranslator(Object code, int repeat) {
288+
var codeString = new TStringWithEncoding(RubyGuards.asTruffleStringUncached(code),
289+
RubyStringLibrary.getUncached().getEncoding(code));
290+
291+
var rubySource = ParseASTNode.createRubySource(codeString);
292+
var parseResult = ParseASTNode.getParseResult(getLanguage(), rubySource);
293+
294+
var translator = new YARPTranslatorDriver(getContext());
295+
296+
for (int i = 0; i < repeat; i++) {
297+
translator.parse(rubySource, ParserContext.TOP_LEVEL, null, null, getContext().getRootLexicalScope(),
298+
this, parseResult);
299+
}
300+
301+
return nil;
272302
}
273303
}
274304

0 commit comments

Comments
 (0)