Skip to content

Commit bf4a4d0

Browse files
committed
Remove the leading empty string literal node in InterpolatedRegexpNode that represented the initial encoding (either forced or specified with a flag)
1 parent 12cd32b commit bf4a4d0

9 files changed

+35
-66
lines changed

spec/truffle/parsing/fixtures/regexps/in_boolean_context_with_interpolation.yaml

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ ast: |
4141
receiver =
4242
InterpolatedRegexpNode
4343
attributes:
44+
encoding = ASCII-8BIT
4445
flags = 0
4546
rubyStringLibrary = org.truffleruby.language.library.RubyStringLibrary$Cached@...
4647
children:
@@ -49,19 +50,12 @@ ast: |
4950
builderNode =
5051
InterpolatedRegexpNodeFactory$RegexpBuilderNodeGen
5152
attributes:
53+
encoding = ASCII-8BIT
5254
options = RegexpOptions(kcode: NONE, kcodeDefault, literal)
5355
children:
5456
equalNode =
5557
TruffleStringFactory$EqualNodeGen
5658
children = [
57-
ToSNodeGen
58-
children:
59-
valueNode_ =
60-
StringLiteralNode
61-
attributes:
62-
encoding = ASCII-8BIT
63-
flags = 0
64-
tstring = ""
6559
ToSNodeGen
6660
children:
6761
valueNode_ =

spec/truffle/parsing/fixtures/regexps/with_embedded_class_variable.yaml

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ ruby: |
77
ast: |
88
InterpolatedRegexpNode
99
attributes:
10+
encoding = ASCII-8BIT
1011
flags = 0
1112
rubyStringLibrary = org.truffleruby.language.library.RubyStringLibrary$Cached@...
1213
children:
@@ -15,19 +16,12 @@ ast: |
1516
builderNode =
1617
InterpolatedRegexpNodeFactory$RegexpBuilderNodeGen
1718
attributes:
19+
encoding = ASCII-8BIT
1820
options = RegexpOptions(kcode: NONE, kcodeDefault, literal)
1921
children:
2022
equalNode =
2123
TruffleStringFactory$EqualNodeGen
2224
children = [
23-
ToSNodeGen
24-
children:
25-
valueNode_ =
26-
StringLiteralNode
27-
attributes:
28-
encoding = ASCII-8BIT
29-
flags = 0
30-
tstring = ""
3125
ToSNodeGen
3226
children:
3327
valueNode_ =

spec/truffle/parsing/fixtures/regexps/with_embedded_global_variable.yaml

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ ruby: |
77
ast: |
88
InterpolatedRegexpNode
99
attributes:
10+
encoding = ASCII-8BIT
1011
flags = 0
1112
rubyStringLibrary = org.truffleruby.language.library.RubyStringLibrary$Cached@...
1213
children:
@@ -15,19 +16,12 @@ ast: |
1516
builderNode =
1617
InterpolatedRegexpNodeFactory$RegexpBuilderNodeGen
1718
attributes:
19+
encoding = ASCII-8BIT
1820
options = RegexpOptions(kcode: NONE, kcodeDefault, literal)
1921
children:
2022
equalNode =
2123
TruffleStringFactory$EqualNodeGen
2224
children = [
23-
ToSNodeGen
24-
children:
25-
valueNode_ =
26-
StringLiteralNode
27-
attributes:
28-
encoding = ASCII-8BIT
29-
flags = 0
30-
tstring = ""
3125
ToSNodeGen
3226
children:
3327
valueNode_ =

spec/truffle/parsing/fixtures/regexps/with_embedded_instance_variable.yaml

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ ruby: |
77
ast: |
88
InterpolatedRegexpNode
99
attributes:
10+
encoding = ASCII-8BIT
1011
flags = 0
1112
rubyStringLibrary = org.truffleruby.language.library.RubyStringLibrary$Cached@...
1213
children:
@@ -15,19 +16,12 @@ ast: |
1516
builderNode =
1617
InterpolatedRegexpNodeFactory$RegexpBuilderNodeGen
1718
attributes:
19+
encoding = ASCII-8BIT
1820
options = RegexpOptions(kcode: NONE, kcodeDefault, literal)
1921
children:
2022
equalNode =
2123
TruffleStringFactory$EqualNodeGen
2224
children = [
23-
ToSNodeGen
24-
children:
25-
valueNode_ =
26-
StringLiteralNode
27-
attributes:
28-
encoding = ASCII-8BIT
29-
flags = 0
30-
tstring = ""
3125
ToSNodeGen
3226
children:
3327
valueNode_ =

spec/truffle/parsing/fixtures/regexps/with_interpolation.yaml

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ ruby: |
77
ast: |
88
InterpolatedRegexpNode
99
attributes:
10+
encoding = ASCII-8BIT
1011
flags = 0
1112
rubyStringLibrary = org.truffleruby.language.library.RubyStringLibrary$Cached@...
1213
children:
@@ -15,19 +16,12 @@ ast: |
1516
builderNode =
1617
InterpolatedRegexpNodeFactory$RegexpBuilderNodeGen
1718
attributes:
19+
encoding = ASCII-8BIT
1820
options = RegexpOptions(kcode: NONE, kcodeDefault, literal)
1921
children:
2022
equalNode =
2123
TruffleStringFactory$EqualNodeGen
2224
children = [
23-
ToSNodeGen
24-
children:
25-
valueNode_ =
26-
StringLiteralNode
27-
attributes:
28-
encoding = ASCII-8BIT
29-
flags = 0
30-
tstring = ""
3125
ToSNodeGen
3226
children:
3327
valueNode_ =

spec/truffle/parsing/fixtures/regexps/with_interpolation_without_expression.yaml

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ ruby: |
1010
ast: |
1111
InterpolatedRegexpNode
1212
attributes:
13+
encoding = ASCII-8BIT
1314
flags = 0
1415
rubyStringLibrary = org.truffleruby.language.library.RubyStringLibrary$Cached@...
1516
children:
@@ -18,19 +19,12 @@ ast: |
1819
builderNode =
1920
InterpolatedRegexpNodeFactory$RegexpBuilderNodeGen
2021
attributes:
22+
encoding = ASCII-8BIT
2123
options = RegexpOptions(kcode: NONE, kcodeDefault, literal)
2224
children:
2325
equalNode =
2426
TruffleStringFactory$EqualNodeGen
2527
children = [
26-
ToSNodeGen
27-
children:
28-
valueNode_ =
29-
StringLiteralNode
30-
attributes:
31-
encoding = ASCII-8BIT
32-
flags = 0
33-
tstring = ""
3428
ToSNodeGen
3529
children:
3630
valueNode_ =

src/main/java/org/truffleruby/core/regexp/InterpolatedRegexpNode.java

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import com.oracle.truffle.api.strings.TruffleString;
1313
import com.oracle.truffle.api.strings.TruffleString.AsTruffleStringNode;
1414
import org.truffleruby.core.cast.ToSNode;
15+
import org.truffleruby.core.encoding.RubyEncoding;
1516
import org.truffleruby.core.regexp.InterpolatedRegexpNodeFactory.RegexpBuilderNodeGen;
1617
import org.truffleruby.core.string.TStringWithEncoding;
1718
import org.truffleruby.language.NotOptimizedWarningNode;
@@ -33,10 +34,13 @@ public final class InterpolatedRegexpNode extends RubyContextSourceNode {
3334
@Child private RegexpBuilderNode builderNode;
3435
private final RubyStringLibrary rubyStringLibrary = RubyStringLibrary.create();
3536
@Child private AsTruffleStringNode asTruffleStringNode = AsTruffleStringNode.create();
37+
/** Initial encoding used as the starting point for encoding negotiation. */
38+
private final RubyEncoding encoding;
3639

37-
public InterpolatedRegexpNode(ToSNode[] children, RegexpOptions options) {
40+
public InterpolatedRegexpNode(ToSNode[] children, RubyEncoding encoding, RegexpOptions options) {
3841
this.children = children;
39-
this.builderNode = RegexpBuilderNode.create(options);
42+
this.encoding = encoding;
43+
this.builderNode = RegexpBuilderNode.create(encoding, options);
4044
}
4145

4246
@Override
@@ -60,6 +64,7 @@ protected TStringWithEncoding[] executeChildren(VirtualFrame frame) {
6064
public RubyNode cloneUninitialized() {
6165
var copy = new InterpolatedRegexpNode(
6266
cloneUninitialized(children),
67+
encoding,
6368
builderNode.options);
6469
return copy.copyFlags(this);
6570
}
@@ -75,13 +80,15 @@ protected static ToSNode[] cloneUninitialized(ToSNode[] nodes) {
7580
public abstract static class RegexpBuilderNode extends RubyBaseNode {
7681

7782
@Child private TruffleString.EqualNode equalNode = TruffleString.EqualNode.create();
83+
private final RubyEncoding encoding;
7884
private final RegexpOptions options;
7985

80-
public static RegexpBuilderNode create(RegexpOptions options) {
81-
return RegexpBuilderNodeGen.create(options);
86+
public static RegexpBuilderNode create(RubyEncoding encoding, RegexpOptions options) {
87+
return RegexpBuilderNodeGen.create(encoding, options);
8288
}
8389

84-
public RegexpBuilderNode(RegexpOptions options) {
90+
public RegexpBuilderNode(RubyEncoding encoding, RegexpOptions options) {
91+
this.encoding = encoding;
8592
this.options = options;
8693
}
8794

@@ -117,8 +124,13 @@ protected boolean tstringsWithEncodingsMatch(TStringWithEncoding[] a, TStringWit
117124

118125
@TruffleBoundary
119126
protected RubyRegexp createRegexp(TStringWithEncoding[] strings) {
127+
// the initial encoding is represented as a leading empty string ("") that carries this encoding
128+
TStringWithEncoding[] stringsWithPrefix = new TStringWithEncoding[1 + strings.length];
129+
stringsWithPrefix[0] = new TStringWithEncoding(encoding.tencoding.getEmpty(), encoding);
130+
System.arraycopy(strings, 0, stringsWithPrefix, 1, strings.length);
131+
120132
try {
121-
var preprocessed = ClassicRegexp.preprocessDRegexp(getContext(), strings, options);
133+
var preprocessed = ClassicRegexp.preprocessDRegexp(getContext(), stringsWithPrefix, options);
122134
return RubyRegexp.create(getLanguage(), preprocessed.tstring, preprocessed.encoding, options, this);
123135
} catch (DeferredRaiseException dre) {
124136
throw dre.getException(getContext());

src/main/java/org/truffleruby/parser/BodyTranslator.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,6 +1115,7 @@ public RubyNode visitDRegxNode(DRegexpParseNode node) {
11151115

11161116
final InterpolatedRegexpNode i = new InterpolatedRegexpNode(
11171117
children.toArray(EMPTY_TO_S_NODE_ARRAY),
1118+
Encodings.getBuiltInEncoding(node.getEncoding()),
11181119
node.getOptions());
11191120
i.unsafeSetSourceSection(sourceSection);
11201121

src/main/java/org/truffleruby/parser/YARPTranslator.java

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2298,24 +2298,16 @@ public RubyNode visitInterpolatedRegularExpressionNode(Nodes.InterpolatedRegular
22982298
var encodingAndOptions = getRegexpEncodingAndOptions(new Nodes.RegularExpressionFlags(node.flags));
22992299
final ToSNode[] children = translateInterpolatedParts(node.parts);
23002300

2301-
// TODO: optimise AST and pass initial encoding as a parameter instead of passing as a StringLiteralNode
2302-
// 0 element represents initial Regexp encoding derived from explicit Regexp modifiers
2303-
final ToSNode[] childrenWithPrefix = new ToSNode[children.length + 1];
2304-
System.arraycopy(children, 0, childrenWithPrefix, 1, children.length);
2305-
final RubyEncoding prefixEncoding;
2301+
final RubyEncoding encoding;
23062302
if (!encodingAndOptions.options.isKcodeDefault()) { // explicit encoding
2307-
prefixEncoding = encodingAndOptions.encoding;
2303+
encoding = encodingAndOptions.encoding;
23082304
} else {
23092305
// use BINARY explicitly, probably because forcing an encoding isn't implemented yet in Prism
23102306
// see https://github.com/ruby/prism/issues/1997
2311-
prefixEncoding = Encodings.BINARY;
2307+
encoding = Encodings.BINARY;
23122308
}
23132309

2314-
var emptyTString = prefixEncoding.tencoding.getEmpty();
2315-
var stringNode = new StringLiteralNode(emptyTString, prefixEncoding);
2316-
childrenWithPrefix[0] = ToSNodeGen.create(stringNode);
2317-
2318-
RubyNode rubyNode = new InterpolatedRegexpNode(childrenWithPrefix, encodingAndOptions.options);
2310+
RubyNode rubyNode = new InterpolatedRegexpNode(children, encoding, encodingAndOptions.options);
23192311

23202312
if (node.isOnce()) {
23212313
rubyNode = new OnceNode(rubyNode);

0 commit comments

Comments
 (0)