Skip to content

Commit 7dc5519

Browse files
committed
Adopt RegularExpressionFlags for RegularExpressionNode
1 parent cae3c8c commit 7dc5519

File tree

8 files changed

+181
-1
lines changed

8 files changed

+181
-1
lines changed
Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,8 @@
11
fails:Parsing a BEGIN block (BEGIN { ... }) case is parsed correctly
22
fails:Parsing a END block (END { ... }) case is parsed correctly
33
fails:Parsing a For operator (for ... in ... operator) case is parsed correctly
4+
5+
# Not supported yet by Prism
6+
# See https://github.com/ruby/prism/issues/1997
7+
fails:Parsing a Regexp (encoding / when there are non-ASCII characters in a literal) case is parsed correctly
8+
fails:Parsing a Regexp (encoding in boolean context / when there are non-ASCII characters in a literal) case is parsed correctly
Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,13 @@
1+
subject: "Regexp"
2+
description: "encoding / when there are ASCII characters only in a literal"
3+
notes: >
4+
Regexp is forced to the US-ASCII encoding
5+
focused_on_node: "org.truffleruby.language.literal.ObjectLiteralNode"
6+
ruby: |
7+
# encoding: utf-8
8+
/abc/
9+
ast: |
10+
ObjectLiteralNode
11+
attributes:
12+
flags = 1
13+
object = RubyRegexp(source = abc, options = RegexpOptions(kcode: NONE, kcodeDefault, literal), encoding = US-ASCII)
Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,13 @@
1+
subject: "Regexp"
2+
description: "encoding / when there are non-ASCII characters in a literal"
3+
notes: >
4+
Regexp may be forced to the BINARY (ASCII-8BIT) encoding sometimes
5+
focused_on_node: "org.truffleruby.language.literal.ObjectLiteralNode"
6+
ruby: |
7+
# encoding: us-ascii
8+
/abc \xFF/
9+
ast: |
10+
ObjectLiteralNode
11+
attributes:
12+
flags = 1
13+
object = RubyRegexp(source = abc \xFF, options = RegexpOptions(kcode: NONE, fixed, kcodeDefault, literal), encoding = ASCII-8BIT)
Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,13 @@
1+
subject: "Regexp"
2+
description: "encoding / when there are UTF-8 characters in a literal"
3+
notes: >
4+
Regexp may be forced to the UTF-8 encoding sometimes
5+
focused_on_node: "org.truffleruby.language.literal.ObjectLiteralNode"
6+
ruby: |
7+
# encoding: us-ascii
8+
/abc \u{A3}/
9+
ast: |
10+
ObjectLiteralNode
11+
attributes:
12+
flags = 1
13+
object = RubyRegexp(source = abc \u{A3}, options = RegexpOptions(kcode: NONE, fixed, kcodeDefault, literal), encoding = UTF-8)
Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,41 @@
1+
subject: "Regexp"
2+
description: "encoding in boolean context / when there are ASCII characters only in a literal"
3+
notes: >
4+
Regexp is forced to the US-ASCII encoding
5+
focused_on_node: "org.truffleruby.language.dispatch.RubyCallNode"
6+
ruby: |
7+
# encoding: utf-8
8+
/abc/ ? 1 : 2
9+
ast: |
10+
RubyCallNode
11+
attributes:
12+
descriptor = NoKeywordArgumentsDescriptor
13+
dispatchConfig = PROTECTED
14+
emptyKeywordsProfile = false
15+
flags = 1
16+
isAttrAssign = false
17+
isSafeNavigation = false
18+
isSplatted = false
19+
isVCall = false
20+
lastArgIsNotHashProfile = false
21+
methodName = "=~"
22+
notEmptyKeywordsProfile = false
23+
notRuby2KeywordsHashProfile = false
24+
children:
25+
arguments = [
26+
ReadGlobalVariableNodeGen
27+
attributes:
28+
flags = 0
29+
name = "$_"
30+
children:
31+
lookupGlobalVariableStorageNode =
32+
LookupGlobalVariableStorageNodeGen
33+
attributes:
34+
index = -1
35+
name = "$_"
36+
]
37+
receiver =
38+
ObjectLiteralNode
39+
attributes:
40+
flags = 0
41+
object = RubyRegexp(source = abc, options = RegexpOptions(kcode: NONE, kcodeDefault, literal), encoding = US-ASCII)
Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,41 @@
1+
subject: "Regexp"
2+
description: "encoding in boolean context / when there are non-ASCII characters in a literal"
3+
notes: >
4+
Regexp may be forced to the BINARY (ASCII-8BIT) encoding sometimes
5+
focused_on_node: "org.truffleruby.language.dispatch.RubyCallNode"
6+
ruby: |
7+
# encoding: us-ascii
8+
/abc \xFF/ ? 1 : 2
9+
ast: |
10+
RubyCallNode
11+
attributes:
12+
descriptor = NoKeywordArgumentsDescriptor
13+
dispatchConfig = PROTECTED
14+
emptyKeywordsProfile = false
15+
flags = 1
16+
isAttrAssign = false
17+
isSafeNavigation = false
18+
isSplatted = false
19+
isVCall = false
20+
lastArgIsNotHashProfile = false
21+
methodName = "=~"
22+
notEmptyKeywordsProfile = false
23+
notRuby2KeywordsHashProfile = false
24+
children:
25+
arguments = [
26+
ReadGlobalVariableNodeGen
27+
attributes:
28+
flags = 0
29+
name = "$_"
30+
children:
31+
lookupGlobalVariableStorageNode =
32+
LookupGlobalVariableStorageNodeGen
33+
attributes:
34+
index = -1
35+
name = "$_"
36+
]
37+
receiver =
38+
ObjectLiteralNode
39+
attributes:
40+
flags = 0
41+
object = RubyRegexp(source = abc \xFF, options = RegexpOptions(kcode: NONE, fixed, kcodeDefault, literal), encoding = ASCII-8BIT)
Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,41 @@
1+
subject: "Regexp"
2+
description: "encoding in boolean context / when there are UTF-8 characters in a literal"
3+
notes: >
4+
Regexp may be forced to the UTF-8 encoding sometimes
5+
focused_on_node: "org.truffleruby.language.dispatch.RubyCallNode"
6+
ruby: |
7+
# encoding: us-ascii
8+
/abc \u{A3}/ ? 1 : 2
9+
ast: |
10+
RubyCallNode
11+
attributes:
12+
descriptor = NoKeywordArgumentsDescriptor
13+
dispatchConfig = PROTECTED
14+
emptyKeywordsProfile = false
15+
flags = 1
16+
isAttrAssign = false
17+
isSafeNavigation = false
18+
isSplatted = false
19+
isVCall = false
20+
lastArgIsNotHashProfile = false
21+
methodName = "=~"
22+
notEmptyKeywordsProfile = false
23+
notRuby2KeywordsHashProfile = false
24+
children:
25+
arguments = [
26+
ReadGlobalVariableNodeGen
27+
attributes:
28+
flags = 0
29+
name = "$_"
30+
children:
31+
lookupGlobalVariableStorageNode =
32+
LookupGlobalVariableStorageNodeGen
33+
attributes:
34+
index = -1
35+
name = "$_"
36+
]
37+
receiver =
38+
ObjectLiteralNode
39+
attributes:
40+
flags = 0
41+
object = RubyRegexp(source = abc \u{A3}, options = RegexpOptions(kcode: NONE, fixed, kcodeDefault, literal), encoding = UTF-8)

src/main/java/org/truffleruby/parser/YARPTranslator.java

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2536,10 +2536,13 @@ private record RegexpEncodingAndOptions(RubyEncoding encoding, RegexpOptions opt
25362536
}
25372537

25382538
private RegexpEncodingAndOptions getRegexpEncodingAndOptions(Nodes.RegularExpressionFlags flags) {
2539+
RubyEncoding regexpEncoding;
2540+
2541+
// regexp options
25392542
final KCode kcode;
2540-
final RubyEncoding regexpEncoding;
25412543
final boolean fixed;
25422544
boolean explicitEncoding = true;
2545+
25432546
if (flags.isAscii8bit()) {
25442547
fixed = false;
25452548
kcode = KCode.NONE;
@@ -2563,6 +2566,16 @@ private RegexpEncodingAndOptions getRegexpEncodingAndOptions(Nodes.RegularExpres
25632566
explicitEncoding = false;
25642567
}
25652568

2569+
if (!explicitEncoding) {
2570+
if (flags.isForcedBinaryEncoding()) {
2571+
regexpEncoding = Encodings.BINARY;
2572+
} else if (flags.isForcedUsAsciiEncoding()) {
2573+
regexpEncoding = Encodings.US_ASCII;
2574+
} else if (flags.isForcedUtf8Encoding()) {
2575+
regexpEncoding = Encodings.UTF_8;
2576+
}
2577+
}
2578+
25662579
final RegexpOptions options = new RegexpOptions(kcode, fixed, flags.isOnce(), flags.isExtended(),
25672580
flags.isMultiLine(), flags.isIgnoreCase(), flags.isAscii8bit(), !explicitEncoding, true);
25682581
return new RegexpEncodingAndOptions(regexpEncoding, options);

0 commit comments

Comments
 (0)