Skip to content

Commit ae2d4ce

Browse files
committed
[GR-18163] Fix wrong constant/identifier detection in lexer for non-ASCII encodings (#2102)
PullRequest: truffleruby/2050
2 parents 845cfe3 + 5e82283 commit ae2d4ce

File tree

3 files changed

+48
-4
lines changed

3 files changed

+48
-4
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Bug fixes:
1616
* Fixed `coverage` issue when `*eval` is used (#2078).
1717
* Use expanded load paths for feature matching (#1501).
1818
* Fixed `SystemStackError` sometimes replaced by an internal Java `NoClassDefFoundError` on JVM (#1743).
19+
* Fixed constant/identifier detection in lexer for non-ASCII encodings (#2079, #2102, @ivoanjo).
1920

2021
Compatibility:
2122

spec/ruby/language/variables_spec.rb

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,3 +796,32 @@ module VariableSpecs
796796
end
797797
end
798798
end
799+
800+
describe 'Allowed characters' do
801+
ruby_version_is "2.6" do
802+
# new feature in 2.6 -- https://bugs.ruby-lang.org/issues/13770
803+
it 'does not allow non-ASCII upcased characters at the beginning' do
804+
-> do
805+
eval <<-CODE
806+
def test
807+
ἍBB = 1
808+
end
809+
CODE
810+
end.should raise_error(SyntaxError, /dynamic constant assignment/)
811+
end
812+
end
813+
814+
it 'allows non-ASCII lowercased characters at the beginning' do
815+
result = nil
816+
817+
eval <<-CODE
818+
def test
819+
μ = 1
820+
end
821+
822+
result = test
823+
CODE
824+
825+
result.should == 1
826+
end
827+
end

src/main/java/org/truffleruby/parser/lexer/RubyLexer.java

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1802,7 +1802,7 @@ private int identifier(int c, boolean commandState) {
18021802
}
18031803
tempVal = createTokenRope();
18041804

1805-
if (result == 0 && Character.isUpperCase(tempVal.get(0) & 0xFF)) {
1805+
if (result == 0 && isFirstCodepointUppercase(tempVal)) {
18061806
result = RubyParser.tCONSTANT;
18071807
} else {
18081808
result = RubyParser.tIDENTIFIER;
@@ -3503,12 +3503,12 @@ public boolean update_heredoc_indent(int c) {
35033503
}
35043504

35053505
public void validateFormalIdentifier(Rope identifier) {
3506-
int first = identifier.get(0) & 0xFF;
3507-
3508-
if (Character.isUpperCase(first)) {
3506+
if (isFirstCodepointUppercase(identifier)) {
35093507
compile_error("formal argument cannot be a constant");
35103508
}
35113509

3510+
int first = identifier.get(0) & 0xFF;
3511+
35123512
switch (first) {
35133513
case '@':
35143514
if (identifier.get(1) == '@') {
@@ -3675,4 +3675,18 @@ protected boolean isSpaceArg(int c, boolean spaceSeen) {
36753675
return isARG() && spaceSeen && !Character.isWhitespace(c);
36763676
}
36773677

3678+
/** Encoding-aware check (multi-byte encodings included) of whether the first codepoint of a given rope is
3679+
* uppercase, usually to determine if it is a constant */
3680+
private boolean isFirstCodepointUppercase(Rope rope) {
3681+
Encoding ropeEncoding = rope.encoding;
3682+
int firstByte = rope.get(0) & 0xFF;
3683+
3684+
if (ropeEncoding.isAsciiCompatible() && isASCII(firstByte)) {
3685+
return StringSupport.isAsciiUppercase((byte) firstByte);
3686+
} else {
3687+
byte[] ropeBytes = rope.getBytes();
3688+
int firstCharacter = ropeEncoding.mbcToCode(ropeBytes, 0, ropeBytes.length);
3689+
return ropeEncoding.isUpper(firstCharacter);
3690+
}
3691+
}
36783692
}

0 commit comments

Comments
 (0)