Skip to content

Commit 65a961e

Browse files
committed
Use JCodings to check for uppercase char instead of Java String
This improves on 3ec4747 by:

* Avoiding a Rope to String conversion just to be able to extract the first character in an encoding-aware way
* Avoiding issues with surrogate characters -- as pointed out by @eregon during PR review, `String::charAt(...)` can still slice characters in half, as discussed in <https://stackoverflow.com/questions/5903008/what-is-a-surrogate-pair-in-java>

PullRequest: truffleruby/2102
1 parent 73f2313 commit 65a961e

File tree

1 file changed

+11
-4
lines changed

1 file changed

+11
-4
lines changed

src/main/java/org/truffleruby/parser/lexer/RubyLexer.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1798,7 +1798,7 @@ private int identifier(int c, boolean commandState) {
17981798
}
17991799
tempVal = createTokenRope();
18001800

1801-
if (result == 0 && Character.isUpperCase(tempVal.getString().charAt(0))) {
1801+
if (result == 0 && isFirstCharacterEncodingAwareUppercase(tempVal)) {
18021802
result = RubyParser.tCONSTANT;
18031803
} else {
18041804
result = RubyParser.tIDENTIFIER;
@@ -3493,12 +3493,12 @@ public boolean update_heredoc_indent(int c) {
34933493
}
34943494

34953495
public void validateFormalIdentifier(Rope identifier) {
3496-
int first = identifier.getString().charAt(0);
3497-
3498-
if (Character.isUpperCase(first)) {
3496+
if (isFirstCharacterEncodingAwareUppercase(identifier)) {
34993497
compile_error("formal argument cannot be a constant");
35003498
}
35013499

3500+
int first = identifier.getString().charAt(0);
3501+
35023502
switch (first) {
35033503
case '@':
35043504
if (identifier.get(1) == '@') {
@@ -3665,4 +3665,11 @@ protected boolean isSpaceArg(int c, boolean spaceSeen) {
36653665
return isARG() && spaceSeen && !Character.isWhitespace(c);
36663666
}
36673667

3668+
/** Encoding-aware (including multi-byte encodings) check of first character of a given rope, usually to determine
3669+
* if it is a constant */
3670+
private boolean isFirstCharacterEncodingAwareUppercase(Rope rope) {
3671+
byte[] ropeBytes = rope.getBytes();
3672+
int firstCharacter = rope.encoding.mbcToCode(ropeBytes, 0, ropeBytes.length);
3673+
return rope.encoding.isUpper(firstCharacter);
3674+
}
36683675
}

0 commit comments

Comments
 (0)