Skip to content

Commit ff60b1b

Browse files
committed
Fix character length calculation for broken strings with a fixed-width encoding.
1 parent 1cbe187 commit ff60b1b

File tree

1 file changed

+13
-2
lines changed

1 file changed

+13
-2
lines changed

src/main/java/org/truffleruby/core/rope/RopeNodes.java

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -347,10 +347,12 @@ public StringAttributes calculateAttributesAsciiCompatibleGeneric(Encoding encod
347347
return new StringAttributes(characters, codeRange);
348348
}
349349

350+
350351
@Specialization(guards = { "!isEmpty(bytes)", "!isBinaryString(encoding)", "!isAsciiCompatible(encoding)" })
351352
public StringAttributes calculateAttributesGeneric(Encoding encoding, byte[] bytes,
352353
@Cached("create()") CalculateCharacterLengthNode calculateCharacterLengthNode,
353-
@Cached("createBinaryProfile()") ConditionProfile validCharacterProfile) {
354+
@Cached("createBinaryProfile()") ConditionProfile validCharacterProfile,
355+
@Cached("createBinaryProfile()") ConditionProfile fixedWidthProfile) {
354356
// Taken from StringSupport.strLengthWithCodeRangeNonAsciiCompatible.
355357

356358
CodeRange codeRange = CR_VALID;
@@ -369,7 +371,16 @@ public StringAttributes calculateAttributesGeneric(Encoding encoding, byte[] byt
369371
p += lengthOfCurrentCharacter;
370372
} else {
371373
codeRange = CR_BROKEN;
372-
p++;
374+
375+
// If a string is detected as broken and we already know the character length due to a
376+
// fixed-width encoding, there's no value in visiting any more bytes.
377+
if (fixedWidthProfile.profile(encoding.isFixedWidth())) {
378+
characters = (bytes.length + encoding.minLength() - 1) / encoding.minLength();
379+
380+
break;
381+
} else {
382+
p++;
383+
}
373384
}
374385
}
375386

0 commit comments

Comments
 (0)