Skip to content

Commit 16b3c2e

Browse files
committed
[GR-20471] Fix characterLength() for BINARY ConcatRope.
PullRequest: truffleruby/1260
2 parents af55670 + e7eaafa commit 16b3c2e

18 files changed

+150
-88
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ Bug fixes:
6969
* Fixed `ENV.replace` implementation.
7070
* Fixed `ENV.udpate` implementation.
7171
* Fixed argument handling in `Kernel.printf`.
72+
* Fixed character length after conversion to binary from a non-US-ASCII String.
7273

7374
Compatibility:
7475

spec/ruby/core/string/shared/length.rb

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,17 @@
2323

2424
str.force_encoding('BINARY').send(@method).should == 12
2525
end
26+
27+
it "returns the correct length after force_encoding(BINARY)" do
28+
utf8 = "あ"
29+
ascii = "a"
30+
concat = utf8 + ascii
31+
32+
concat.encoding.should == Encoding::UTF_8
33+
concat.bytesize.should == 4
34+
35+
concat.size.should == 2
36+
concat.force_encoding(Encoding::ASCII_8BIT)
37+
concat.size.should == 4
38+
end
2639
end

src/main/java/org/truffleruby/core/regexp/ClassicRegexp.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -669,7 +669,8 @@ private static Encoding processDRegexpElement(RubyContext context, RegexpOptions
669669
private static final int QUOTED_V = 11;
670670

671671
@TruffleBoundary
672-
public static Rope quote19(Rope bs, boolean asciiOnly) {
672+
public static Rope quote19(Rope bs) {
673+
final boolean asciiOnly = bs.isAsciiOnly();
673674
int p = 0;
674675
int end = bs.byteLength();
675676
final byte[] bytes = bs.getBytes();
@@ -721,7 +722,7 @@ public static Rope quote19(Rope bs, boolean asciiOnly) {
721722
p += cl;
722723
}
723724
if (asciiOnly) {
724-
return bs.withEncoding(USASCIIEncoding.INSTANCE, CR_7BIT);
725+
return RopeOperations.withEncoding(bs, USASCIIEncoding.INSTANCE);
725726
}
726727
return bs;
727728
} while (false);

src/main/java/org/truffleruby/core/regexp/RegexpNodes.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,7 @@ public abstract static class QuoteNode extends CoreMethodArrayArgumentsNode {
211211
@Specialization(guards = "isRubyString(raw)")
212212
protected DynamicObject quoteString(DynamicObject raw) {
213213
final Rope rope = StringOperations.rope(raw);
214-
boolean isAsciiOnly = rope.getEncoding().isAsciiCompatible() && rope.getCodeRange() == CodeRange.CR_7BIT;
215-
return getMakeStringNode().fromRope(ClassicRegexp.quote19(rope, isAsciiOnly));
214+
return getMakeStringNode().fromRope(ClassicRegexp.quote19(rope));
216215
}
217216

218217
@Specialization(guards = "isRubySymbol(raw)")

src/main/java/org/truffleruby/core/regexp/TruffleRegexpNodes.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
import org.truffleruby.core.regexp.TruffleRegexpNodesFactory.MatchNodeGen;
4040
import org.truffleruby.core.rope.Rope;
4141
import org.truffleruby.core.rope.RopeBuilder;
42-
import org.truffleruby.core.rope.RopeNodes;
4342
import org.truffleruby.core.rope.RopeOperations;
4443
import org.truffleruby.core.string.StringNodes;
4544
import org.truffleruby.core.string.StringNodes.StringAppendPrimitiveNode;
@@ -71,7 +70,6 @@ public static abstract class RegexpUnionNode extends CoreMethodArrayArgumentsNod
7170
@Child CallDispatchHeadNode copyNode = CallDispatchHeadNode.createPrivate();
7271
@Child private SameOrEqualNode sameOrEqualNode = SameOrEqualNode.create();
7372
@Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create();
74-
@Child private RopeNodes.AsciiOnlyNode asciiOnlyNode = RopeNodes.AsciiOnlyNode.create();
7573

7674
@Specialization(guards = "argsMatch(frame, cachedArgs, args)", limit = "getDefaultCacheLimit()")
7775
protected Object executeFastUnion(VirtualFrame frame, DynamicObject str, DynamicObject sep, Object[] args,
@@ -101,8 +99,7 @@ public DynamicObject buildUnion(DynamicObject str, DynamicObject sep, Object[] a
10199
public DynamicObject string(Object obj) {
102100
if (RubyGuards.isRubyString(obj)) {
103101
final Rope rope = StringOperations.rope((DynamicObject) obj);
104-
final boolean isAsciiOnly = asciiOnlyNode.execute(rope);
105-
return makeStringNode.fromRope(ClassicRegexp.quote19(rope, isAsciiOnly));
102+
return makeStringNode.fromRope(ClassicRegexp.quote19(rope));
106103
} else {
107104
return toSNode.execute((DynamicObject) obj);
108105
}

src/main/java/org/truffleruby/core/rope/AsciiOnlyLeafRope.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,13 @@ public AsciiOnlyLeafRope(byte[] bytes, Encoding encoding) {
2323
}
2424

2525
@Override
26-
public Rope withEncoding(Encoding newEncoding, CodeRange newCodeRange) {
27-
if (newCodeRange != getCodeRange()) {
28-
CompilerDirectives.transferToInterpreterAndInvalidate();
29-
throw new UnsupportedOperationException("Cannot fast-path updating encoding with different code range.");
30-
}
31-
26+
Rope withEncoding7bit(Encoding newEncoding) {
3227
return new AsciiOnlyLeafRope(getRawBytes(), newEncoding);
3328
}
29+
30+
@Override
31+
Rope withBinaryEncoding() {
32+
CompilerDirectives.transferToInterpreterAndInvalidate();
33+
throw new UnsupportedOperationException("Must only be called for CR_VALID Strings");
34+
}
3435
}

src/main/java/org/truffleruby/core/rope/CodeRange.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,22 @@
1717
import com.oracle.truffle.api.CompilerDirectives;
1818

1919
public enum CodeRange {
20+
/**
21+
* Used for {@link NativeRope}, where the bytes can change from real native code.
22+
* Also used when building a new {@link Rope} and the code range is unknown.
23+
*/
2024
CR_UNKNOWN(0),
25+
/**
26+
* Only used for ASCII-compatible encodings, when all characters are US-ASCII (7-bit).
27+
*/
2128
CR_7BIT(1),
29+
/**
30+
* All characters are valid, but at least one non-7-bit character.
31+
*/
2232
CR_VALID(2),
33+
/**
34+
* At least one character is not valid in the encoding of that Rope.
35+
*/
2336
CR_BROKEN(3);
2437

2538
private final int value;

src/main/java/org/truffleruby/core/rope/ConcatRope.java

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111

1212
import org.jcodings.Encoding;
1313

14-
import com.oracle.truffle.api.CompilerDirectives;
1514
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
15+
import org.jcodings.specific.ASCIIEncoding;
1616

1717
public class ConcatRope extends ManagedRope {
1818

@@ -27,22 +27,23 @@ public ConcatRope(
2727
CodeRange codeRange,
2828
int depth,
2929
boolean balanced) {
30-
this(left, right, encoding, codeRange, depth, null, balanced);
30+
this(left, right, encoding, codeRange, left.characterLength() + right.characterLength(), depth, null, balanced);
3131
}
3232

3333
private ConcatRope(
3434
ManagedRope left,
3535
ManagedRope right,
3636
Encoding encoding,
3737
CodeRange codeRange,
38+
int characterLength,
3839
int depth,
3940
byte[] bytes,
4041
boolean balanced) {
4142
super(
4243
encoding,
4344
codeRange,
4445
left.byteLength() + right.byteLength(),
45-
left.characterLength() + right.characterLength(),
46+
characterLength,
4647
depth,
4748
bytes);
4849
this.left = left;
@@ -51,13 +52,31 @@ private ConcatRope(
5152
}
5253

5354
@Override
54-
public Rope withEncoding(Encoding newEncoding, CodeRange newCodeRange) {
55-
if (newCodeRange != getCodeRange()) {
56-
CompilerDirectives.transferToInterpreterAndInvalidate();
57-
throw new UnsupportedOperationException("Cannot fast-path updating encoding with different code range.");
58-
}
55+
Rope withEncoding7bit(Encoding newEncoding) {
56+
assert getCodeRange() == CodeRange.CR_7BIT;
57+
return new ConcatRope(
58+
getLeft(),
59+
getRight(),
60+
newEncoding,
61+
CodeRange.CR_7BIT,
62+
characterLength(),
63+
depth(),
64+
getRawBytes(),
65+
balanced);
66+
}
5967

60-
return new ConcatRope(getLeft(), getRight(), newEncoding, newCodeRange, depth(), getRawBytes(), balanced);
68+
@Override
69+
Rope withBinaryEncoding() {
70+
assert getCodeRange() == CodeRange.CR_VALID;
71+
return new ConcatRope(
72+
getLeft(),
73+
getRight(),
74+
ASCIIEncoding.INSTANCE,
75+
CodeRange.CR_VALID,
76+
byteLength(),
77+
depth(),
78+
getRawBytes(),
79+
balanced);
6180
}
6281

6382
@Override

src/main/java/org/truffleruby/core/rope/InvalidLeafRope.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,14 @@ public InvalidLeafRope(byte[] bytes, Encoding encoding, int characterLength) {
2323
}
2424

2525
@Override
26-
public Rope withEncoding(Encoding newEncoding, CodeRange newCodeRange) {
27-
if (newCodeRange != getCodeRange()) {
28-
CompilerDirectives.transferToInterpreterAndInvalidate();
29-
throw new UnsupportedOperationException("Cannot fast-path updating encoding with different code range.");
30-
}
31-
32-
final int newCharacterLength = RopeOperations.strLength(newEncoding, getRawBytes(), 0, byteLength());
33-
return new InvalidLeafRope(getRawBytes(), newEncoding, newCharacterLength);
26+
Rope withEncoding7bit(Encoding newEncoding) {
27+
CompilerDirectives.transferToInterpreterAndInvalidate();
28+
throw new UnsupportedOperationException("Must only be called for ASCII-only Strings");
29+
}
30+
31+
@Override
32+
Rope withBinaryEncoding() {
33+
CompilerDirectives.transferToInterpreterAndInvalidate();
34+
throw new UnsupportedOperationException("Must only be called for CR_VALID Strings");
3435
}
3536
}

src/main/java/org/truffleruby/core/rope/LazyIntRope.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,17 @@ private static int length(int value) {
5555
}
5656

5757
@Override
58-
public Rope withEncoding(Encoding newEncoding, CodeRange newCodeRange) {
59-
if (newCodeRange != getCodeRange()) {
60-
CompilerDirectives.transferToInterpreterAndInvalidate();
61-
throw new UnsupportedOperationException("Cannot fast-path updating encoding with different code range.");
62-
}
63-
58+
Rope withEncoding7bit(Encoding newEncoding) {
59+
assert getCodeRange() == CodeRange.CR_7BIT;
6460
return new LazyIntRope(value, newEncoding, length(value));
6561
}
6662

63+
@Override
64+
Rope withBinaryEncoding() {
65+
CompilerDirectives.transferToInterpreterAndInvalidate();
66+
throw new UnsupportedOperationException("Must only be called for CR_VALID Strings");
67+
}
68+
6769
@Override
6870
public byte[] fulfill() {
6971
if (bytes == null) {

0 commit comments

Comments
 (0)