Skip to content

Commit e7eaafa

Browse files
committed
[GR-20471] Split Rope#withEncoding() in withEncoding7bit() and withBinaryEncoding()
* This makes it easier to reason about them. * Also fixes the bug for the character length with ConcatRope#withEncoding(BINARY, CR_VALID). * Reduce visibility of these methods since they should only be used by WithEncodingNode.
1 parent 7768a69 commit e7eaafa

13 files changed

+108
-59
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ Bug fixes:
6969
* Fixed `ENV.replace` implementation.
7070
* Fixed `ENV.udpate` implementation.
7171
* Fixed argument handling in `Kernel.printf`.
72+
* Fixed character length after conversion to binary from a non-US-ASCII String.
7273

7374
Compatibility:
7475

src/main/java/org/truffleruby/core/rope/AsciiOnlyLeafRope.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,13 @@ public AsciiOnlyLeafRope(byte[] bytes, Encoding encoding) {
2323
}
2424

2525
@Override
26-
public Rope withEncoding(Encoding newEncoding, CodeRange newCodeRange) {
27-
if (newCodeRange != getCodeRange()) {
28-
CompilerDirectives.transferToInterpreterAndInvalidate();
29-
throw new UnsupportedOperationException("Cannot fast-path updating encoding with different code range.");
30-
}
31-
26+
Rope withEncoding7bit(Encoding newEncoding) {
3227
return new AsciiOnlyLeafRope(getRawBytes(), newEncoding);
3328
}
29+
30+
@Override
31+
Rope withBinaryEncoding() {
32+
CompilerDirectives.transferToInterpreterAndInvalidate();
33+
throw new UnsupportedOperationException("Must only be called for CR_VALID Strings");
34+
}
3435
}

src/main/java/org/truffleruby/core/rope/ConcatRope.java

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111

1212
import org.jcodings.Encoding;
1313

14-
import com.oracle.truffle.api.CompilerDirectives;
1514
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
15+
import org.jcodings.specific.ASCIIEncoding;
1616

1717
public class ConcatRope extends ManagedRope {
1818

@@ -27,22 +27,23 @@ public ConcatRope(
2727
CodeRange codeRange,
2828
int depth,
2929
boolean balanced) {
30-
this(left, right, encoding, codeRange, depth, null, balanced);
30+
this(left, right, encoding, codeRange, left.characterLength() + right.characterLength(), depth, null, balanced);
3131
}
3232

3333
private ConcatRope(
3434
ManagedRope left,
3535
ManagedRope right,
3636
Encoding encoding,
3737
CodeRange codeRange,
38+
int characterLength,
3839
int depth,
3940
byte[] bytes,
4041
boolean balanced) {
4142
super(
4243
encoding,
4344
codeRange,
4445
left.byteLength() + right.byteLength(),
45-
left.characterLength() + right.characterLength(),
46+
characterLength,
4647
depth,
4748
bytes);
4849
this.left = left;
@@ -51,13 +52,31 @@ private ConcatRope(
5152
}
5253

5354
@Override
54-
public Rope withEncoding(Encoding newEncoding, CodeRange newCodeRange) {
55-
if (newCodeRange != getCodeRange()) {
56-
CompilerDirectives.transferToInterpreterAndInvalidate();
57-
throw new UnsupportedOperationException("Cannot fast-path updating encoding with different code range.");
58-
}
55+
Rope withEncoding7bit(Encoding newEncoding) {
56+
assert getCodeRange() == CodeRange.CR_7BIT;
57+
return new ConcatRope(
58+
getLeft(),
59+
getRight(),
60+
newEncoding,
61+
CodeRange.CR_7BIT,
62+
characterLength(),
63+
depth(),
64+
getRawBytes(),
65+
balanced);
66+
}
5967

60-
return new ConcatRope(getLeft(), getRight(), newEncoding, newCodeRange, depth(), getRawBytes(), balanced);
68+
@Override
69+
Rope withBinaryEncoding() {
70+
assert getCodeRange() == CodeRange.CR_VALID;
71+
return new ConcatRope(
72+
getLeft(),
73+
getRight(),
74+
ASCIIEncoding.INSTANCE,
75+
CodeRange.CR_VALID,
76+
byteLength(),
77+
depth(),
78+
getRawBytes(),
79+
balanced);
6180
}
6281

6382
@Override

src/main/java/org/truffleruby/core/rope/InvalidLeafRope.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,14 @@ public InvalidLeafRope(byte[] bytes, Encoding encoding, int characterLength) {
2323
}
2424

2525
@Override
26-
public Rope withEncoding(Encoding newEncoding, CodeRange newCodeRange) {
27-
if (newCodeRange != getCodeRange()) {
28-
CompilerDirectives.transferToInterpreterAndInvalidate();
29-
throw new UnsupportedOperationException("Cannot fast-path updating encoding with different code range.");
30-
}
31-
32-
final int newCharacterLength = RopeOperations.strLength(newEncoding, getRawBytes(), 0, byteLength());
33-
return new InvalidLeafRope(getRawBytes(), newEncoding, newCharacterLength);
26+
Rope withEncoding7bit(Encoding newEncoding) {
27+
CompilerDirectives.transferToInterpreterAndInvalidate();
28+
throw new UnsupportedOperationException("Must only be called for ASCII-only Strings");
29+
}
30+
31+
@Override
32+
Rope withBinaryEncoding() {
33+
CompilerDirectives.transferToInterpreterAndInvalidate();
34+
throw new UnsupportedOperationException("Must only be called for CR_VALID Strings");
3435
}
3536
}

src/main/java/org/truffleruby/core/rope/LazyIntRope.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,17 @@ private static int length(int value) {
5555
}
5656

5757
@Override
58-
public Rope withEncoding(Encoding newEncoding, CodeRange newCodeRange) {
59-
if (newCodeRange != getCodeRange()) {
60-
CompilerDirectives.transferToInterpreterAndInvalidate();
61-
throw new UnsupportedOperationException("Cannot fast-path updating encoding with different code range.");
62-
}
63-
58+
Rope withEncoding7bit(Encoding newEncoding) {
59+
assert getCodeRange() == CodeRange.CR_7BIT;
6460
return new LazyIntRope(value, newEncoding, length(value));
6561
}
6662

63+
@Override
64+
Rope withBinaryEncoding() {
65+
CompilerDirectives.transferToInterpreterAndInvalidate();
66+
throw new UnsupportedOperationException("Must only be called for CR_VALID Strings");
67+
}
68+
6769
@Override
6870
public byte[] fulfill() {
6971
if (bytes == null) {

src/main/java/org/truffleruby/core/rope/NativeRope.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,11 +189,16 @@ public String toString() {
189189
}
190190

191191
@Override
192-
public Rope withEncoding(Encoding newEncoding, CodeRange newCodeRange) {
192+
Rope withEncoding7bit(Encoding newEncoding) {
193193
return withEncoding(newEncoding);
194194
}
195195

196-
public NativeRope withEncoding(Encoding newEncoding) {
196+
@Override
197+
Rope withBinaryEncoding() {
198+
return withEncoding(ASCIIEncoding.INSTANCE);
199+
}
200+
201+
NativeRope withEncoding(Encoding newEncoding) {
197202
return new NativeRope(pointer, byteLength(), newEncoding, UNKNOWN_CHARACTER_LENGTH, CodeRange.CR_UNKNOWN);
198203
}
199204

src/main/java/org/truffleruby/core/rope/RepeatingRope.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
package org.truffleruby.core.rope;
1212

1313
import org.jcodings.Encoding;
14+
import org.jcodings.specific.ASCIIEncoding;
1415

1516
/**
1617
* A RepeatingRope always has the same encoding as its child
@@ -33,10 +34,17 @@ public RepeatingRope(ManagedRope child, int times) {
3334
}
3435

3536
@Override
36-
public Rope withEncoding(Encoding newEncoding, CodeRange newCodeRange) {
37+
Rope withEncoding7bit(Encoding newEncoding) {
38+
assert getCodeRange() == CodeRange.CR_7BIT;
3739
return new RepeatingRope((ManagedRope) RopeOperations.withEncoding(child, newEncoding), times);
3840
}
3941

42+
@Override
43+
Rope withBinaryEncoding() {
44+
assert getCodeRange() == CodeRange.CR_VALID;
45+
return new RepeatingRope((ManagedRope) RopeOperations.withEncoding(child, ASCIIEncoding.INSTANCE), times);
46+
}
47+
4048
@Override
4149
protected byte[] getBytesSlow() {
4250
if (child.getRawBytes() != null) {

src/main/java/org/truffleruby/core/rope/Rope.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,18 @@ protected Rope(Encoding encoding, int byteLength, int ropeDepth, byte[] bytes) {
3939
}
4040

4141
/**
42-
* Fast-path encoding change when there is little to do.
43-
*
44-
* @param newEncoding the new Encoding
45-
* @param newCodeRange always CR-7BIT, except when newEncoding is BINARY can also be CR-VALID
42+
* Only used internally by WithEncodingNode.
43+
* Returns a Rope with the given Encoding.
44+
* Both the original and new Encodings must be ASCII-compatible and the rope must be {@link #isAsciiOnly()}.
4645
*/
47-
public abstract Rope withEncoding(Encoding newEncoding, CodeRange newCodeRange);
46+
abstract Rope withEncoding7bit(Encoding newEncoding);
47+
48+
/**
49+
* Only used internally by WithEncodingNode.
50+
* Returns a Rope with the BINARY Encoding.
51+
* The original Encoding must be ASCII-compatible and {@link #getCodeRange()} must be {@link CodeRange#CR_VALID} to call this.
52+
*/
53+
abstract Rope withBinaryEncoding();
4854

4955
public abstract int characterLength();
5056

src/main/java/org/truffleruby/core/rope/RopeNodes.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,12 +1070,14 @@ protected Rope withEncodingAsciiCompatible(ManagedRope rope, Encoding encoding,
10701070
if (asciiCompatibleProfile.profile(encoding.isAsciiCompatible())) {
10711071
if (asciiOnlyProfile.profile(rope.isAsciiOnly())) {
10721072
// ASCII-only strings can trivially convert to other ASCII-compatible encodings.
1073-
return cachedRopeClass.cast(rope).withEncoding(encoding, CR_7BIT);
1073+
return cachedRopeClass.cast(rope).withEncoding7bit(encoding);
10741074
} else if (binaryEncodingProfile.profile(encoding == ASCIIEncoding.INSTANCE &&
10751075
rope.getCodeRange() == CR_VALID &&
10761076
rope.getEncoding().isAsciiCompatible())) {
10771077
// ASCII-compatible CR_VALID strings are also CR_VALID in binary, but they might change character length.
1078-
return cachedRopeClass.cast(rope).withEncoding(ASCIIEncoding.INSTANCE, CR_VALID);
1078+
final Rope binary = cachedRopeClass.cast(rope).withBinaryEncoding();
1079+
assert binary.getCodeRange() == CR_VALID;
1080+
return binary;
10791081
} else {
10801082
// The rope either has a broken code range or isn't ASCII-compatible. In the case of a broken
10811083
// code range, we must perform a new code range scan with the target encoding to see if it's still

src/main/java/org/truffleruby/core/rope/RopeOperations.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
import org.truffleruby.collections.Memo;
4040
import org.truffleruby.core.Hashing;
4141
import org.truffleruby.core.encoding.EncodingManager;
42-
import org.truffleruby.core.rope.RopeNodes.WithEncodingNode;
4342
import org.truffleruby.core.rope.RopeNodesFactory.WithEncodingNodeGen;
4443
import org.truffleruby.core.string.StringAttributes;
4544
import org.truffleruby.core.string.StringOperations;

0 commit comments

Comments
 (0)