Skip to content

Commit 70b8700

Browse files
committed
[GR-11465] Optimize rb_str_set_len()
PullRequest: truffleruby/534
2 parents 7932509 + f226c4b commit 70b8700

File tree

5 files changed

+87
-54
lines changed

5 files changed

+87
-54
lines changed

spec/ruby/optional/capi/string_spec.rb

Lines changed: 43 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -32,44 +32,56 @@ def to_str
3232
end
3333
end
3434

35-
describe "rb_str_set_len" do
36-
before :each do
37-
# Make a completely new copy of the string
38-
# for every example (#dup doesn't cut it).
39-
@str = "abcdefghij"[0..-1]
40-
end
35+
[Encoding::BINARY, Encoding::UTF_8].each do |enc|
36+
describe "rb_str_set_len on a #{enc.name} String" do
37+
before :each do
38+
# Make a completely new copy of the string
39+
# for every example (#dup doesn't cut it).
40+
@str = "abcdefghij".force_encoding(enc)[0..-1]
41+
end
4142

42-
it "reduces the size of the string" do
43-
@s.rb_str_set_len(@str, 5).should == "abcde"
44-
end
43+
it "reduces the size of the string" do
44+
@s.rb_str_set_len(@str, 5).should == "abcde"
45+
end
4546

46-
it "inserts a NULL byte at the length" do
47-
@s.rb_str_set_len(@str, 5).should == "abcde"
48-
@s.rb_str_set_len(@str, 8).should == "abcde\x00gh"
49-
end
47+
it "inserts a NULL byte at the length" do
48+
@s.rb_str_set_len(@str, 5).should == "abcde"
49+
@s.rb_str_set_len(@str, 8).should == "abcde\x00gh"
50+
end
5051

51-
it "updates the byte size and character size" do
52-
@s.rb_str_set_len(@str, 4)
53-
@str.bytesize.should == 4
54-
@str.size.should == 4
55-
@str.should == "abcd"
56-
end
52+
it "updates the byte size" do
53+
@s.rb_str_set_len(@str, 4)
54+
@str.bytesize.should == 4
55+
@str.should == "abcd"
56+
end
5757

58-
it "updates the string's attributes visible in C code" do
59-
@s.rb_str_set_len_RSTRING_LEN(@str, 4).should == 4
60-
end
58+
it "invalidates the character size" do
59+
@str.size.should == 10
60+
@s.rb_str_set_len(@str, 4)
61+
@str.size.should == 4
62+
@str.should == "abcd"
63+
end
6164

62-
it "can reveal characters written from C with RSTRING_PTR" do
63-
@s.rb_str_set_len(@str, 1)
64-
@str.should == "a"
65+
it "invalidates the code range" do
66+
@s.rb_str_set_len(@str, 4)
67+
@str.ascii_only?.should == true
68+
end
6569

66-
@str.force_encoding(Encoding::UTF_8)
67-
@s.RSTRING_PTR_set(@str, 1, 'B'.ord)
68-
@s.RSTRING_PTR_set(@str, 2, 'C'.ord)
69-
@s.rb_str_set_len(@str, 3)
70+
it "updates the string's attributes visible in C code" do
71+
@s.rb_str_set_len_RSTRING_LEN(@str, 4).should == 4
72+
end
73+
74+
it "can reveal characters written from C with RSTRING_PTR" do
75+
@s.rb_str_set_len(@str, 1)
76+
@str.should == "a"
7077

71-
@str.bytesize.should == 3
72-
@str.should == "aBC"
78+
@s.RSTRING_PTR_set(@str, 1, 'B'.ord)
79+
@s.RSTRING_PTR_set(@str, 2, 'C'.ord)
80+
@s.rb_str_set_len(@str, 3)
81+
82+
@str.bytesize.should == 3
83+
@str.should == "aBC"
84+
end
7385
end
7486
end
7587

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
fails:SafeStringValue raises for tained string when $SAFE is 1
2+
fails:C-API String function rb_str_set_len on a ASCII-8BIT String invalidates the code range

src/main/c/cext/ruby.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -793,7 +793,7 @@ VALUE rb_str_to_str(VALUE string) {
793793
}
794794

795795
VALUE rb_str_buf_new(long capacity) {
796-
VALUE str = rb_str_new(0, capacity);
796+
VALUE str = rb_str_new(NULL, capacity);
797797
rb_str_set_len(str, 0);
798798
return str;
799799
}
@@ -812,7 +812,7 @@ VALUE rb_str_concat(VALUE string, VALUE to_concat) {
812812

813813
void rb_str_set_len(VALUE string, long length) {
814814
long capacity = rb_str_capacity(string);
815-
if (length > capacity) {
815+
if (length > capacity || length < 0) {
816816
rb_raise(rb_eRuntimeError, "probable buffer overflow: %ld for %ld", length, capacity);
817817
}
818818
rb_tr_wrap(polyglot_invoke(RUBY_CEXT, "rb_str_set_len", rb_tr_unwrap(string), length));

src/main/java/org/truffleruby/cext/CExtNodes.java

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import com.oracle.truffle.api.profiles.ConditionProfile;
2929
import com.oracle.truffle.api.source.SourceSection;
3030
import org.jcodings.Encoding;
31+
import org.jcodings.specific.ASCIIEncoding;
3132
import org.jcodings.specific.UTF8Encoding;
3233
import org.truffleruby.Layouts;
3334
import org.truffleruby.RubyContext;
@@ -63,6 +64,7 @@
6364
import org.truffleruby.core.string.StringSupport;
6465
import org.truffleruby.interop.ToJavaStringNodeGen;
6566
import org.truffleruby.language.LexicalScope;
67+
import org.truffleruby.language.NotOptimizedWarningNode;
6668
import org.truffleruby.language.NotProvided;
6769
import org.truffleruby.language.RubyBaseNode;
6870
import org.truffleruby.language.RubyGuards;
@@ -443,15 +445,43 @@ public long capacity(DynamicObject string,
443445
@CoreMethod(names = "rb_str_set_len", onSingleton = true, required = 2, lowerFixnum = 2)
444446
public abstract static class RbStrSetLenNode extends CoreMethodArrayArgumentsNode {
445447

448+
@Child NotOptimizedWarningNode notOptimizedWarningNode;
449+
446450
@Specialization
447-
public DynamicObject strSetLen(DynamicObject string, int len,
448-
@Cached("create()") StringToNativeNode stringToNativeNode) {
451+
public DynamicObject strSetLen(DynamicObject string, int newByteLength,
452+
@Cached("create()") StringToNativeNode stringToNativeNode,
453+
@Cached("createBinaryProfile()") ConditionProfile binaryProfile) {
449454
final NativeRope nativeRope = stringToNativeNode.executeToNative(string);
450-
final NativeRope newNativeRope = nativeRope.withByteLength(len);
455+
final NativeRope newNativeRope;
456+
457+
if (binaryProfile.profile(nativeRope.getEncoding() == ASCIIEncoding.INSTANCE)) {
458+
// TODO (eregon, 17 Jan 2019): We use CR_VALID here as zlib relies on it. Computing
459+
// the coderange here on every call is too slow. A proper fix is to compute the
460+
// CodeRange lazily on NativeRope.
461+
newNativeRope = nativeRope.withByteLength(newByteLength, newByteLength, CodeRange.CR_VALID);
462+
} else {
463+
performanceWarn("calling rb_str_set_len() on non-binary string is not yet optimized");
464+
465+
final byte[] bytes = new byte[newByteLength];
466+
nativeRope.getNativePointer().readBytes(0, bytes, 0, newByteLength);
467+
final long packedLengthAndCodeRange = RopeOperations.calculateCodeRangeAndLength(nativeRope.getEncoding(), bytes, 0, newByteLength);
468+
final CodeRange codeRange = CodeRange.fromInt(StringSupport.unpackArg(packedLengthAndCodeRange));
469+
final int characterLength = StringSupport.unpackResult(packedLengthAndCodeRange);
470+
newNativeRope = nativeRope.withByteLength(newByteLength, characterLength, codeRange);
471+
}
472+
451473
StringOperations.setRope(string, newNativeRope);
452474
return string;
453475
}
454476

477+
private void performanceWarn(String message) {
478+
if (notOptimizedWarningNode == null) {
479+
CompilerDirectives.transferToInterpreterAndInvalidate();
480+
notOptimizedWarningNode = insert(new NotOptimizedWarningNode());
481+
}
482+
notOptimizedWarningNode.warn(message);
483+
}
484+
455485
}
456486

457487
@CoreMethod(names = "rb_str_resize", onSingleton = true, required = 2, lowerFixnum = 2)

src/main/java/org/truffleruby/core/rope/NativeRope.java

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,28 +11,27 @@
1111

1212
import org.jcodings.Encoding;
1313
import org.truffleruby.core.FinalizationService;
14-
import org.truffleruby.core.string.StringSupport;
1514

1615
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
1716
import org.truffleruby.extra.ffi.Pointer;
1817

1918
public class NativeRope extends Rope {
2019

21-
private Pointer pointer;
20+
private final Pointer pointer;
2221

2322
public NativeRope(FinalizationService finalizationService, byte[] bytes, Encoding encoding, int characterLength, CodeRange codeRange) {
24-
this(createNativePointer(finalizationService, bytes), bytes.length, encoding, characterLength, codeRange);
23+
this(allocateNativePointer(finalizationService, bytes), bytes.length, encoding, characterLength, codeRange);
2524
}
2625

27-
private static Pointer createNativePointer(FinalizationService finalizationService, byte[] bytes) {
26+
private static Pointer allocateNativePointer(FinalizationService finalizationService, byte[] bytes) {
2827
final Pointer pointer = Pointer.malloc(bytes.length + 1);
2928
pointer.enableAutorelease(finalizationService);
3029
pointer.writeBytes(0, bytes, 0, bytes.length);
3130
pointer.writeByte(bytes.length, (byte) 0);
3231
return pointer;
3332
}
3433

35-
private static Pointer createNativePointer(FinalizationService finalizationService, Pointer existing) {
34+
private static Pointer copyNativePointer(FinalizationService finalizationService, Pointer existing) {
3635
final Pointer pointer = Pointer.malloc(existing.getSize());
3736
pointer.enableAutorelease(finalizationService);
3837
pointer.writeBytes(0, existing, 0, existing.getSize());
@@ -44,22 +43,13 @@ private NativeRope(Pointer pointer, int byteLength, Encoding encoding, int chara
4443
this.pointer = pointer;
4544
}
4645

47-
@TruffleBoundary
48-
public NativeRope withByteLength(int newByteLength) {
49-
final byte[] bytes = new byte[newByteLength];
50-
pointer.readBytes(0, bytes, 0, newByteLength);
51-
52-
final long packedLengthAndCodeRange = RopeOperations.calculateCodeRangeAndLength(getEncoding(), bytes, 0, newByteLength);
53-
final CodeRange codeRange = CodeRange.fromInt(StringSupport.unpackArg(packedLengthAndCodeRange));
54-
final int characterLength = StringSupport.unpackResult(packedLengthAndCodeRange);
55-
56-
getNativePointer().writeByte(newByteLength, (byte) 0); // Like MRI
57-
58-
return new NativeRope(getNativePointer(), newByteLength, getEncoding(), characterLength, codeRange);
46+
public NativeRope withByteLength(int newByteLength, int characterLength, CodeRange codeRange) {
47+
pointer.writeByte(newByteLength, (byte) 0); // Like MRI
48+
return new NativeRope(pointer, newByteLength, getEncoding(), characterLength, codeRange);
5949
}
6050

6151
public NativeRope makeCopy(FinalizationService finalizationService) {
62-
final Pointer newPointer = createNativePointer(finalizationService, pointer);
52+
final Pointer newPointer = copyNativePointer(finalizationService, pointer);
6353
return new NativeRope(newPointer, byteLength(), getEncoding(), characterLength(), getCodeRange());
6454
}
6555

0 commit comments

Comments
 (0)