Skip to content

Commit 3cee1ac

Browse files
committed
Remove unnecessary data copies when working with the frozen string table.
By restructuring to work with `InternalByteArray`, we can now support working with native strings as well. The `InternalByteArray` will make a copy of the native memory into a Java `byte[]`, which won't change behind our backs.
1 parent 1eff21e commit 3cee1ac

File tree

7 files changed

+58
-31
lines changed

7 files changed

+58
-31
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ New features:
77
Bug fixes:
88

99
* Fix `Dir.glob` returning blank string entry with leading `**/` in glob and `base:` argument (@rwstauner).
10-
* Fix class lookup after an object's class has been replaced by `IO#reopen` (@itarato, @eregon).
10+
* Fix class lookup after an object's class has been replaced by `IO#reopen` (@itarato, @nirvdrum, @eregon).
1111
* Fix `Marshal.load` and raise `ArgumentError` when dump is broken and is too short (#3108, @andrykonchin).
1212
* Fix `super` method lookup for unbounded attached methods (#3131, @itarato).
1313
* Fix `Module#define_method(name, Method)` to respect `module_function` visibility (#3181, @andrykonchin).
@@ -43,6 +43,7 @@ Compatibility:
4343
Performance:
4444

4545
* Improve `Truffle::FeatureLoader.loaded_feature_path` by removing expensive string ops from a loop. Speeds up feature lookup time (#3010, @itarato).
46+
* Improve `String#-@` performance by reducing unnecessary data copying and supporting substring lookups (@nirvdrum)
4647

4748
Changes:
4849

src/main/java/org/truffleruby/RubyLanguage.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import com.oracle.truffle.api.source.Source;
3434
import com.oracle.truffle.api.source.SourceSection;
3535
import com.oracle.truffle.api.strings.AbstractTruffleString;
36+
import com.oracle.truffle.api.strings.InternalByteArray;
3637
import com.oracle.truffle.api.strings.TruffleString;
3738
import org.graalvm.options.OptionDescriptors;
3839
import org.truffleruby.annotations.SuppressFBWarnings;
@@ -788,6 +789,11 @@ public ImmutableRubyString getFrozenStringLiteral(TruffleString tstring, RubyEnc
788789
return frozenStringLiterals.getFrozenStringLiteral(tstring, encoding);
789790
}
790791

792+
public ImmutableRubyString getFrozenStringLiteral(InternalByteArray byteArray, boolean isImmutable,
793+
RubyEncoding encoding) {
794+
return frozenStringLiterals.getFrozenStringLiteral(byteArray, isImmutable, encoding);
795+
}
796+
791797
public long getNextObjectID() {
792798
final long id = nextObjectID.getAndAdd(ObjectSpaceManager.OBJECT_ID_INCREMENT_BY);
793799

src/main/java/org/truffleruby/core/encoding/TStringUtils.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,4 +152,10 @@ public static String toJavaStringOrThrow(AbstractTruffleString tstring, RubyEnco
152152
return tstring.toJavaStringUncached();
153153
}
154154
}
155+
156+
public static boolean hasImmutableInternalByteArray(AbstractTruffleString string) {
157+
// Immutable strings trivially have immutable byte arrays.
158+
// Native strings also have immutable byte arrays because we need to copy the data into Java.
159+
return string.isImmutable() || string.isNative();
160+
}
155161
}

src/main/java/org/truffleruby/core/string/FrozenStringLiterals.java

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import com.oracle.truffle.api.CompilerDirectives;
1313
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
14+
import com.oracle.truffle.api.strings.InternalByteArray;
1415
import com.oracle.truffle.api.strings.TruffleString;
1516
import org.truffleruby.collections.WeakValueCache;
1617
import org.truffleruby.core.encoding.RubyEncoding;
@@ -37,34 +38,23 @@ public FrozenStringLiterals(TStringCache tStringCache) {
3738

3839
@TruffleBoundary
3940
public ImmutableRubyString getFrozenStringLiteral(TruffleString tstring, RubyEncoding encoding) {
40-
if (tstring.isNative()) {
41-
throw CompilerDirectives.shouldNotReachHere();
42-
}
43-
44-
// Ensure all ImmutableRubyString have a TruffleString from the TStringCache
45-
var cachedTString = tstringCache.getTString(tstring, encoding);
46-
var tstringWithEncoding = new TStringWithEncoding(cachedTString, encoding);
47-
48-
final ImmutableRubyString string = values.get(tstringWithEncoding);
49-
if (string != null) {
50-
return string;
51-
} else {
52-
return getFrozenStringLiteral(TStringUtils.getBytesOrCopy(tstring, encoding), encoding);
53-
}
41+
return getFrozenStringLiteral(tstring.getInternalByteArrayUncached(encoding.tencoding),
42+
TStringUtils.hasImmutableInternalByteArray(tstring),
43+
encoding);
5444
}
5545

5646
@TruffleBoundary
57-
public ImmutableRubyString getFrozenStringLiteral(byte[] bytes, RubyEncoding encoding) {
47+
public ImmutableRubyString getFrozenStringLiteral(InternalByteArray byteArray, boolean isImmutable,
48+
RubyEncoding encoding) {
5849
// Ensure all ImmutableRubyString have a TruffleString from the TStringCache
59-
var cachedTString = tstringCache.getTString(bytes, encoding);
50+
var cachedTString = tstringCache.getTString(byteArray, isImmutable, encoding);
6051
var tstringWithEncoding = new TStringWithEncoding(cachedTString, encoding);
6152

6253
final ImmutableRubyString string = values.get(tstringWithEncoding);
6354
if (string != null) {
6455
return string;
6556
} else {
66-
return values.addInCacheIfAbsent(tstringWithEncoding,
67-
new ImmutableRubyString(cachedTString, encoding));
57+
return values.addInCacheIfAbsent(tstringWithEncoding, new ImmutableRubyString(cachedTString, encoding));
6858
}
6959
}
7060

src/main/java/org/truffleruby/core/string/StringNodes.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4357,10 +4357,11 @@ public abstract static class InternNode extends PrimitiveArrayArgumentsNode {
43574357
@Specialization
43584358
protected ImmutableRubyString internString(RubyString string,
43594359
@Cached RubyStringLibrary libString,
4360-
@Cached TruffleString.AsManagedNode asManagedNode) {
4360+
@Cached TruffleString.GetInternalByteArrayNode getInternalByteArrayNode) {
43614361
var encoding = libString.getEncoding(string);
4362-
TruffleString immutableManagedString = asManagedNode.execute(string.tstring, encoding.tencoding);
4363-
return getLanguage().getFrozenStringLiteral(immutableManagedString, encoding);
4362+
var byteArray = getInternalByteArrayNode.execute(string.tstring, encoding.tencoding);
4363+
return getLanguage().getFrozenStringLiteral(byteArray,
4364+
TStringUtils.hasImmutableInternalByteArray(string.tstring), encoding);
43644365
}
43654366
}
43664367

src/main/java/org/truffleruby/core/string/TBytesKey.java

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,23 +23,37 @@ public final class TBytesKey {
2323
private final byte[] bytes;
2424
private final int offset;
2525
private final int length;
26+
private final boolean isImmutable;
2627
private RubyEncoding encoding;
2728
private final int bytesHashCode;
2829

29-
public TBytesKey(byte[] bytes, int offset, int length, int bytesHashCode, RubyEncoding encoding) {
30+
public TBytesKey(
31+
byte[] bytes,
32+
int offset,
33+
int length,
34+
boolean isImmutable,
35+
int bytesHashCode,
36+
RubyEncoding encoding) {
3037
this.bytes = bytes;
3138
this.offset = offset;
3239
this.length = length;
40+
this.isImmutable = isImmutable;
3341
this.bytesHashCode = bytesHashCode;
3442
this.encoding = encoding;
3543
}
3644

3745
public TBytesKey(byte[] bytes, RubyEncoding encoding) {
38-
this(bytes, 0, bytes.length, Arrays.hashCode(bytes), encoding);
46+
this(bytes, 0, bytes.length, true, Arrays.hashCode(bytes), encoding);
3947
}
4048

41-
public TBytesKey(InternalByteArray byteArray, RubyEncoding encoding) {
42-
this(byteArray.getArray(), byteArray.getOffset(), byteArray.getLength(), hashCode(byteArray), encoding);
49+
public TBytesKey(InternalByteArray byteArray, boolean isImmutable, RubyEncoding encoding) {
50+
this(
51+
byteArray.getArray(),
52+
byteArray.getOffset(),
53+
byteArray.getLength(),
54+
isImmutable,
55+
hashCode(byteArray),
56+
encoding);
4357
}
4458

4559
@Override
@@ -104,17 +118,16 @@ private boolean isPerfectFit() {
104118
}
105119

106120
public TBytesKey makeCacheable() {
107-
if (isPerfectFit()) {
108-
// TODO (nirvdrum 2023-Jun-17): We can avoid cloning the key if we know the byte array came from an immutable string.
109-
return new TBytesKey(bytes.clone(), encoding);
121+
if (isImmutable && isPerfectFit()) {
122+
return new TBytesKey(bytes, encoding);
110123
}
111124

112125
var simplified = ArrayUtils.extractRange(this.bytes, this.offset, this.offset + this.length);
113126
return new TBytesKey(simplified, encoding);
114127
}
115128

116129
public TBytesKey withNewEncoding(RubyEncoding encoding) {
117-
return new TBytesKey(bytes, offset, length, bytesHashCode, encoding);
130+
return new TBytesKey(bytes, offset, length, isImmutable, bytesHashCode, encoding);
118131
}
119132

120133
public TruffleString toTruffleString() {

src/main/java/org/truffleruby/core/string/TStringCache.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
*/
1010
package org.truffleruby.core.string;
1111

12+
import com.oracle.truffle.api.strings.InternalByteArray;
1213
import com.oracle.truffle.api.strings.TruffleString;
1314
import org.truffleruby.collections.WeakValueCache;
1415
import org.truffleruby.core.encoding.Encodings;
@@ -74,11 +75,20 @@ public TruffleString getTString(TruffleString string, RubyEncoding rubyEncoding)
7475
assert rubyEncoding != null;
7576

7677
var byteArray = string.getInternalByteArrayUncached(rubyEncoding.tencoding);
77-
final TBytesKey key = new TBytesKey(byteArray, rubyEncoding);
78+
final TBytesKey key = new TBytesKey(byteArray, TStringUtils.hasImmutableInternalByteArray(string),
79+
rubyEncoding);
7880

7981
return getTString(key);
8082
}
8183

84+
@TruffleBoundary
85+
public TruffleString getTString(InternalByteArray byteArray, boolean isImmutable, RubyEncoding rubyEncoding) {
86+
assert rubyEncoding != null;
87+
88+
return getTString(new TBytesKey(byteArray, isImmutable, rubyEncoding));
89+
}
90+
91+
@TruffleBoundary
8292
public TruffleString getTString(byte[] bytes, RubyEncoding rubyEncoding) {
8393
assert rubyEncoding != null;
8494

0 commit comments

Comments
 (0)