Skip to content

Commit 33005fb

Browse files
committed
[GR-51658] Avoid using org.jcodings.Encoding#getIndex()
PullRequest: truffleruby/4171
2 parents 88cbf96 + 9092f76 commit 33005fb

File tree

8 files changed

+80
-140
lines changed

8 files changed

+80
-140
lines changed

spec/ruby/optional/capi/encoding_spec.rb

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -559,19 +559,19 @@
559559

560560
describe "rb_ascii8bit_encindex" do
561561
it "returns an index for the ASCII-8BIT encoding" do
562-
@s.rb_ascii8bit_encindex().should >= 0
562+
@s.rb_ascii8bit_encindex().should == 0
563563
end
564564
end
565565

566566
describe "rb_utf8_encindex" do
567567
it "returns an index for the UTF-8 encoding" do
568-
@s.rb_utf8_encindex().should >= 0
568+
@s.rb_utf8_encindex().should == 1
569569
end
570570
end
571571

572572
describe "rb_usascii_encindex" do
573573
it "returns an index for the US-ASCII encoding" do
574-
@s.rb_usascii_encindex().should >= 0
574+
@s.rb_usascii_encindex().should == 2
575575
end
576576
end
577577

src/main/java/org/truffleruby/core/encoding/EncodingManager.java

Lines changed: 7 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -67,15 +67,11 @@ public void defineEncodings() {
6767
}
6868

6969
private void initializeEncodings(RubyClass encodingClass) {
70-
var iterator = EncodingDB.getEncodings().entryIterator();
71-
while (iterator.hasNext()) {
72-
var entry = iterator.next();
73-
if (entry.value.getEncoding() == Encodings.DUMMY_ENCODING_BASE) {
74-
continue;
75-
}
76-
final RubyEncoding rubyEncoding = defineBuiltInEncoding(entry.value);
77-
for (String constName : EncodingUtils.encodingNames(entry.bytes, entry.p, entry.end)) {
78-
encodingClass.fields.setConstant(context, null, constName, rubyEncoding);
70+
for (RubyEncoding encoding : Encodings.BUILT_IN_ENCODINGS) {
71+
defineBuiltInEncoding(encoding);
72+
byte[] name = encoding.jcoding.getName();
73+
for (String constName : EncodingUtils.encodingNames(name, 0, name.length)) {
74+
encodingClass.fields.setConstant(context, null, constName, encoding);
7975
}
8076
}
8177
}
@@ -186,22 +182,6 @@ private void initializeLocaleEncoding(TruffleNFIPlatform nfi, NativeConfiguratio
186182
localeEncoding = rubyEncoding;
187183
}
188184

189-
@TruffleBoundary
190-
public static Encoding getEncoding(String name) {
191-
byte[] nameBytes = StringOperations.encodeAsciiBytes(name);
192-
EncodingDB.Entry entry = EncodingDB.getEncodings().get(nameBytes);
193-
194-
if (entry == null) {
195-
entry = EncodingDB.getAliases().get(nameBytes);
196-
}
197-
198-
if (entry != null) {
199-
return entry.getEncoding();
200-
}
201-
202-
return null;
203-
}
204-
205185
public synchronized Object[] getEncodingList() {
206186
return ArrayUtils.copyOf(ENCODING_LIST_BY_ENCODING_INDEX, ENCODING_LIST_BY_ENCODING_INDEX.length);
207187
}
@@ -238,16 +218,13 @@ RubyEncoding getRubyEncoding(int encodingIndex) {
238218
}
239219

240220
@TruffleBoundary
241-
public synchronized RubyEncoding defineBuiltInEncoding(EncodingDB.Entry encodingEntry) {
242-
final int encodingIndex = encodingEntry.getEncoding().getIndex();
243-
final RubyEncoding rubyEncoding = Encodings.getBuiltInEncoding(encodingEntry.getEncoding());
221+
public void defineBuiltInEncoding(RubyEncoding rubyEncoding) {
222+
final int encodingIndex = rubyEncoding.index;
244223

245224
assert ENCODING_LIST_BY_ENCODING_INDEX[encodingIndex] == null;
246225
ENCODING_LIST_BY_ENCODING_INDEX[encodingIndex] = rubyEncoding;
247226

248227
addToLookup(rubyEncoding.toString(), rubyEncoding);
249-
return rubyEncoding;
250-
251228
}
252229

253230
@TruffleBoundary

src/main/java/org/truffleruby/core/encoding/Encodings.java

Lines changed: 57 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -38,26 +38,22 @@ public final class Encodings {
3838

3939
public static final int INITIAL_NUMBER_OF_ENCODINGS = EncodingDB.getEncodings().size();
4040
public static final int MAX_NUMBER_OF_ENCODINGS = 256;
41-
public static final RubyEncoding US_ASCII = initializeUsAscii();
42-
private static final RubyEncoding[] BUILT_IN_ENCODINGS = initializeRubyEncodings();
43-
44-
public static final RubyEncoding BINARY = BUILT_IN_ENCODINGS[ASCIIEncoding.INSTANCE.getIndex()];
45-
public static final RubyEncoding UTF_8 = BUILT_IN_ENCODINGS[UTF8Encoding.INSTANCE.getIndex()];
46-
public static final RubyEncoding UTF16LE = BUILT_IN_ENCODINGS[UTF16LEEncoding.INSTANCE.getIndex()];
47-
public static final RubyEncoding UTF16BE = BUILT_IN_ENCODINGS[UTF16BEEncoding.INSTANCE.getIndex()];
48-
public static final RubyEncoding UTF32LE = BUILT_IN_ENCODINGS[UTF32LEEncoding.INSTANCE.getIndex()];
49-
public static final RubyEncoding UTF32BE = BUILT_IN_ENCODINGS[UTF32BEEncoding.INSTANCE.getIndex()];
50-
public static final RubyEncoding ISO_8859_1 = BUILT_IN_ENCODINGS[ISO8859_1Encoding.INSTANCE.getIndex()];
51-
public static final RubyEncoding UTF16_DUMMY = BUILT_IN_ENCODINGS[EncodingDB
52-
.getEncodings()
53-
.get(StringOperations.encodeAsciiBytes("UTF-16"))
54-
.getEncoding()
55-
.getIndex()];
56-
public static final RubyEncoding UTF32_DUMMY = BUILT_IN_ENCODINGS[EncodingDB
57-
.getEncodings()
58-
.get(StringOperations.encodeAsciiBytes("UTF-32"))
59-
.getEncoding()
60-
.getIndex()];
41+
public static final int US_ASCII_INDEX = getUsAsciiIndex();
42+
public static final RubyEncoding US_ASCII = new RubyEncoding(US_ASCII_INDEX);
43+
static final RubyEncoding[] BUILT_IN_ENCODINGS = initializeRubyEncodings();
44+
private static final RubyEncoding[] BUILT_IN_ENCODINGS_BY_JCODING_INDEX = initializeBuiltinEncodingsByJCodingIndex();
45+
46+
public static final RubyEncoding BINARY = getBuiltInEncoding(ASCIIEncoding.INSTANCE);
47+
public static final RubyEncoding UTF_8 = getBuiltInEncoding(UTF8Encoding.INSTANCE);
48+
public static final RubyEncoding UTF16LE = getBuiltInEncoding(UTF16LEEncoding.INSTANCE);
49+
public static final RubyEncoding UTF16BE = getBuiltInEncoding(UTF16BEEncoding.INSTANCE);
50+
public static final RubyEncoding UTF32LE = getBuiltInEncoding(UTF32LEEncoding.INSTANCE);
51+
public static final RubyEncoding UTF32BE = getBuiltInEncoding(UTF32BEEncoding.INSTANCE);
52+
public static final RubyEncoding ISO_8859_1 = getBuiltInEncoding(ISO8859_1Encoding.INSTANCE);
53+
public static final RubyEncoding UTF16_DUMMY = getBuiltInEncoding(
54+
EncodingDB.getEncodings().get(StringOperations.encodeAsciiBytes("UTF-16")).getEncoding());
55+
public static final RubyEncoding UTF32_DUMMY = getBuiltInEncoding(
56+
EncodingDB.getEncodings().get(StringOperations.encodeAsciiBytes("UTF-32")).getEncoding());
6157

6258
/** On Linux and macOS the filesystem encoding is always UTF-8 */
6359
public static final RubyEncoding FILESYSTEM = UTF_8;
@@ -68,29 +64,42 @@ public final class Encodings {
6864
public Encodings() {
6965
}
7066

71-
private static RubyEncoding initializeUsAscii() {
72-
final Encoding encoding = USASCIIEncoding.INSTANCE;
73-
return new RubyEncoding(encoding.getIndex());
67+
private static int getUsAsciiIndex() {
68+
int index = 0;
69+
for (var entry : EncodingDB.getEncodings()) {
70+
if (entry.getEncoding() == USASCIIEncoding.INSTANCE) {
71+
return index;
72+
}
73+
index++;
74+
}
75+
throw CompilerDirectives.shouldNotReachHere("No US-ASCII");
7476
}
7577

7678
private static RubyEncoding[] initializeRubyEncodings() {
7779
final RubyEncoding[] encodings = new RubyEncoding[INITIAL_NUMBER_OF_ENCODINGS];
80+
81+
int index = 0;
7882
for (var entry : EncodingDB.getEncodings()) {
7983
final Encoding encoding = entry.getEncoding();
8084

8185
final RubyEncoding rubyEncoding;
8286
if (encoding == USASCIIEncoding.INSTANCE) {
87+
assert index == US_ASCII_INDEX;
8388
rubyEncoding = US_ASCII;
8489
} else {
8590
TruffleString tstring = TStringConstants.TSTRING_CONSTANTS.get(encoding.toString());
8691
if (tstring == null) {
8792
throw CompilerDirectives.shouldNotReachHere("no TStringConstants for " + encoding);
8893
}
8994
final ImmutableRubyString name = FrozenStringLiterals.createStringAndCacheLater(tstring, US_ASCII);
90-
rubyEncoding = new RubyEncoding(encoding, name, encoding.getIndex());
95+
rubyEncoding = new RubyEncoding(encoding, name, index);
9196
}
92-
encodings[encoding.getIndex()] = rubyEncoding;
97+
encodings[index] = rubyEncoding;
98+
99+
index++;
93100
}
101+
102+
assert index == EncodingDB.getEncodings().size();
94103
return encodings;
95104
}
96105

@@ -108,23 +117,36 @@ public static RubyEncoding newRubyEncoding(RubyLanguage language, Encoding encod
108117
return new RubyEncoding(encoding, string, index);
109118
}
110119

120+
public static RubyEncoding[] initializeBuiltinEncodingsByJCodingIndex() {
121+
final RubyEncoding[] encodings = new RubyEncoding[INITIAL_NUMBER_OF_ENCODINGS];
122+
for (RubyEncoding encoding : BUILT_IN_ENCODINGS) {
123+
// This and the usage in getBuiltInEncoding() below should be the only usages of org.jcodings.Encoding#getIndex().
124+
// That index is not deterministic and depends on classloading, so use it as little as possible.
125+
encodings[encoding.jcoding.getIndex()] = encoding;
126+
}
127+
return encodings;
128+
}
129+
111130
/** Should only be used when there is no other way, because this will ignore replicated and dummy encodings */
112131
public static RubyEncoding getBuiltInEncoding(Encoding jcoding) {
113-
var rubyEncoding = BUILT_IN_ENCODINGS[jcoding.getIndex()];
132+
var rubyEncoding = BUILT_IN_ENCODINGS_BY_JCODING_INDEX[jcoding.getIndex()];
114133
assert rubyEncoding.jcoding == jcoding;
115134
return rubyEncoding;
116135
}
117136

118-
/** Should only be used when there is no other way, because this will ignore replicated and dummy encodings */
119-
public static RubyEncoding getBuiltInEncoding(String encodingName) {
120-
byte[] encodingNameBytes = encodingName.getBytes(StandardCharsets.ISO_8859_1);
121-
var entry = EncodingDB.getEncodings().get(encodingNameBytes);
137+
@TruffleBoundary
138+
public static RubyEncoding getBuiltInEncoding(String name) {
139+
byte[] nameBytes = StringOperations.encodeAsciiBytes(name);
140+
EncodingDB.Entry entry = EncodingDB.getEncodings().get(nameBytes);
141+
142+
if (entry == null) {
143+
entry = EncodingDB.getAliases().get(nameBytes);
144+
}
145+
122146
if (entry != null) {
123-
var jcoding = entry.getEncoding();
124-
return getBuiltInEncoding(jcoding);
125-
} else {
126-
throw CompilerDirectives.shouldNotReachHere("Unknown encoding: " + encodingName);
147+
return getBuiltInEncoding(entry.getEncoding());
127148
}
128-
}
129149

150+
return null;
151+
}
130152
}

src/main/java/org/truffleruby/core/string/BytesKey.java

Lines changed: 0 additions & 50 deletions
This file was deleted.

src/main/java/org/truffleruby/core/string/InterpolatedStringNode.java

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,7 @@
1010
package org.truffleruby.core.string;
1111

1212
import com.oracle.truffle.api.strings.TruffleString;
13-
import org.jcodings.Encoding;
1413
import org.truffleruby.core.cast.ToSNode;
15-
import org.truffleruby.core.encoding.Encodings;
1614
import org.truffleruby.core.encoding.RubyEncoding;
1715
import org.truffleruby.language.RubyContextSourceNode;
1816

@@ -31,11 +29,7 @@ public final class InterpolatedStringNode extends RubyContextSourceNode {
3129
private final RubyEncoding encoding;
3230
private final TruffleString emptyTString;
3331

34-
public InterpolatedStringNode(ToSNode[] children, Encoding encoding) {
35-
this(children, Encodings.getBuiltInEncoding(encoding));
36-
}
37-
38-
private InterpolatedStringNode(ToSNode[] children, RubyEncoding encoding) {
32+
public InterpolatedStringNode(ToSNode[] children, RubyEncoding encoding) {
3933
assert children.length > 0;
4034
this.children = children;
4135
this.encoding = encoding;

src/main/java/org/truffleruby/parser/MagicCommentParser.java

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,7 @@
1414

1515
import com.oracle.truffle.api.strings.InternalByteArray;
1616
import com.oracle.truffle.api.strings.TruffleString;
17-
import org.jcodings.Encoding;
1817
import org.truffleruby.collections.Memo;
19-
import org.truffleruby.core.encoding.EncodingManager;
2018
import org.truffleruby.core.encoding.Encodings;
2119
import org.truffleruby.core.encoding.RubyEncoding;
2220
import org.truffleruby.core.encoding.TStringUtils;
@@ -95,9 +93,9 @@ public static RubyEncoding parseMagicEncodingComment(TStringWithEncoding source)
9593
parser_magic_comment(magicLine, 0, magicLineLength,
9694
(name, value) -> {
9795
if (isMagicEncodingComment(name)) {
98-
Encoding jcoding = EncodingManager.getEncoding(value.toJavaStringUncached());
99-
if (jcoding != null) {
100-
encoding.set(Encodings.getBuiltInEncoding(jcoding));
96+
RubyEncoding rubyEncoding = Encodings.getBuiltInEncoding(value.toJavaStringUncached());
97+
if (rubyEncoding != null) {
98+
encoding.set(rubyEncoding);
10199
return true;
102100
}
103101
}
@@ -107,9 +105,9 @@ public static RubyEncoding parseMagicEncodingComment(TStringWithEncoding source)
107105
if (encoding.get() == null) {
108106
TruffleString encodingName = get_file_encoding(magicLine);
109107
if (encodingName != null) {
110-
Encoding jcoding = EncodingManager.getEncoding(encodingName.toJavaStringUncached());
111-
if (jcoding != null) {
112-
encoding.set(Encodings.getBuiltInEncoding(jcoding));
108+
RubyEncoding rubyEncoding = Encodings.getBuiltInEncoding(encodingName.toJavaStringUncached());
109+
if (rubyEncoding != null) {
110+
encoding.set(rubyEncoding);
113111
}
114112
}
115113
}

src/main/java/org/truffleruby/parser/YARPLoader.java

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -51,18 +51,17 @@ public static ParseResult load(byte[] serialized, Nodes.Source source, RubySourc
5151
return new YARPLoader(serialized, source, rubySource).load();
5252
}
5353

54-
private final RubySource rubySource;
55-
private RubyEncoding encoding = null;
54+
private final RubyEncoding encoding;
5655

5756
public YARPLoader(byte[] serialized, Nodes.Source source, RubySource rubySource) {
5857
super(serialized, source);
59-
this.rubySource = rubySource;
58+
this.encoding = rubySource.getEncoding();
6059
}
6160

6261
@Override
6362
public Charset getEncodingCharset(String encodingName) {
64-
encoding = Encodings.getBuiltInEncoding(encodingName);
65-
assert encoding == rubySource.getEncoding();
63+
var rubyEncoding = Encodings.getBuiltInEncoding(encodingName);
64+
assert rubyEncoding == encoding : rubyEncoding + " (" + encodingName + ") vs " + encoding;
6665
return null; // encodingCharset is not used
6766
}
6867

src/main/java/org/truffleruby/parser/YARPTranslator.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2424,14 +2424,14 @@ public RubyNode visitInterpolatedStringNode(Nodes.InterpolatedStringNode node) {
24242424

24252425
final ToSNode[] children = translateInterpolatedParts(node.parts);
24262426

2427-
final RubyNode rubyNode = new InterpolatedStringNode(children, sourceEncoding.jcoding);
2427+
final RubyNode rubyNode = new InterpolatedStringNode(children, sourceEncoding);
24282428
return assignPositionAndFlags(node, rubyNode);
24292429
}
24302430

24312431
@Override
24322432
public RubyNode visitInterpolatedSymbolNode(Nodes.InterpolatedSymbolNode node) {
24332433
final ToSNode[] children = translateInterpolatedParts(node.parts);
2434-
final RubyNode stringNode = new InterpolatedStringNode(children, sourceEncoding.jcoding);
2434+
final RubyNode stringNode = new InterpolatedStringNode(children, sourceEncoding);
24352435

24362436
final RubyNode rubyNode = StringToSymbolNodeGen.create(stringNode);
24372437
return assignPositionAndFlags(node, rubyNode);

0 commit comments

Comments
 (0)