@@ -38,26 +38,22 @@ public final class Encodings {
38
38
39
39
// Number of entries EncodingDB reports when this class is initialized; used below to size the built-in tables.
public static final int INITIAL_NUMBER_OF_ENCODINGS = EncodingDB .getEncodings ().size ();
40
40
public static final int MAX_NUMBER_OF_ENCODINGS = 256 ;
41
- public static final RubyEncoding US_ASCII = initializeUsAscii ();
42
- private static final RubyEncoding [] BUILT_IN_ENCODINGS = initializeRubyEncodings ();
43
-
44
- public static final RubyEncoding BINARY = BUILT_IN_ENCODINGS [ASCIIEncoding .INSTANCE .getIndex ()];
45
- public static final RubyEncoding UTF_8 = BUILT_IN_ENCODINGS [UTF8Encoding .INSTANCE .getIndex ()];
46
- public static final RubyEncoding UTF16LE = BUILT_IN_ENCODINGS [UTF16LEEncoding .INSTANCE .getIndex ()];
47
- public static final RubyEncoding UTF16BE = BUILT_IN_ENCODINGS [UTF16BEEncoding .INSTANCE .getIndex ()];
48
- public static final RubyEncoding UTF32LE = BUILT_IN_ENCODINGS [UTF32LEEncoding .INSTANCE .getIndex ()];
49
- public static final RubyEncoding UTF32BE = BUILT_IN_ENCODINGS [UTF32BEEncoding .INSTANCE .getIndex ()];
50
- public static final RubyEncoding ISO_8859_1 = BUILT_IN_ENCODINGS [ISO8859_1Encoding .INSTANCE .getIndex ()];
51
- public static final RubyEncoding UTF16_DUMMY = BUILT_IN_ENCODINGS [EncodingDB
52
- .getEncodings ()
53
- .get (StringOperations .encodeAsciiBytes ("UTF-16" ))
54
- .getEncoding ()
55
- .getIndex ()];
56
- public static final RubyEncoding UTF32_DUMMY = BUILT_IN_ENCODINGS [EncodingDB
57
- .getEncodings ()
58
- .get (StringOperations .encodeAsciiBytes ("UTF-32" ))
59
- .getEncoding ()
60
- .getIndex ()];
// Declaration order matters here: Java runs static initializers in textual order.
// US_ASCII is built from US_ASCII_INDEX, initializeRubyEncodings() reads both US_ASCII and US_ASCII_INDEX,
// and the by-jcoding-index table is derived from BUILT_IN_ENCODINGS.
41
+ public static final int US_ASCII_INDEX = getUsAsciiIndex ();
42
+ public static final RubyEncoding US_ASCII = new RubyEncoding (US_ASCII_INDEX );
43
+ static final RubyEncoding [] BUILT_IN_ENCODINGS = initializeRubyEncodings ();
44
+ private static final RubyEncoding [] BUILT_IN_ENCODINGS_BY_JCODING_INDEX = initializeBuiltinEncodingsByJCodingIndex ();
45
+
// Well-known encodings, resolved through getBuiltInEncoding(Encoding), which reads
// BUILT_IN_ENCODINGS_BY_JCODING_INDEX; they must therefore be declared after that table.
46
+ public static final RubyEncoding BINARY = getBuiltInEncoding (ASCIIEncoding .INSTANCE );
47
+ public static final RubyEncoding UTF_8 = getBuiltInEncoding (UTF8Encoding .INSTANCE );
48
+ public static final RubyEncoding UTF16LE = getBuiltInEncoding (UTF16LEEncoding .INSTANCE );
49
+ public static final RubyEncoding UTF16BE = getBuiltInEncoding (UTF16BEEncoding .INSTANCE );
50
+ public static final RubyEncoding UTF32LE = getBuiltInEncoding (UTF32LEEncoding .INSTANCE );
51
+ public static final RubyEncoding UTF32BE = getBuiltInEncoding (UTF32BEEncoding .INSTANCE );
52
+ public static final RubyEncoding ISO_8859_1 = getBuiltInEncoding (ISO8859_1Encoding .INSTANCE );
// These two are found by name in EncodingDB rather than through an INSTANCE constant.
53
+ public static final RubyEncoding UTF16_DUMMY = getBuiltInEncoding (
54
+ EncodingDB .getEncodings ().get (StringOperations .encodeAsciiBytes ("UTF-16" )).getEncoding ());
55
+ public static final RubyEncoding UTF32_DUMMY = getBuiltInEncoding (
56
+ EncodingDB .getEncodings ().get (StringOperations .encodeAsciiBytes ("UTF-32" )).getEncoding ());
61
57
62
58
/** On Linux and macOS the filesystem encoding is always UTF-8 */
63
59
public static final RubyEncoding FILESYSTEM = UTF_8 ;
@@ -68,29 +64,42 @@ public final class Encodings {
68
64
/** No-argument constructor with an empty body. */
public Encodings () {
69
65
}
70
66
71
- private static RubyEncoding initializeUsAscii () {
72
- final Encoding encoding = USASCIIEncoding .INSTANCE ;
73
- return new RubyEncoding (encoding .getIndex ());
67
+ private static int getUsAsciiIndex () {
68
+ int index = 0 ;
69
+ for (var entry : EncodingDB .getEncodings ()) {
70
+ if (entry .getEncoding () == USASCIIEncoding .INSTANCE ) {
71
+ return index ;
72
+ }
73
+ index ++;
74
+ }
75
+ throw CompilerDirectives .shouldNotReachHere ("No US-ASCII" );
74
76
}
75
77
76
78
private static RubyEncoding [] initializeRubyEncodings () {
77
79
final RubyEncoding [] encodings = new RubyEncoding [INITIAL_NUMBER_OF_ENCODINGS ];
80
+
81
+ int index = 0 ;
78
82
for (var entry : EncodingDB .getEncodings ()) {
79
83
final Encoding encoding = entry .getEncoding ();
80
84
81
85
final RubyEncoding rubyEncoding ;
82
86
if (encoding == USASCIIEncoding .INSTANCE ) {
87
+ assert index == US_ASCII_INDEX ;
83
88
rubyEncoding = US_ASCII ;
84
89
} else {
85
90
TruffleString tstring = TStringConstants .TSTRING_CONSTANTS .get (encoding .toString ());
86
91
if (tstring == null ) {
87
92
throw CompilerDirectives .shouldNotReachHere ("no TStringConstants for " + encoding );
88
93
}
89
94
final ImmutableRubyString name = FrozenStringLiterals .createStringAndCacheLater (tstring , US_ASCII );
90
- rubyEncoding = new RubyEncoding (encoding , name , encoding . getIndex () );
95
+ rubyEncoding = new RubyEncoding (encoding , name , index );
91
96
}
92
- encodings [encoding .getIndex ()] = rubyEncoding ;
97
+ encodings [index ] = rubyEncoding ;
98
+
99
+ index ++;
93
100
}
101
+
102
+ assert index == EncodingDB .getEncodings ().size ();
94
103
return encodings ;
95
104
}
96
105
@@ -108,23 +117,36 @@ public static RubyEncoding newRubyEncoding(RubyLanguage language, Encoding encod
108
117
return new RubyEncoding (encoding , string , index );
109
118
}
110
119
120
+ public static RubyEncoding [] initializeBuiltinEncodingsByJCodingIndex () {
121
+ final RubyEncoding [] encodings = new RubyEncoding [INITIAL_NUMBER_OF_ENCODINGS ];
122
+ for (RubyEncoding encoding : BUILT_IN_ENCODINGS ) {
123
+ // This and the usage in getBuiltInEncoding() below should be the only usages of org.jcodings.Encoding#getIndex().
124
+ // That index is not deterministic and depends on classloading, so use it as little as possible.
125
+ encodings [encoding .jcoding .getIndex ()] = encoding ;
126
+ }
127
+ return encodings ;
128
+ }
129
+
111
130
/** Should only be used when there is no other way, because this will ignore replicated and dummy encodings */
112
131
public static RubyEncoding getBuiltInEncoding (Encoding jcoding ) {
113
- var rubyEncoding = BUILT_IN_ENCODINGS [jcoding .getIndex ()];
132
+ var rubyEncoding = BUILT_IN_ENCODINGS_BY_JCODING_INDEX [jcoding .getIndex ()];
114
133
assert rubyEncoding .jcoding == jcoding ;
115
134
return rubyEncoding ;
116
135
}
117
136
118
- /** Should only be used when there is no other way, because this will ignore replicated and dummy encodings */
119
- public static RubyEncoding getBuiltInEncoding (String encodingName ) {
120
- byte [] encodingNameBytes = encodingName .getBytes (StandardCharsets .ISO_8859_1 );
121
- var entry = EncodingDB .getEncodings ().get (encodingNameBytes );
137
+ @ TruffleBoundary
138
+ public static RubyEncoding getBuiltInEncoding (String name ) {
139
+ byte [] nameBytes = StringOperations .encodeAsciiBytes (name );
140
+ EncodingDB .Entry entry = EncodingDB .getEncodings ().get (nameBytes );
141
+
142
+ if (entry == null ) {
143
+ entry = EncodingDB .getAliases ().get (nameBytes );
144
+ }
145
+
122
146
if (entry != null ) {
123
- var jcoding = entry .getEncoding ();
124
- return getBuiltInEncoding (jcoding );
125
- } else {
126
- throw CompilerDirectives .shouldNotReachHere ("Unknown encoding: " + encodingName );
147
+ return getBuiltInEncoding (entry .getEncoding ());
127
148
}
128
- }
129
149
150
+ return null ;
151
+ }
130
152
}
0 commit comments