Skip to content

Commit 6554576

Browse files
committed
[GR-19220] Fix encoding related error messages for CRuby 3.2 (#3107)
PullRequest: truffleruby/3886
2 parents f91c002 + 4c08ace commit 6554576

File tree

7 files changed

+57
-14
lines changed

7 files changed

+57
-14
lines changed

spec/ruby/core/string/index_spec.rb

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,13 @@
167167
it "handles a substring in a subset encoding" do
168168
'été'.index('t'.force_encoding(Encoding::US_ASCII)).should == 1
169169
end
170+
171+
it "raises an Encoding::CompatibilityError if the encodings are incompatible" do
172+
str = 'abc'.force_encoding("ISO-2022-JP")
173+
pattern = 'b'.force_encoding("EUC-JP")
174+
175+
-> { str.index(pattern) }.should raise_error(Encoding::CompatibilityError, "incompatible character encodings: ISO-2022-JP and EUC-JP")
176+
end
170177
end
171178

172179
describe "String#index with Regexp" do
@@ -312,6 +319,17 @@
312319
"われわわれ".index(/わ/, 3).should == 3
313320
end
314321

322+
ruby_bug "#19763", ""..."3.3.0" do
323+
it "raises an Encoding::CompatibilityError if the encodings are incompatible" do
324+
re = Regexp.new "れ".encode(Encoding::EUC_JP)
325+
-> do
326+
"あれ".index re
327+
end.should raise_error(Encoding::CompatibilityError, "incompatible encoding regexp match (EUC-JP regexp with UTF-8 string)")
328+
end
329+
end
330+
331+
# The exception message was incorrectly "incompatible character encodings: UTF-8 and EUC-JP" before 3.3.0
332+
# The example below still checks that the right exception class is raised on versions older than 3.3.0.
315333
it "raises an Encoding::CompatibilityError if the encodings are incompatible" do
316334
re = Regexp.new "れ".encode(Encoding::EUC_JP)
317335
-> do

spec/ruby/core/string/rindex_spec.rb

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,13 @@ def obj.method_missing(*args) 5 end
204204
it "handles a substring in a subset encoding" do
205205
'été'.rindex('t'.force_encoding(Encoding::US_ASCII)).should == 1
206206
end
207+
208+
it "raises an Encoding::CompatibilityError if the encodings are incompatible" do
209+
str = 'abc'.force_encoding("ISO-2022-JP")
210+
pattern = 'b'.force_encoding("EUC-JP")
211+
212+
-> { str.rindex(pattern) }.should raise_error(Encoding::CompatibilityError, "incompatible character encodings: ISO-2022-JP and EUC-JP")
213+
end
207214
end
208215

209216
describe "String#rindex with Regexp" do
@@ -373,6 +380,6 @@ def obj.method_missing(*args); 5; end
373380
re = Regexp.new "れ".encode(Encoding::EUC_JP)
374381
-> do
375382
"あれ".rindex re
376-
end.should raise_error(Encoding::CompatibilityError)
383+
end.should raise_error(Encoding::CompatibilityError, "incompatible encoding regexp match (EUC-JP regexp with UTF-8 string)")
377384
end
378385
end

spec/ruby/core/string/upto_spec.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,12 @@ def other.to_str() "abd" end
8080
a.should == ["Σ", "Τ", "Υ", "Φ", "Χ", "Ψ", "Ω"]
8181
end
8282

83+
it "raises Encoding::CompatibilityError when incompatible characters are given" do
84+
char1 = 'a'.force_encoding("EUC-JP")
85+
char2 = 'b'.force_encoding("ISO-2022-JP")
86+
-> { char1.upto(char2) {} }.should raise_error(Encoding::CompatibilityError, "incompatible character encodings: EUC-JP and ISO-2022-JP")
87+
end
88+
8389
describe "on sequence of numbers" do
8490
it "calls the block as Integer#upto" do
8591
"8".upto("11").to_a.should == 8.upto(11).map(&:to_s)

spec/tags/core/string/byteindex_tags.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

spec/tags/core/string/byterindex_tags.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

src/main/java/org/truffleruby/core/regexp/TruffleRegexpNodes.java

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -109,15 +109,28 @@ private static void instrumentMatch(ConcurrentHashMap<MatchInfo, AtomicInteger>
109109
}
110110
}
111111

112+
@Primitive(name = "regexp_check_encoding")
113+
public abstract static class RegexpCheckEncodingNode extends PrimitiveArrayArgumentsNode {
114+
@Specialization
115+
protected static RubyEncoding regexpPrepareEncoding(RubyRegexp regexp, Object string,
116+
@Cached PrepareRegexpEncodingNode prepareRegexpEncodingNode,
117+
@Bind("this") Node node) {
118+
return prepareRegexpEncodingNode.executePrepare(node, regexp, string);
119+
120+
}
121+
}
122+
112123
// MRI: rb_reg_prepare_enc
124+
@GenerateCached(false)
125+
@GenerateInline
113126
public abstract static class PrepareRegexpEncodingNode extends RubyBaseNode {
114127

115-
public abstract RubyEncoding executePrepare(RubyRegexp regexp, Object matchString);
128+
public abstract RubyEncoding executePrepare(Node node, RubyRegexp regexp, Object matchString);
116129

117130
@Specialization(guards = "stringLibrary.isRubyString(matchString)", limit = "1")
118-
protected static RubyEncoding regexpPrepareEncoding(RubyRegexp regexp, Object matchString,
131+
protected static RubyEncoding regexpPrepareEncoding(Node node, RubyRegexp regexp, Object matchString,
119132
@Cached RubyStringLibrary stringLibrary,
120-
@Cached TruffleString.GetByteCodeRangeNode codeRangeNode,
133+
@Cached(inline = false) TruffleString.GetByteCodeRangeNode codeRangeNode,
121134
@Cached InlinedBranchProfile asciiOnlyProfile,
122135
@Cached InlinedBranchProfile asciiIncompatibleFixedRegexpEncodingProfile,
123136
@Cached InlinedBranchProfile asciiIncompatibleMatchStringEncodingProfile,
@@ -129,8 +142,7 @@ protected static RubyEncoding regexpPrepareEncoding(RubyRegexp regexp, Object ma
129142
@Cached InlinedBranchProfile sameEncodingProfile,
130143
@Cached InlinedBranchProfile validBinaryMatchStringProfile,
131144
@Cached InlinedBranchProfile validUtf8MatchStringProfile,
132-
@Cached LazyWarnNode lazyWarnNode,
133-
@Bind("this") Node node) {
145+
@Cached LazyWarnNode lazyWarnNode) {
134146
final RubyEncoding regexpEncoding = regexp.encoding;
135147
final RubyEncoding matchStringEncoding = stringLibrary.getEncoding(matchString);
136148
var tstring = stringLibrary.getTString(matchString);
@@ -801,7 +813,7 @@ protected static Object matchInRegion(
801813
@Cached RubyStringLibrary libString,
802814
@Bind("this") Node node) {
803815
Regex regex = regexp.regex;
804-
final RubyEncoding negotiatedEncoding = prepareRegexpEncodingNode.executePrepare(regexp, string);
816+
final RubyEncoding negotiatedEncoding = prepareRegexpEncodingNode.executePrepare(node, regexp, string);
805817

806818
if (encodingMismatchProfile.profile(node, regexp.encoding != negotiatedEncoding)) {
807819
final EncodingCache encodingCache = regexp.cachedEncodings;
@@ -898,7 +910,7 @@ protected static Object matchInRegionTRegex(
898910
@Cached LazyTruffleStringSubstringByteIndexNode substringByteIndexNode,
899911
@Bind("this") Node node) {
900912
final Object tRegex;
901-
final RubyEncoding negotiatedEncoding = prepareRegexpEncodingNode.executePrepare(regexp, string);
913+
final RubyEncoding negotiatedEncoding = prepareRegexpEncodingNode.executePrepare(node, regexp, string);
902914
var tstring = switchEncodingNode.execute(libString.getTString(string), negotiatedEncoding.tencoding);
903915
final int byteLength = tstring.byteLength(negotiatedEncoding.tencoding);
904916

src/main/ruby/truffleruby/core/string.rb

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,8 @@ def upto(stop, exclusive = false)
617617
end
618618
else
619619
unless stop.size < size
620+
Primitive.encoding_ensure_compatible(self.encoding, stop.encoding)
621+
620622
after_stop = exclusive ? stop : stop.succ
621623
current = self
622624

@@ -1071,7 +1073,7 @@ def index(str, start = undefined)
10711073
end
10721074

10731075
if Primitive.is_a?(str, Regexp)
1074-
Primitive.encoding_ensure_compatible self, str
1076+
Primitive.regexp_check_encoding(str, self)
10751077

10761078
start = Primitive.character_index_to_byte_index(self, start)
10771079
if match = Truffle::RegexpOperations.match_from(str, self, start)
@@ -1115,7 +1117,7 @@ def rindex(sub, finish = undefined)
11151117
byte_finish = Primitive.character_index_to_byte_index(self, finish)
11161118

11171119
if Primitive.is_a?(sub, Regexp)
1118-
Primitive.encoding_ensure_compatible self, sub
1120+
Primitive.regexp_check_encoding(sub, self)
11191121

11201122
match_data = Truffle::RegexpOperations.search_region(sub, self, 0, byte_finish, false, true)
11211123
Primitive.regexp_last_match_set(Primitive.caller_special_variables, match_data)
@@ -1159,7 +1161,7 @@ def byteindex(str, start = 0)
11591161
end
11601162

11611163
if is_regex_pattern
1162-
Primitive.encoding_ensure_compatible(self, str)
1164+
Primitive.regexp_check_encoding(str, self)
11631165

11641166
match = Truffle::RegexpOperations.match_from(str, self, start)
11651167
Primitive.regexp_last_match_set(Primitive.caller_special_variables, match)
@@ -1186,7 +1188,7 @@ def byterindex(str, finish = bytesize)
11861188
end
11871189

11881190
if Primitive.is_a?(str, Regexp)
1189-
Primitive.encoding_ensure_compatible(self, str)
1191+
Primitive.regexp_check_encoding(str, self)
11901192

11911193
match = Truffle::RegexpOperations.search_region(str, self, 0, finish, false, true)
11921194
Primitive.regexp_last_match_set(Primitive.caller_special_variables, match)

0 commit comments

Comments
 (0)