Skip to content

Commit 2372d09

Browse files
andrykonchin authored and eregon committed
[GR-19220] Add String#bytesplice
PullRequest: truffleruby/3828
2 parents 2b4657a + f0cbd62 commit 2372d09

File tree

7 files changed

+195
-0
lines changed

7 files changed

+195
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Compatibility:
1717
* Fix `Range#size` and return `nil` for beginningless Range when end isn't Numeric (#3039, @rwstauner).
1818
* Alias `String#-@` to `String#dedup` (#3039, @itarato).
1919
* Fix `Pathname#relative_path_from` to convert string arguments to Pathname objects (@rwstauner).
20+
* Add `String#bytesplice` (#3039, @itarato).
2021

2122
Performance:
2223

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
# -*- encoding: utf-8 -*-
2+
require_relative '../../spec_helper'
3+
4+
describe "String#bytesplice" do
  ruby_version_is "3.2" do
    it "raises IndexError when index is less than -bytesize" do
      splice = -> { "hello".bytesplice(-6, 0, "xxx") }
      splice.should raise_error(IndexError, "index -6 out of string")
    end

    it "raises IndexError when index is greater than bytesize" do
      splice = -> { "hello".bytesplice(6, 0, "xxx") }
      splice.should raise_error(IndexError, "index 6 out of string")
    end

    it "raises IndexError for negative length" do
      splice = -> { "abc".bytesplice(0, -2, "") }
      splice.should raise_error(IndexError, "negative length -2")
    end

    it "replaces with integer indices" do
      # Each row is [index, length, expected]; the literal receiver is
      # re-created on every iteration because bytesplice mutates it.
      [
        [-5, 0, "xxxhello"],
        [0, 0, "xxxhello"],
        [0, 1, "xxxello"],
        [0, 5, "xxx"],
        [0, 6, "xxx"],
      ].each do |index, length, expected|
        "hello".bytesplice(index, length, "xxx").should == expected
      end
    end

    it "raises RangeError when range left boundary is less than -bytesize" do
      splice = -> { "hello".bytesplice(-6...-6, "xxx") }
      splice.should raise_error(RangeError, "-6...-6 out of range")
    end

    it "replaces with ranges" do
      # Each row is [range, expected].
      [
        [(-5...-5), "xxxhello"],
        [(0...0), "xxxhello"],
        [(0..0), "xxxello"],
        [(0...1), "xxxello"],
        [(0..1), "xxxllo"],
        [(0..-1), "xxx"],
        [(0...5), "xxx"],
        [(0...6), "xxx"],
      ].each do |range, expected|
        "hello".bytesplice(range, "xxx").should == expected
      end
    end

    it "raises TypeError when integer index is provided without length argument" do
      splice = -> { "hello".bytesplice(0, "xxx") }
      splice.should raise_error(TypeError, "wrong argument type Integer (expected Range)")
    end

    it "replaces on an empty string" do
      ["", "xxx"].each do |replacement|
        "".bytesplice(0, 0, replacement).should == replacement
      end
    end

    it "mutates self" do
      receiver = "hello"
      receiver.bytesplice(2, 1, "xxx").should.equal?(receiver)
    end

    it "raises when string is frozen" do
      frozen = "hello".freeze
      splice = -> { frozen.bytesplice(2, 1, "xxx") }
      splice.should raise_error(FrozenError, "can't modify frozen String: \"hello\"")
    end
  end
end
61+
62+
describe "String#bytesplice with multibyte characters" do
  ruby_version_is "3.2" do
    it "raises IndexError when index is out of byte size boundary" do
      splice = -> { "こんにちは".bytesplice(-16, 0, "xxx") }
      splice.should raise_error(IndexError, "index -16 out of string")
    end

    it "raises IndexError when index is not on a codepoint boundary" do
      splice = -> { "こんにちは".bytesplice(1, 0, "xxx") }
      splice.should raise_error(IndexError, "offset 1 does not land on character boundary")
    end

    it "raises IndexError when length is not matching the codepoint boundary" do
      # The reported offset is index + length, i.e. the end of the splice.
      [1, 2].each do |length|
        splice = -> { "こんにちは".bytesplice(0, length, "xxx") }
        splice.should raise_error(IndexError, "offset #{length} does not land on character boundary")
      end
    end

    it "replaces with integer indices" do
      # Each row is [index, length, replacement, expected].
      [
        [-15, 0, "xxx", "xxxこんにちは"],
        [0, 0, "xxx", "xxxこんにちは"],
        [0, 3, "xxx", "xxxんにちは"],
        [3, 3, "はは", "こははにちは"],
        [15, 0, "xxx", "こんにちはxxx"],
      ].each do |index, length, replacement, expected|
        "こんにちは".bytesplice(index, length, replacement).should == expected
      end
    end

    it "replaces with range" do
      # Each row is [range, expected].
      [
        [(-15...-16), "xxxこんにちは"],
        [(0...0), "xxxこんにちは"],
        [(0..2), "xxxんにちは"],
        [(0...3), "xxxんにちは"],
        [(0..5), "xxxにちは"],
        [(0..-1), "xxx"],
        [(0...15), "xxx"],
        [(0...18), "xxx"],
      ].each do |range, expected|
        "こんにちは".bytesplice(range, "xxx").should == expected
      end
    end

    it "treats negative length for range as 0" do
      [
        [(0...-100), "xxxこんにちは"],
        [(3...-100), "こxxxんにちは"],
        [(-15...-100), "xxxこんにちは"],
      ].each do |range, expected|
        "こんにちは".bytesplice(range, "xxx").should == expected
      end
    end

    it "raises when ranges not match codepoint boundaries" do
      # Each row is [range, byte offset named in the error message].
      [
        [(0..0), 1],
        [(0..1), 2],
        # Begin is incorrect
        [(-4..-1), 11],
        [(-5..-1), 10],
        # End is incorrect
        [(-3..-2), 14],
        [(-3..-3), 13],
      ].each do |range, offset|
        splice = -> { "こんにちは".bytesplice(range, "x") }
        splice.should raise_error(IndexError, "offset #{offset} does not land on character boundary")
      end
    end

    it "deals with a different encoded argument" do
      # UTF-8 receiver, US-ASCII replacement.
      receiver = "こんにちは"
      receiver.encoding.should == Encoding::UTF_8
      replacement = "xxxxxx".force_encoding(Encoding::US_ASCII)

      spliced = receiver.bytesplice(0, 3, replacement)
      spliced.should == "xxxxxxんにちは"
      spliced.encoding.should == Encoding::UTF_8

      # US-ASCII receiver, UTF-8 replacement.
      receiver = "xxxxxx".force_encoding(Encoding::US_ASCII)
      replacement = "こんにちは"
      replacement.encoding.should == Encoding::UTF_8

      spliced = receiver.bytesplice(0, 3, replacement)
      spliced.should == "こんにちはxxx"
      spliced.encoding.should == Encoding::UTF_8
    end
  end
end

spec/tags/truffle/methods_tags.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,4 @@ fails:Public methods on Thread should include native_thread_id
113113
fails:Public methods on UnboundMethod should include private?
114114
fails:Public methods on UnboundMethod should include protected?
115115
fails:Public methods on UnboundMethod should include public?
116+
fails:Public methods on String should not include bytesplice

spec/truffleruby.next-specs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ spec/ruby/core/hash/shift_spec.rb
1616
spec/ruby/core/range/size_spec.rb
1717

1818
spec/ruby/core/string/dedup_spec.rb
19+
spec/ruby/core/string/bytesplice_spec.rb

src/main/java/org/truffleruby/core/string/StringNodes.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4459,4 +4459,16 @@ private String formatTooLongError(int count, RubyString string) {
44594459

44604460
}
44614461

4462+
@Primitive(name = "string_is_character_head?", lowerFixnum = 2)
4463+
public abstract static class IsCharacterHeadPrimitiveNode extends PrimitiveArrayArgumentsNode {
4464+
4465+
@Specialization
4466+
protected boolean isCharacterHead(RubyEncoding enc, Object string, int byteOffset,
4467+
@Cached RubyStringLibrary libString,
4468+
@Cached IsCharacterHeadNode isCharacterHeadNode) {
4469+
var tstring = libString.getTString(string);
4470+
return isCharacterHeadNode.execute(enc, tstring, byteOffset);
4471+
}
4472+
}
4473+
44624474
}

src/main/ruby/truffleruby/core/string.rb

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,49 @@ def byteslice(index_or_range, length = undefined)
7070
byteslice index, length
7171
end
7272

73+
# Replaces a byte range of self with +str+, in place.
#
# Call forms:
#   bytesplice(index, length, str)
#   bytesplice(range, str)
#
# A negative index counts back from the end of the string. A length that
# runs past the end is clamped to the remaining bytes. For a Range, a
# negative normalized length is treated as 0.
#
# Raises TypeError when only two arguments are given and the first is not
# a Range, IndexError for a negative length, an index outside
# -bytesize..bytesize, or a start/end offset that is not a character head,
# and RangeError when a Range starts outside the string.
# Primitive.check_mutable_string is called before modifying; the spec
# expects a FrozenError from it for a frozen receiver.
#
# Returns the result of Primitive.string_splice (the spec expects self).
def bytesplice(index_or_range, length = undefined, str)
  is_range = Primitive.is_a?(index_or_range, Range)

  if Primitive.undefined?(length)
    raise TypeError, "wrong argument type #{Primitive.class(index_or_range)} (expected Range)" unless is_range

    start, len = Primitive.range_normalized_start_length(index_or_range, bytesize)
    len = 0 if len < 0
  else
    # Keep the converted index untouched so the error message below reports
    # the integer that was actually used, not the raw argument (e.g. 6.5).
    index = Primitive.rb_to_int(index_or_range)
    start = index < 0 ? index + bytesize : index
    len = Primitive.rb_to_int(length)
  end

  str = StringValue(str)

  raise IndexError, "negative length #{len}" if len < 0

  if bytesize < start || start < 0
    raise RangeError, "#{index_or_range} out of range" if is_range

    raise IndexError, "index #{index} out of string"
  end

  # Clamp the length so the splice never extends past the end of self.
  len = bytesize - start if len > bytesize - start
  finish = start + len

  # Offsets equal to bytesize are always valid, so only inner offsets are checked.
  if start < bytesize && !Primitive.string_is_character_head?(encoding, self, start)
    raise IndexError, "offset #{start} does not land on character boundary"
  end
  if finish < bytesize && !Primitive.string_is_character_head?(encoding, self, finish)
    raise IndexError, "offset #{finish} does not land on character boundary"
  end

  Primitive.check_mutable_string(self)
  enc = Primitive.encoding_ensure_compatible_str(self, str)
  Primitive.string_splice(self, str, start, len, enc)
end
115+
73116
# Delegates to Truffle::Type.try_convert, asking it to convert +obj+ to a
# String through +to_str+.
def self.try_convert(obj)
  Truffle::Type.try_convert(obj, String, :to_str)
end

src/main/ruby/truffleruby/core/truffle/polyglot_methods.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ def byteslice(...)
8080
to_s.byteslice(...)
8181
end
8282

83+
# Forwards every argument to +bytesplice+ on the value returned by +to_s+.
def bytesplice(...)
  string = to_s
  string.bytesplice(...)
end
86+
8387
# Forwards every argument to +capitalize+ on the value returned by +to_s+.
def capitalize(...)
  string = to_s
  string.capitalize(...)
end

0 commit comments

Comments
 (0)