Skip to content

Commit 45d9756

Browse files
committed
Add String#byteindex
Add String#byterindex Add tests
1 parent 2372d09 commit 45d9756

File tree

10 files changed

+858
-2
lines changed

10 files changed

+858
-2
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Compatibility:
1818
* Alias `String#-@` to `String#dedup` (#3039, @itarato).
1919
* Fix `Pathname#relative_path_from` to convert string arguments to Pathname objects (@rwstauner).
2020
* Add `String#bytesplice` (#3039, @itarato).
21+
* Add `String#byteindex` and `String#byterindex` (#3039, @itarato).
2122

2223
Performance:
2324

Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
# -*- encoding: utf-8 -*-
2+
require_relative '../../spec_helper'
3+
require_relative 'fixtures/classes'
4+
require_relative 'shared/byte_index_common.rb'
5+
6+
# Argument coercion and boundary handling for String#byteindex, followed by
# the shared :byte_index_common examples run against :byteindex.
describe "String#byteindex" do
  ruby_version_is "3.2" do
    it "calls #to_str to convert the first argument" do
      needle = mock("string index char")
      needle.should_receive(:to_str).and_return("b")

      "abc".byteindex(needle).should == 1
    end

    it "calls #to_int to convert the second argument" do
      start = mock("string index offset")
      start.should_receive(:to_int).and_return(1)

      "abc".byteindex("c", start).should == 2
    end

    it "does not raise IndexError when byte offset is correct or on string boundary" do
      # A single three-byte character (UTF-8 source): offsets 0 and 3 are
      # its leading and trailing boundaries.
      haystack = "わ"

      haystack.byteindex("").should == 0
      haystack.byteindex("", 0).should == 0
      haystack.byteindex("", 3).should == 3
    end

    it_behaves_like :byte_index_common, :byteindex
  end
end
29+
30+
# Behaviour of String#byteindex when the pattern is a String (or a String
# subclass): plain lookups, positive and negative byte offsets, misses,
# multibyte haystacks and encoding compatibility.
describe "String#byteindex with String" do
  ruby_version_is "3.2" do
    it "behaves the same as String#byteindex(char) for one-character strings" do
      # NOTE(review): every element produced by split("") is already a single
      # character, so `single[0]` looks equal to `single` and both sides of
      # each comparison make the same call -- confirm this is intended.
      "blablabla hello cruel world...!".split("").uniq.each do |single|
        first_char = single[0]
        single.byteindex(single).should == single.byteindex(first_char)

        (0..single.size + 1).each do |offset|
          single.byteindex(single, offset).should == single.byteindex(first_char, offset)
        end

        ((-single.size - 1)..-1).each do |offset|
          single.byteindex(single, offset).should == single.byteindex(first_char, offset)
        end
      end
    end

    it "returns the byteindex of the first occurrence of the given substring" do
      # expected byte index => needles that must be found there
      {
        0 => ["", "b", "bla", "blabla", "blablabla"],
        1 => ["l", "la", "labla", "lablabla"],
        2 => ["a", "abla", "ablabla"]
      }.each do |expected, needles|
        needles.each do |needle|
          "blablabla".byteindex(needle).should == expected
        end
      end
    end

    it "treats the offset as a byteindex" do
      [0, 2, 4].each do |offset|
        "aaaaa".byteindex("a", offset).should == offset
      end
    end

    it "ignores string subclasses" do
      # Builds a fresh subclass instance for every use.
      subclassed = ->(text) { StringSpecs::MyString.new(text) }

      "blablabla".byteindex(subclassed.("bla")).should == 0
      subclassed.("blablabla").byteindex("bla").should == 0
      subclassed.("blablabla").byteindex(subclassed.("bla")).should == 0
    end

    it "starts the search at the given offset" do
      # needle => { start offset => expected byte index }
      {
        "bl"   => { 0 => 0, 1 => 3, 2 => 3, 3 => 3 },
        "bla"  => { 0 => 0, 1 => 3, 2 => 3, 3 => 3 },
        "blab" => { 0 => 0, 1 => 3, 2 => 3, 3 => 3 },
        "la"   => { 1 => 1, 2 => 4, 3 => 4, 4 => 4 },
        "lab"  => { 1 => 1, 2 => 4, 3 => 4, 4 => 4 },
        "ab"   => { 2 => 2, 3 => 5, 4 => 5, 5 => 5 },
        ""     => { 0 => 0, 1 => 1, 2 => 2, 7 => 7, 8 => 8, 9 => 9 }
      }.each do |needle, expectations|
        expectations.each do |offset, expected|
          "blablabla".byteindex(needle, offset).should == expected
        end
      end
    end

    it "starts the search at offset + self.length if offset is negative" do
      haystack = "blablabla"
      size = haystack.length

      ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle|
        (-size).upto(-1) do |offset|
          haystack.byteindex(needle, offset).should == haystack.byteindex(needle, offset + size)
        end
      end
    end

    it "returns nil if the substring isn't found" do
      ["B", "z", "BLA", "blablablabla"].each do |missing|
        "blablabla".byteindex(missing).should == nil
      end
      # One past the end of the nine-byte haystack.
      "blablabla".byteindex("", 10).should == nil

      [1, 2].each do |offset|
        "hello".byteindex("he", offset).should == nil
      end
      "I’ve got a multibyte character.\n".byteindex("\n\n").should == nil
    end

    it "returns the character byteindex of a multibyte character" do
      "ありがとう".byteindex("が").should == 6
    end

    it "returns the character byteindex after offset" do
      "われわれ".byteindex("わ", 3).should == 6
      "ありがとうありがとう".byteindex("が", 9).should == 21
    end

    it "returns the character byteindex after a partial first match" do
      "</</h".byteindex("</h").should == 2
    end

    it "raises an Encoding::CompatibilityError if the encodings are incompatible" do
      euc_jp_needle = "れ".encode(Encoding::EUC_JP)

      -> { "あれ".byteindex(euc_jp_needle) }.should raise_error(Encoding::CompatibilityError)
    end

    it "handles a substring in a superset encoding" do
      ascii_haystack = 'abc'.force_encoding(Encoding::US_ASCII)

      ascii_haystack.byteindex('é').should == nil
    end

    it "handles a substring in a subset encoding" do
      ascii_needle = 't'.force_encoding(Encoding::US_ASCII)

      'été'.byteindex(ascii_needle).should == 2
    end
  end
end
167+
168+
# Behaviour of String#byteindex when the pattern is a Regexp: equivalence with
# the String form for escaped literals, anchors, positive and negative byte
# offsets, misses, \G handling, offset coercion and multibyte haystacks.
describe "String#byteindex with Regexp" do
  ruby_version_is "3.2" do
    it "behaves the same as String#byteindex(string) for escaped string regexps" do
      ["blablabla", "hello cruel world...!"].each do |str|
        ["", "b", "bla", "lab", "o c", "d."].each do |needle|
          # Escaping makes the regexp match the needle literally.
          regexp = Regexp.new(Regexp.escape(needle))
          str.byteindex(regexp).should == str.byteindex(needle)

          0.upto(str.size + 1) do |start|
            str.byteindex(regexp, start).should == str.byteindex(needle, start)
          end

          (-str.size - 1).upto(-1) do |start|
            str.byteindex(regexp, start).should == str.byteindex(needle, start)
          end
        end
      end
    end

    it "returns the byteindex of the first match of regexp" do
      "blablabla".byteindex(/bla/).should == 0
      "blablabla".byteindex(/BLA/i).should == 0

      "blablabla".byteindex(/.{0}/).should == 0
      "blablabla".byteindex(/.{6}/).should == 0
      "blablabla".byteindex(/.{9}/).should == 0

      "blablabla".byteindex(/.*/).should == 0
      "blablabla".byteindex(/.+/).should == 0

      "blablabla".byteindex(/lab|b/).should == 0

      not_supported_on :opal do
        "blablabla".byteindex(/\A/).should == 0
        "blablabla".byteindex(/\Z/).should == 9
        "blablabla".byteindex(/\z/).should == 9
        "blablabla\n".byteindex(/\Z/).should == 9
        "blablabla\n".byteindex(/\z/).should == 10
      end

      "blablabla".byteindex(/^/).should == 0
      "\nblablabla".byteindex(/^/).should == 0
      "b\nablabla".byteindex(/$/).should == 1
      "bl\nablabla".byteindex(/$/).should == 2

      "blablabla".byteindex(/.l./).should == 0
    end

    it "starts the search at the given offset" do
      "blablabla".byteindex(/.{0}/, 5).should == 5
      "blablabla".byteindex(/.{1}/, 5).should == 5
      "blablabla".byteindex(/.{2}/, 5).should == 5
      "blablabla".byteindex(/.{3}/, 5).should == 5
      "blablabla".byteindex(/.{4}/, 5).should == 5

      "blablabla".byteindex(/.{0}/, 3).should == 3
      "blablabla".byteindex(/.{1}/, 3).should == 3
      "blablabla".byteindex(/.{2}/, 3).should == 3
      "blablabla".byteindex(/.{5}/, 3).should == 3
      "blablabla".byteindex(/.{6}/, 3).should == 3

      "blablabla".byteindex(/.l./, 0).should == 0
      "blablabla".byteindex(/.l./, 1).should == 3
      "blablabla".byteindex(/.l./, 2).should == 3
      "blablabla".byteindex(/.l./, 3).should == 3

      "xblaxbla".byteindex(/x./, 0).should == 0
      "xblaxbla".byteindex(/x./, 1).should == 4
      "xblaxbla".byteindex(/x./, 2).should == 4

      not_supported_on :opal do
        "blablabla\n".byteindex(/\Z/, 9).should == 9
      end
    end

    it "starts the search at offset + self.length if offset is negative" do
      # The haystack is pure ASCII, so its length in characters equals its
      # length in bytes and can be used to normalise a negative byte offset.
      str = "blablabla"

      ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle|
        # This group covers the Regexp form, so each needle is turned into an
        # equivalent literal-matching Regexp instead of being passed as a
        # String (which would only repeat the String-pattern coverage).
        regexp = Regexp.new(Regexp.escape(needle))

        (-str.length .. -1).each do |offset|
          str.byteindex(regexp, offset).should ==
            str.byteindex(regexp, offset + str.length)
        end
      end
    end

    it "returns nil if the substring isn't found" do
      "blablabla".byteindex(/BLA/).should == nil

      "blablabla".byteindex(/.{10}/).should == nil
      "blaxbla".byteindex(/.x/, 3).should == nil
      "blaxbla".byteindex(/..x/, 2).should == nil
    end

    it "returns nil if the Regexp matches the empty string and the offset is out of range" do
      "ruby".byteindex(//, 12).should be_nil
    end

    it "supports \\G which matches at the given start offset" do
      "helloYOU.".byteindex(/\GYOU/, 5).should == 5
      "helloYOU.".byteindex(/\GYOU/).should == nil

      re = /\G.+YOU/
      # The # marks where \G will match.
      [
        ["#hi!YOUall.", 0],
        ["h#i!YOUall.", 1],
        ["hi#!YOUall.", 2],
        ["hi!#YOUall.", nil]
      ].each do |marked, expected|
        # The marker's byte position becomes the start offset; the marker
        # itself is removed before searching.
        start = marked.byteindex("#")
        str = marked.delete("#")

        str.byteindex(re, start).should == expected
      end
    end

    it "converts start_offset to an integer via to_int" do
      obj = mock('1')
      obj.should_receive(:to_int).and_return(1)
      "RWOARW".byteindex(/R./, obj).should == 4
    end

    it "returns the character byteindex of a multibyte character" do
      "ありがとう".byteindex(/が/).should == 6
    end

    it "returns the character byteindex after offset" do
      "われわれ".byteindex(/わ/, 3).should == 6
    end

    it "treats the offset as a byteindex" do
      "われわわれ".byteindex(/わ/, 6).should == 6
    end
  end
end

0 commit comments

Comments
 (0)