|
| 1 | +# -*- encoding: utf-8 -*- |
| 2 | +require_relative '../../spec_helper' |
| 3 | +require_relative 'fixtures/classes' |
| 4 | +require_relative 'shared/byte_index_common.rb' |
| 5 | + |
# Argument coercion and boundary handling for String#byteindex.
# Every example sits behind the ruby_version_is "3.2" guard.
describe "String#byteindex" do
  ruby_version_is "3.2" do
    it "calls #to_str to convert the first argument" do
      needle = mock("string index char")
      needle.should_receive(:to_str).and_return("b")

      # The coerced needle "b" is found at byte 1 of "abc".
      "abc".byteindex(needle).should == 1
    end

    it "calls #to_int to convert the second argument" do
      start = mock("string index offset")
      start.should_receive(:to_int).and_return(1)

      "abc".byteindex("c", start).should == 2
    end

    it "does not raise IndexError when byte offset is correct or on string boundary" do
      # Argument list => expected byte index. The last row searches from
      # byte offset 3, which the expectation treats as the end of "わ".
      {
        [""]    => 0,
        ["", 0] => 0,
        ["", 3] => 3
      }.each do |args, expected|
        "わ".byteindex(*args).should == expected
      end
    end

    # Remaining behaviour comes from the shared :byte_index_common examples.
    it_behaves_like :byte_index_common, :byteindex
  end
end
| 29 | + |
# Examples for String#byteindex when the pattern is a String (or String subclass).
# Every example sits behind the ruby_version_is "3.2" guard.
describe "String#byteindex with String" do
  ruby_version_is "3.2" do
    it "behaves the same as String#byteindex(char) for one-character strings" do
      "blablabla hello cruel world...!".chars.uniq.each do |single|
        # NOTE(review): `single` is already one character, so `first_char`
        # equals `single` and both sides make the same call — confirm intent.
        first_char = single[0]
        single.byteindex(single).should == single.byteindex(first_char)

        # Non-negative starts (including one past the end) followed by
        # negative starts (including one before the beginning).
        width = single.size
        offsets = (0..width + 1).to_a + ((-width - 1)..-1).to_a
        offsets.each do |start|
          single.byteindex(single, start).should == single.byteindex(first_char, start)
        end
      end
    end

    it "returns the byteindex of the first occurrence of the given substring" do
      # Expected byte index => needles first found at that index.
      {
        0 => ["", "b", "bla", "blabla", "blablabla"],
        1 => ["l", "la", "labla", "lablabla"],
        2 => ["a", "abla", "ablabla"]
      }.each do |expected, needles|
        needles.each do |needle|
          "blablabla".byteindex(needle).should == expected
        end
      end
    end

    it "treats the offset as a byteindex" do
      [0, 2, 4].each do |position|
        "aaaaa".byteindex("a", position).should == position
      end
    end

    it "ignores string subclasses" do
      wrap = ->(text) { StringSpecs::MyString.new(text) }

      # Subclass as needle, as receiver, and as both.
      "blablabla".byteindex(wrap.call("bla")).should == 0
      wrap.call("blablabla").byteindex("bla").should == 0
      wrap.call("blablabla").byteindex(wrap.call("bla")).should == 0
    end

    it "starts the search at the given offset" do
      # needle => { start offset => expected byte index }
      expectations = {
        "bl"   => { 0 => 0, 1 => 3, 2 => 3, 3 => 3 },
        "bla"  => { 0 => 0, 1 => 3, 2 => 3, 3 => 3 },
        "blab" => { 0 => 0, 1 => 3, 2 => 3, 3 => 3 },
        "la"   => { 1 => 1, 2 => 4, 3 => 4, 4 => 4 },
        "lab"  => { 1 => 1, 2 => 4, 3 => 4, 4 => 4 },
        "ab"   => { 2 => 2, 3 => 5, 4 => 5, 5 => 5 },
        ""     => { 0 => 0, 1 => 1, 2 => 2, 7 => 7, 8 => 8, 9 => 9 }
      }

      expectations.each do |needle, by_start|
        by_start.each do |start, expected|
          "blablabla".byteindex(needle, start).should == expected
        end
      end
    end

    it "starts the search at offset + self.length if offset is negative" do
      haystack = "blablabla"
      len = haystack.length

      (%w[bl bla blab la lab ab] + [""]).each do |needle|
        (-len).upto(-1) do |negative|
          # A negative offset must give the same result as its positive equivalent.
          from_end = haystack.byteindex(needle, negative)
          from_start = haystack.byteindex(needle, negative + len)
          from_end.should == from_start
        end
      end
    end

    it "returns nil if the substring isn't found" do
      ["B", "z", "BLA", "blablablabla"].each do |missing|
        "blablabla".byteindex(missing).should == nil
      end
      # Offset 10 is past the end of the 9-byte string.
      "blablabla".byteindex("", 10).should == nil

      [1, 2].each do |start|
        "hello".byteindex("he", start).should == nil
      end
      "I’ve got a multibyte character.\n".byteindex("\n\n").should == nil
    end

    it "returns the character byteindex of a multibyte character" do
      thanks = "ありがとう"
      thanks.byteindex("が").should == 6
    end

    it "returns the character byteindex after offset" do
      "われわれ".byteindex("わ", 3).should == 6
      "ありがとうありがとう".byteindex("が", 9).should == 21
    end

    it "returns the character byteindex after a partial first match" do
      # The leading "</" looks like a match, but the full "</h" starts at byte 2.
      "</</h".byteindex("</h").should == 2
    end

    it "raises an Encoding::CompatibilityError if the encodings are incompatible" do
      euc_jp_needle = "れ".encode(Encoding::EUC_JP)

      -> { "あれ".byteindex(euc_jp_needle) }.should raise_error(Encoding::CompatibilityError)
    end

    it "handles a substring in a superset encoding" do
      ascii_haystack = 'abc'.force_encoding(Encoding::US_ASCII)
      ascii_haystack.byteindex('é').should == nil
    end

    it "handles a substring in a subset encoding" do
      ascii_needle = 't'.force_encoding(Encoding::US_ASCII)
      'été'.byteindex(ascii_needle).should == 2
    end
  end
end
| 167 | + |
# Examples for String#byteindex when the pattern is a Regexp.
# Every example sits behind the ruby_version_is "3.2" guard.
describe "String#byteindex with Regexp" do
  ruby_version_is "3.2" do
    it "behaves the same as String#byteindex(string) for escaped string regexps" do
      ["blablabla", "hello cruel world...!"].each do |str|
        ["", "b", "bla", "lab", "o c", "d."].each do |needle|
          # Escaping makes the Regexp match the needle literally (e.g. the "." in "d.").
          regexp = Regexp.new(Regexp.escape(needle))
          str.byteindex(regexp).should == str.byteindex(needle)

          # Non-negative starts, including one past the end of the string.
          0.upto(str.size + 1) do |start|
            str.byteindex(regexp, start).should == str.byteindex(needle, start)
          end

          # Negative starts, including one before the beginning of the string.
          (-str.size - 1).upto(-1) do |start|
            str.byteindex(regexp, start).should == str.byteindex(needle, start)
          end
        end
      end
    end

    it "returns the byteindex of the first match of regexp" do
      "blablabla".byteindex(/bla/).should == 0
      "blablabla".byteindex(/BLA/i).should == 0

      "blablabla".byteindex(/.{0}/).should == 0
      "blablabla".byteindex(/.{6}/).should == 0
      "blablabla".byteindex(/.{9}/).should == 0

      "blablabla".byteindex(/.*/).should == 0
      "blablabla".byteindex(/.+/).should == 0

      "blablabla".byteindex(/lab|b/).should == 0

      # String anchors are skipped on Opal via the not_supported_on guard.
      not_supported_on :opal do
        "blablabla".byteindex(/\A/).should == 0
        "blablabla".byteindex(/\Z/).should == 9
        "blablabla".byteindex(/\z/).should == 9
        "blablabla\n".byteindex(/\Z/).should == 9
        "blablabla\n".byteindex(/\z/).should == 10
      end

      "blablabla".byteindex(/^/).should == 0
      "\nblablabla".byteindex(/^/).should == 0
      "b\nablabla".byteindex(/$/).should == 1
      "bl\nablabla".byteindex(/$/).should == 2

      "blablabla".byteindex(/.l./).should == 0
    end

    it "starts the search at the given offset" do
      "blablabla".byteindex(/.{0}/, 5).should == 5
      "blablabla".byteindex(/.{1}/, 5).should == 5
      "blablabla".byteindex(/.{2}/, 5).should == 5
      "blablabla".byteindex(/.{3}/, 5).should == 5
      "blablabla".byteindex(/.{4}/, 5).should == 5

      "blablabla".byteindex(/.{0}/, 3).should == 3
      "blablabla".byteindex(/.{1}/, 3).should == 3
      "blablabla".byteindex(/.{2}/, 3).should == 3
      "blablabla".byteindex(/.{5}/, 3).should == 3
      "blablabla".byteindex(/.{6}/, 3).should == 3

      "blablabla".byteindex(/.l./, 0).should == 0
      "blablabla".byteindex(/.l./, 1).should == 3
      "blablabla".byteindex(/.l./, 2).should == 3
      "blablabla".byteindex(/.l./, 3).should == 3

      "xblaxbla".byteindex(/x./, 0).should == 0
      "xblaxbla".byteindex(/x./, 1).should == 4
      "xblaxbla".byteindex(/x./, 2).should == 4

      not_supported_on :opal do
        "blablabla\n".byteindex(/\Z/, 9).should == 9
      end
    end

    it "starts the search at offset + self.length if offset is negative" do
      str = "blablabla"

      ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle|
        # Search with a Regexp (not the raw String) so that this example
        # covers negative offsets on the Regexp path of byteindex.
        regexp = Regexp.new(Regexp.escape(needle))

        (-str.length .. -1).each do |offset|
          str.byteindex(regexp, offset).should ==
            str.byteindex(regexp, offset + str.length)
        end
      end
    end

    it "returns nil if the substring isn't found" do
      "blablabla".byteindex(/BLA/).should == nil

      "blablabla".byteindex(/.{10}/).should == nil
      "blaxbla".byteindex(/.x/, 3).should == nil
      "blaxbla".byteindex(/..x/, 2).should == nil
    end

    it "returns nil if the Regexp matches the empty string and the offset is out of range" do
      "ruby".byteindex(//, 12).should be_nil
    end

    it "supports \\G which matches at the given start offset" do
      "helloYOU.".byteindex(/\GYOU/, 5).should == 5
      "helloYOU.".byteindex(/\GYOU/).should == nil

      re = /\G.+YOU/
      # The # marks where \G will match.
      [
        ["#hi!YOUall.", 0],
        ["h#i!YOUall.", 1],
        ["hi#!YOUall.", 2],
        ["hi!#YOUall.", nil]
      ].each do |spec|
        # Take the start offset from the marker, then strip the marker
        # to get the string that is actually searched.
        start = spec[0].byteindex("#")
        str = spec[0].delete("#")

        str.byteindex(re, start).should == spec[1]
      end
    end

    it "converts start_offset to an integer via to_int" do
      obj = mock('1')
      obj.should_receive(:to_int).and_return(1)
      "RWOARW".byteindex(/R./, obj).should == 4
    end

    it "returns the character byteindex of a multibyte character" do
      "ありがとう".byteindex(/が/).should == 6
    end

    it "returns the character byteindex after offset" do
      "われわれ".byteindex(/わ/, 3).should == 6
    end

    it "treats the offset as a byteindex" do
      "われわわれ".byteindex(/わ/, 6).should == 6
    end
  end
end
0 commit comments