@@ -202,7 +202,7 @@ def discard(skip)
202
202
# Returns the number of bytes to fetch from the buffer up-to-
203
203
# and-including +pattern+. Returns +nil+ if pattern is not found.
204
204
# Diff view of +find+: the `-` line is the previous lookup through
# @storage.locate, the `+` line is its replacement through
# Primitive.bytearray_locate. Both pass the same +pattern+, @start and @used.
# The +discard+ parameter is accepted but not referenced in the visible body.
def find ( pattern , discard = nil )
205
- if count = @storage . locate ( pattern , @start , @used )
205
+ if count = Primitive . bytearray_locate ( @storage , pattern , @start , @used )
206
206
# Rebase the located position against @start (presumably turning an index
# into @storage into a byte count from the read position -- confirm against
# the primitive). With no match the `if` has no else branch, so the method
# evaluates to nil.
count - @start
207
207
end
208
208
end
@@ -225,7 +225,7 @@ def shift(count = nil, encoding = Encoding::BINARY)
225
225
total = size
226
226
total = count if count and count < total
227
227
228
- str = String . from_bytearray @storage , @start , total , encoding
228
+ str = Primitive . string_from_bytearray ( @storage , @start , total , encoding )
229
229
@start += total
230
230
231
231
str
@@ -1135,6 +1135,8 @@ def each(&block)
1135
1135
# method A, D
1136
1136
def read_to_separator
1137
1137
str = +''
1138
+ last_scan_end = 0
1139
+ separator_byte_size = @separator . bytesize
1138
1140
1139
1141
until @buffer . exhausted?
1140
1142
available = @buffer . fill_from @io , @skip
@@ -1143,26 +1145,80 @@ def read_to_separator
1143
1145
if count = @buffer . find ( @separator )
1144
1146
s = @buffer . shift ( count )
1145
1147
1146
- unless str . empty?
1147
- s . prepend ( str )
1148
- str . clear
1148
+ # We need to be careful matching against multi-byte separators since the
1149
+ # `str` value is being built up progressively.
1150
+ #
1151
+ # If the separator is only a single byte wide and we found it in the buffer
1152
+ # then `count` must be the correct position because there's no way for a single
1153
+ # byte read to be split up; it doesn't matter what `str` contains.
1154
+ #
1155
+ # If the separator is multiple bytes, then we look at `str`. If it's empty,
1156
+ # then the buffer trivially must contain the separator. If not, however, we
1157
+ # must scan the entire `str` after appending the buffer to see if the separator
1158
+ # pattern appears earlier than the position detected by `count`.
1159
+ if str . empty? || separator_byte_size == 1
1160
+ unless str . empty?
1161
+ s . prepend ( str )
1162
+ str . clear
1163
+ end
1164
+
1165
+ yield prepare_read_string ( s )
1166
+
1167
+ next
1168
+ else
1169
+ str << s
1149
1170
end
1171
+ else
1172
+ str << @buffer . shift
1173
+ end
1150
1174
1151
- s = IO . read_encode ( @io , s )
1175
+ if separator_byte_size > 1 && last_scan_end < str . bytesize
1176
+ # Since the separator could be split over multiple passes of this loop,
1177
+ # it's possible for the separator to never appear completely in `@buffer`,
1178
+ # but may appear in `str` after successive passes. If we found an unambiguous
1179
+ # match in the buffer, we wouldn't be in this branch. Since we are, we need
1180
+ # to check if the separator appears in the total read string.
1181
+ #
1182
+ # Rather than scan the entirety of `str` every time, we track how far we've
1183
+ # previously scanned. Since the separator bytes can span reads, we need to
1184
+ # step back `@separator.bytesize - 1` bytes to ensure we don't skip over the
1185
+ # separator bytes. It's `@separator.bytesize - 1` because if the entire
1186
+ # separator was already in `str` we would have found it on a previous pass of
1187
+ # the loop. Since we also need to subtract one to account for zero-based offsets,
1188
+ # we can include that in our offset and just subtract the separator byte size.
1189
+ # On the very first scan we don't need to account for any partial scans of the
1190
+ # separator.
1191
+
1192
+ search_offset = last_scan_end - separator_byte_size
1193
+ search_offset = 0 if search_offset < 0
1194
+
1195
+ found_byte_index = Primitive . string_byte_index ( str , @separator . b , Encoding ::BINARY , search_offset )
1196
+
1197
+ if found_byte_index
1198
+ offset = found_byte_index + @separator . bytesize
1199
+
1200
+ # If we've read more bytes than we need to satisfy the current request, we
1201
+ # need to put the remainder back into the buffer so that subsequent reads
1202
+ # will have the correct bytes.
1203
+ if offset < str . bytesize
1204
+ @buffer . put_back ( str . byteslice ( offset , str . bytesize - offset ) )
1205
+ end
1206
+
1207
+ res = prepare_read_string ( str . byteslice ( 0 , offset ) )
1152
1208
1153
- s . chomp! ( @separator ) if @chomp
1154
- $. = @io . __send__ ( :increment_lineno )
1155
- @buffer . discard @skip if @skip
1209
+ str . clear
1210
+ last_scan_end = 0
1156
1211
1157
- yield s
1158
- else
1159
- str << @buffer . shift
1212
+ yield res
1213
+ else
1214
+ last_scan_end = str . bytesize
1215
+ end
1160
1216
end
1161
1217
end
1162
1218
1163
1219
str << @buffer . shift
1164
1220
str . chomp! ( @separator ) if @chomp
1165
- yield_string ( str ) { | y | yield y }
1221
+ yield prepare_read_string ( str ) unless str . empty?
1166
1222
end
1167
1223
1168
1224
# method B, E
@@ -1180,27 +1236,16 @@ def read_to_separator_with_limit
1180
1236
bytes = Primitive . min ( count , wanted )
1181
1237
str << @buffer . shift ( bytes )
1182
1238
1183
- str = IO . read_encode ( @io , str )
1184
-
1185
- str . chomp! ( @separator ) if @chomp
1186
- $. = @io . __send__ ( :increment_lineno )
1187
- @buffer . discard @skip if @skip
1188
-
1189
- yield str
1239
+ yield prepare_read_string ( str )
1190
1240
1191
1241
str = +''
1192
1242
wanted = limit
1193
1243
else
1194
1244
if wanted < available
1195
1245
str << @buffer . shift ( wanted )
1196
-
1197
1246
str = @buffer . read_to_char_boundary ( @io , str )
1198
1247
1199
- str . chomp! ( @separator ) if @chomp
1200
- $. = @io . __send__ ( :increment_lineno )
1201
- @buffer . discard @skip if @skip
1202
-
1203
- yield str
1248
+ yield prepare_read_string ( str )
1204
1249
1205
1250
str = +''
1206
1251
wanted = limit
@@ -1211,8 +1256,7 @@ def read_to_separator_with_limit
1211
1256
end
1212
1257
end
1213
1258
1214
- str . chomp! ( @separator ) if @chomp
1215
- yield_string ( str ) { |s | yield s }
1259
+ yield prepare_read_string ( str ) unless str . empty?
1216
1260
end
1217
1261
1218
1262
# Method G
@@ -1228,7 +1272,7 @@ def read_all
1228
1272
end
1229
1273
1230
1274
str . chomp! ( DEFAULT_RECORD_SEPARATOR ) if @chomp
1231
- yield_string ( str ) { | s | yield s }
1275
+ yield prepare_read_string ( str ) unless str . empty?
1232
1276
end
1233
1277
1234
1278
# Method H
@@ -1254,15 +1298,17 @@ def read_to_limit
1254
1298
end
1255
1299
end
1256
1300
1257
- yield_string ( str ) { | s | yield s }
1301
+ yield prepare_read_string ( str ) unless str . empty?
1258
1302
end
1259
1303
1260
- def yield_string ( str )
1261
- unless str . empty?
1262
- str = IO . read_encode ( @io , str )
1263
- $. = @io . __send__ ( :increment_lineno )
1264
- yield str
1265
- end
1304
# Shared post-processing for a chunk that is about to be yielded to the caller.
# In order, it:
#   1. passes +str+ through IO.read_encode together with @io,
#   2. chomps @separator from the result when @chomp is set,
#   3. sends :increment_lineno to @io and stores the result in `$.`,
#   4. asks @buffer to discard @skip when @skip is set.
# Returns the string produced by IO.read_encode (after the optional chomp).
# This method does no emptiness check of its own; several call sites guard it
# with `unless str.empty?`.
+ def prepare_read_string ( str )
1305
+ s = IO . read_encode ( @io , str )
1306
+
1307
# chomp! runs on the result of read_encode, not on the raw +str+ argument.
+ s . chomp! ( @separator ) if @chomp
1308
+ $. = @io . __send__ ( :increment_lineno )
1309
+ @buffer . discard @skip if @skip
1310
+
1311
+ s
1266
1312
end
1267
1313
end
1268
1314
0 commit comments