@@ -1135,6 +1135,8 @@ def each(&block)
1135
1135
# method A, D
1136
1136
def read_to_separator
1137
1137
str = +''
1138
+ last_scan_end = 0
1139
+ separator_byte_size = @separator . bytesize
1138
1140
1139
1141
until @buffer . exhausted?
1140
1142
available = @buffer . fill_from @io , @skip
@@ -1143,21 +1145,84 @@ def read_to_separator
1143
1145
if count = @buffer . find ( @separator )
1144
1146
s = @buffer . shift ( count )
1145
1147
1146
- unless str . empty?
1147
- s . prepend ( str )
1148
- str . clear
1149
- end
1148
+ # We need to be careful matching against multi-byte separators since the
1149
+ # `str` value is being built up progressively.
1150
+ #
1151
+ # If the separator is only a single byte wide and we found it in the buffer
1152
+ # then `count` must be the correct position because there's no way for a single
1153
+ # byte read to be split up; it doesn't matter what `str` contains.
1154
+ #
1155
+ # If the separator is multiple bytes, then we look at `str`. If it's empty,
1156
+ # then the buffer trivially must contain the separator. If not, however, we
1157
+ # must scan the entire `str` after appending the buffer to see if the separator
1158
+ # pattern appears earlier than the position detected by `count`.
1159
+ if str . empty? || separator_byte_size == 1
1160
+ unless str . empty?
1161
+ s . prepend ( str )
1162
+ str . clear
1163
+ end
1164
+
1165
+ s = IO . read_encode ( @io , s )
1166
+
1167
+ s . chomp! ( @separator ) if @chomp
1168
+ $. = @io . __send__ ( :increment_lineno )
1169
+ @buffer . discard @skip if @skip
1150
1170
1151
- s = IO . read_encode ( @io , s )
1171
+ yield s
1152
1172
1153
- s . chomp! ( @separator ) if @chomp
1154
- $. = @io . __send__ ( :increment_lineno )
1155
- @buffer . discard @skip if @skip
1156
-
1157
- yield s
1173
+ next
1174
+ else
1175
+ str << s
1176
+ end
1158
1177
else
1159
1178
str << @buffer . shift
1160
1179
end
1180
+
1181
+ if separator_byte_size > 1 && last_scan_end < str . bytesize
1182
+ # Since the separator could be split over multiple passes of this loop,
1183
+ # it's possible for the separator to never appear completely in `@buffer`,
1184
+ # but may appear in `str` after successive passes. If we found an unambiguous
1185
+ # match in the buffer, we wouldn't be in this branch. Since we are, we need
1186
+ # to check if the separator appears in the total read string.
1187
+ #
1188
+ # Rather than scan the entirety of `str` every time, we track how far we've
1189
+ # previously scanned. Since the separator bytes can span reads, we need to
1190
+ # step back `@separator.bytesize - 1` bytes to ensure we don't skip over the
1191
+ # separator bytes. It's `@separator.bytesize - 1` because if the entire
1192
+ # separator was already in `str` we would have found it on a previous pass of
1193
+ # the loop. Since we also need to subtract one to account for zero-based offsets,
1194
+ # we can include that in our offset and just substract the separator byte size.
1195
+ # On the very first scan we don't need to account for any partial scans of the
1196
+ # separator.
1197
+
1198
+ search_offset = last_scan_end - separator_byte_size
1199
+ search_offset = 0 if search_offset < 0
1200
+
1201
+ found_byte_index = Primitive . string_byte_index ( str , @separator . b , Encoding ::BINARY , search_offset )
1202
+
1203
+ if found_byte_index
1204
+ offset = found_byte_index + @separator . bytesize
1205
+
1206
+ # If we've read more bytes than we need to satisfy the current request, we
1207
+ # need to put the remainder back into the buffer so that subsequent reads
1208
+ # will have the correct bytes.
1209
+ if offset < str . bytesize
1210
+ @buffer . put_back ( str . byteslice ( offset , str . bytesize - offset ) )
1211
+ end
1212
+
1213
+ res = IO . read_encode ( @io , str . byteslice ( 0 , offset ) )
1214
+ res . chomp! ( @separator ) if @chomp
1215
+ $. = @io . __send__ ( :increment_lineno )
1216
+ @buffer . discard @skip if @skip
1217
+
1218
+ str . clear
1219
+ last_scan_end = 0
1220
+
1221
+ yield res
1222
+ else
1223
+ last_scan_end = str . bytesize
1224
+ end
1225
+ end
1161
1226
end
1162
1227
1163
1228
str << @buffer . shift
0 commit comments