Skip to content

Commit a531a9f

Browse files
committed
[http] Eliminate _read_iter and simplify read
1 parent b59a203 commit a531a9f

File tree

2 files changed

+39
-26
lines changed

2 files changed

+39
-26
lines changed

smart_open/http.py

Lines changed: 10 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,6 @@ def __init__(self, url, mode='r', buffer_size=DEFAULT_BUFFER_SIZE,
138138
if not self.response.ok:
139139
self.response.raise_for_status()
140140

141-
self._read_iter = iter(lambda: self.response.raw.read(self.buffer_size), b"")
142141
self._read_buffer = bytebuffer.ByteBuffer(buffer_size)
143142
self._current_pos = 0
144143

@@ -150,7 +149,6 @@ def close(self):
150149
logger.debug("close: called")
151150
if not self.closed:
152151
self.response = None
153-
self._read_iter = None
154152
self._read_buffer = None
155153

156154
@property
@@ -175,32 +173,20 @@ def read(self, size=-1):
175173
"""
176174
Mimics the read call to a filehandle object.
177175
"""
176+
if size < -1:
177+
raise ValueError(f'size must be >= -1, got {size}')
178+
178179
logger.debug("reading with size: %d", size)
179-
if self.response is None:
180-
return b''
181-
182-
if size == 0:
183-
return b''
184-
elif size < 0 and len(self._read_buffer) == 0:
185-
retval = self.response.raw.read()
186-
elif size < 0:
180+
if self.closed or size == 0:
181+
return b""
182+
183+
if size == -1:
187184
retval = self._read_buffer.read() + self.response.raw.read()
188185
else:
186+
# Fill _read_buffer until it contains enough bytes
189187
while len(self._read_buffer) < size:
190-
logger.debug(
191-
"http reading more content at current_pos: %d with size: %d",
192-
self._current_pos, size,
193-
)
194-
bytes_read = self._read_buffer.fill(self._read_iter)
195-
if bytes_read == 0:
196-
# Oops, ran out of data early.
197-
retval = self._read_buffer.read()
198-
self._current_pos += len(retval)
199-
200-
return retval
201-
202-
# If we got here, it means we have enough data in the buffer
203-
# to return to the caller.
188+
if self._read_buffer.fill(self.response.raw) == 0:
189+
break # EOF reached
204190
retval = self._read_buffer.read(size)
205191

206192
self._current_pos += len(retval)
@@ -281,13 +267,11 @@ def seek(self, offset, whence=0):
281267

282268
if new_pos == self.content_length:
283269
self.response = None
284-
self._read_iter = None
285270
self._read_buffer.empty()
286271
else:
287272
response = self._partial_request(new_pos)
288273
if response.ok:
289274
self.response = response
290-
self._read_iter = iter(lambda: self.response.raw.read(self.buffer_size), b"")
291275
self._read_buffer.empty()
292276
else:
293277
self.response = None

tests/test_http.py

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -253,3 +253,32 @@ def callback(request):
253253
partial = reader.read(2) + reader.read()
254254
assert len(partial) == len(BYTES), f"Expected {len(BYTES)} bytes, got {len(partial)}"
255255
assert partial == BYTES
256+
257+
258+
@responses.activate
259+
def test_read_after_seek_to_eof():
260+
"""Reading after seeking to EOF should return empty bytes."""
261+
responses.add_callback(responses.GET, URL, callback=request_callback)
262+
reader = smart_open.http.SeekableBufferedInputBase(URL)
263+
264+
# Seek to EOF
265+
reader.seek(0, whence=smart_open.constants.WHENCE_END)
266+
assert reader.tell() == len(BYTES)
267+
268+
# Read should return empty bytes, not crash
269+
result = reader.read()
270+
assert result == b''
271+
272+
# Read with size should also return empty bytes
273+
result = reader.read(10)
274+
assert result == b''
275+
276+
277+
@responses.activate
278+
def test_read_with_invalid_size():
279+
"""Read with size < -1 should raise ValueError."""
280+
responses.add_callback(responses.GET, URL, callback=request_callback)
281+
reader = smart_open.http.SeekableBufferedInputBase(URL)
282+
283+
with pytest.raises(ValueError, match='size must be >= -1'):
284+
reader.read(-2)

0 commit comments

Comments
 (0)