Skip to content

Commit 51919ed

Browse files
committed
replay: make range cache available by default in replay_views, since it's
initialized on first use. Remove the separate subclass. 'enable_ranges' can be set to false to disable the range cache altogether. Improve tests.
1 parent 3819e93 commit 51919ed

File tree

7 files changed

+61
-72
lines changed

7 files changed

+61
-72
lines changed

pywb/utils/test/test_loaders.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@
5353
5454
>>> extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'x')
5555
56+
>>> extract_client_cookie(dict(HTTP_COOKIE='x'), 'x')
57+
5658
>>> extract_client_cookie({}, 'y')
5759
"""
5860

pywb/webapp/cached_replay.py

Lines changed: 0 additions & 34 deletions
This file was deleted.

pywb/webapp/handlers.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,7 @@ def __init__(self, query_handler, config=None):
120120
resolving_loader = ResolvingLoader(paths=paths,
121121
record_loader=record_loader)
122122

123-
enable_cache = config.get('enable_cache')
124-
if enable_cache:
125-
self.replay = CachedReplayView(resolving_loader, config)
126-
else:
127-
self.replay = ReplayView(resolving_loader, config)
123+
self.replay = ReplayView(resolving_loader, config)
128124

129125
self.fallback_handler = None
130126
self.fallback_name = config.get('fallback')

pywb/webapp/rangecache.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,6 @@ def cleanup(self):
2424
shutil.rmtree(self.temp_dir, True)
2525
self.temp_dir = None
2626

27-
def __call__(self, wbrequest, digest, wbresponse_func):
28-
result = wbrequest.extract_range()
29-
if not result:
30-
return None, None
31-
32-
# no longer needed -- handled at frontend rewrite
33-
#if wbrequest.env.get('HTTP_X_IGNORE_RANGE_ARG'):
34-
# wbrequest.wb_url.url = result[0]
35-
# return None, None
36-
37-
return self.handle_range(wbrequest,
38-
digest,
39-
wbresponse_func,
40-
*result)
41-
4227
def handle_range(self, wbrequest, digest, wbresponse_func,
4328
url, start, end, use_206):
4429

pywb/webapp/replay_views.py

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from views import J2TemplateView, add_env_globals
1616
from views import J2HtmlCapturesView, HeadInsertView
1717

18+
from rangecache import range_cache
19+
1820

1921
#=================================================================
2022
class CaptureException(WbException):
@@ -49,6 +51,8 @@ def __init__(self, content_loader, config):
4951
else:
5052
self.response_class = WbResponse
5153

54+
self.enable_range_cache = config.get('enable_ranges', True)
55+
5256
self._reporter = config.get('reporter')
5357

5458
def render_content(self, wbrequest, cdx_lines, cdx_loader):
@@ -77,10 +81,10 @@ def render_content(self, wbrequest, cdx_lines, cdx_loader):
7781

7882
first = False
7983

80-
response = self.replay_capture(wbrequest,
81-
cdx,
82-
cdx_loader,
83-
failed_files)
84+
response = self.cached_replay_capture(wbrequest,
85+
cdx,
86+
cdx_loader,
87+
failed_files)
8488

8589
except (CaptureException, ArchiveLoadFailed) as ce:
8690
import traceback
@@ -99,6 +103,33 @@ def render_content(self, wbrequest, cdx_lines, cdx_loader):
99103

100104
raise last_e
101105

106+
def cached_replay_capture(self, wbrequest, cdx, cdx_loader, failed_files):
107+
def get_capture():
108+
return self.replay_capture(wbrequest,
109+
cdx,
110+
cdx_loader,
111+
failed_files)
112+
113+
if not self.enable_range_cache:
114+
return get_capture()
115+
116+
range_info = wbrequest.extract_range()
117+
118+
if not range_info:
119+
return get_capture()
120+
121+
range_status, range_iter = (range_cache.
122+
handle_range(wbrequest,
123+
cdx.get('digest'),
124+
get_capture,
125+
*range_info))
126+
127+
response = self.response_class(range_status,
128+
range_iter,
129+
wbrequest=wbrequest,
130+
cdx=cdx)
131+
return response
132+
102133
def replay_capture(self, wbrequest, cdx, cdx_loader, failed_files):
103134
(status_headers, stream) = (self.content_loader.
104135
resolve_headers_and_payload(cdx,

tests/test_config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,9 @@ collections:
3838
index_paths: ./sample_archive/cdx/
3939
fallback: live
4040

41-
pywb-rangecache:
41+
pywb-norange:
4242
index_paths: ./sample_archive/cdx/
43-
enable_cache: true
43+
enable_ranges: False
4444

4545
# indicate if cdx files are sorted by SURT keys -- eg: com,example)/
4646
# SURT keys are recommended for future indices, but non-SURT cdxs

tests/test_integration.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -101,15 +101,6 @@ def test_replay_content(self):
101101
assert 'new _WBWombat' in resp.body, resp.body
102102
assert '/pywb/20140127171238/http://www.iana.org/time-zones"' in resp.body
103103

104-
def test_replay_content_with_rangecache(self):
105-
resp = self.testapp.get('/pywb-rangecache/20140127171238/http://www.iana.org/')
106-
self._assert_basic_html(resp)
107-
108-
assert '"20140127171238"' in resp.body
109-
assert 'wb.js' in resp.body
110-
assert 'new _WBWombat' in resp.body, resp.body
111-
assert '/pywb-rangecache/20140127171238/http://www.iana.org/time-zones"' in resp.body
112-
113104
def test_replay_non_frame_content(self):
114105
resp = self.testapp.get('/pywb-nonframe/20140127171238/http://www.iana.org/')
115106
self._assert_basic_html(resp)
@@ -142,6 +133,11 @@ def test_replay_url_agnostic_revisit(self):
142133
assert 'wb.js' in resp.body
143134
assert '/pywb/20130729195151/http://www.iana.org/domains/example"' in resp.body
144135

136+
def test_video_info_not_found(self):
137+
# not actually archived, but ensure video info path is tested
138+
resp = self.testapp.get('/pywb/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M', status=404)
139+
assert resp.status_int == 404
140+
145141
def test_replay_cdx_mod(self):
146142
resp = self.testapp.get('/pywb/20140127171239cdx_/http://www.iana.org/_css/2013.1/print.css')
147143
self._assert_basic_text(resp)
@@ -177,7 +173,7 @@ def test_replay_identity_1(self):
177173

178174
def test_replay_range_cache_content(self):
179175
headers = [('Range', 'bytes=0-200')]
180-
resp = self.testapp.get('/pywb-rangecache/20140127171251id_/http://example.com', headers=headers)
176+
resp = self.testapp.get('/pywb/20140127171251id_/http://example.com', headers=headers)
181177

182178
assert resp.status_int == 206
183179
assert resp.headers['Accept-Ranges'] == 'bytes'
@@ -186,9 +182,22 @@ def test_replay_range_cache_content(self):
186182

187183
assert 'wb.js' not in resp.body
188184

185+
def test_replay_content_ignore_range(self):
186+
headers = [('Range', 'bytes=0-200')]
187+
resp = self.testapp.get('/pywb-norange/20140127171251id_/http://example.com', headers=headers)
188+
189+
# range request ignored
190+
assert resp.status_int == 200
191+
192+
# full response
193+
assert resp.content_length == 1270, resp.content_length
194+
195+
# identity, no header insertion
196+
assert 'wb.js' not in resp.body
197+
189198
def test_replay_range_cache_content_bound_end(self):
190199
headers = [('Range', 'bytes=10-10000')]
191-
resp = self.testapp.get('/pywb-rangecache/20140127171251id_/http://example.com', headers=headers)
200+
resp = self.testapp.get('/pywb/20140127171251id_/http://example.com', headers=headers)
192201

193202
assert resp.status_int == 206
194203
assert resp.headers['Accept-Ranges'] == 'bytes'
@@ -201,7 +210,7 @@ def test_replay_range_cache_content_bound_end(self):
201210
def test_replay_redir_no_cache(self):
202211
headers = [('Range', 'bytes=10-10000')]
203212
# Range ignored
204-
resp = self.testapp.get('/pywb-rangecache/20140126200927/http://www.iana.org/domains/root/db/', headers=headers)
213+
resp = self.testapp.get('/pywb/20140126200927/http://www.iana.org/domains/root/db/', headers=headers)
205214
assert resp.content_length == 0
206215
assert resp.status_int == 302
207216

0 commit comments

Comments
 (0)