Skip to content

Commit 7a0680f

Browse files
committed
memento: for a not-found timemap query, return an empty timemap instead of the HTML query error page, closes #158
1 parent 9f77ac5 commit 7a0680f

File tree

4 files changed

+67
-13
lines changed

4 files changed

+67
-13
lines changed

pywb/framework/memento.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -181,14 +181,19 @@ def make_timemap(wbrequest, cdx_lines):
181181
mod = wbrequest.options.get('replay_mod', '')
182182

183183
# get first memento as it'll be used for 'from' field
184-
first_cdx = cdx_lines.next()
185-
from_date = timestamp_to_http_date(first_cdx['timestamp'])
184+
try:
185+
first_cdx = cdx_lines.next()
186+
from_date = timestamp_to_http_date(first_cdx['timestamp'])
187+
except StopIteration:
188+
first_cdx = None
186189

187-
# timemap link
188-
timemap = ('<{0}>; rel="self"; ' +
189-
'type="application/link-format"; from="{1}",\n')
190-
yield timemap.format(prefix + wbrequest.wb_url.to_str(),
191-
from_date)
190+
191+
if first_cdx:
192+
# timemap link
193+
timemap = ('<{0}>; rel="self"; ' +
194+
'type="application/link-format"; from="{1}",\n')
195+
yield timemap.format(prefix + wbrequest.wb_url.to_str(),
196+
from_date)
192197

193198
# original link
194199
original = '<{0}>; rel="original",\n'
@@ -202,6 +207,12 @@ def make_timemap(wbrequest, cdx_lines):
202207

203208
yield timegate.format(prefix + timegate_url)
204209

210+
if not first_cdx:
211+
# terminating timemap link, no from
212+
timemap = ('<{0}>; rel="self"; type="application/link-format"')
213+
yield timemap.format(prefix + wbrequest.wb_url.to_str())
214+
return
215+
205216
# first memento link
206217
yield make_timemap_memento_link(first_cdx, prefix,
207218
datetime=from_date, mod=mod)

pywb/webapp/handlers.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,8 @@ def handle_not_found(self, wbrequest, nfe):
176176

177177
# if capture query, just return capture page
178178
if wbrequest.wb_url.is_query():
179-
return self.index_reader.make_cdx_response(wbrequest, [], 'html')
179+
output = self.index_reader.get_output_type(wbrequest.wb_url)
180+
return self.index_reader.make_cdx_response(wbrequest, iter([]), output)
180181
else:
181182
return self.not_found_view.render_response(status='404 Not Found',
182183
wbrequest=wbrequest,

pywb/webapp/query_handler.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,7 @@ def init_from_config(config,
4646

4747
return QueryHandler(cdx_server, html_view, perms_policy)
4848

49-
def load_for_request(self, wbrequest):
50-
wbrequest.normalize_post_query()
51-
52-
wb_url = wbrequest.wb_url
53-
49+
def get_output_type(self, wb_url):
5450
# cdx server only supports text and cdxobject for now
5551
if wb_url.mod == 'cdx_':
5652
output = 'text'
@@ -61,6 +57,14 @@ def load_for_request(self, wbrequest):
6157
else:
6258
output = 'cdxobject'
6359

60+
return output
61+
62+
def load_for_request(self, wbrequest):
63+
wbrequest.normalize_post_query()
64+
65+
wb_url = wbrequest.wb_url
66+
output = self.get_output_type(wb_url)
67+
6468
# init standard params
6569
params = self.get_query_params(wb_url)
6670

tests/test_memento.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,44 @@ def test_timemap_2(self):
306306

307307
assert len(lines) == 3 + 3
308308

309+
310+
def test_timemap_not_found(self):
311+
"""
312+
Test application/link-format timemap
313+
"""
314+
315+
resp = self.testapp.get('/pywb/timemap/*/http://example.com/blah/not_found')
316+
assert resp.status_int == 200
317+
assert resp.content_type == LINK_FORMAT
318+
319+
lines = resp.body.split('\n')
320+
321+
assert len(lines) == 3
322+
323+
assert lines[0] == '<http://example.com/blah/not_found>; rel="original",'
324+
325+
assert lines[1] == '<http://localhost:80/pywb/http://example.com/blah/not_found>; rel="timegate",'
326+
327+
assert lines[2] == '<http://localhost:80/pywb/timemap/*/http://example.com/blah/not_found>; \
328+
rel="self"; type="application/link-format"'
329+
330+
331+
def test_timemap_2(self):
332+
"""
333+
Test application/link-format timemap total count
334+
"""
335+
336+
resp = self.testapp.get('/pywb/timemap/*/http://example.com')
337+
assert resp.status_int == 200
338+
assert resp.content_type == LINK_FORMAT
339+
340+
lines = resp.body.split('\n')
341+
342+
assert len(lines) == 3 + 3
343+
344+
345+
346+
309347
# Below functions test pywb proxy mode behavior
310348
# They are designed to roughly conform to Memento protocol Pattern 1.3
311349
# with the exception that the original resource is not available

0 commit comments

Comments
 (0)