Skip to content

Commit 1fddec2

Browse files
authored
Add ir_ modifier (#759)
* rewrite: add 'ir_' mod to support header-only URL rewriting with no content rewriting
* tests: add tests for ir_ to verify that content is identical to id_, but Location headers are rewritten with the ir_ modifier.
1 parent 8ef4ff1 commit 1fddec2

File tree

3 files changed

+29
-3
lines changed

3 files changed

+29
-3
lines changed

pywb/rewrite/content_rewriter.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,7 @@ def should_rw_content(self):
524524
if not self.text_type:
525525
return False
526526

527-
if self.url_rewriter.wburl.mod == 'id_':
527+
if self.is_identity():
528528
return False
529529

530530
if self.url_rewriter.rewrite_opts.get('is_ajax'):
@@ -537,9 +537,11 @@ def should_rw_content(self):
537537

538538
return True
539539

540+
def is_identity(self):
541+
return self.url_rewriter.wburl.mod in ('id_', 'ir_')
542+
540543
def is_url_rw(self):
541544
if self.url_rewriter.wburl.mod in ('id_', 'bn_', 'wkrf_'):
542545
return False
543546

544547
return True
545-

pywb/rewrite/default_rewriter.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ def __init__(self, replay_mod='', config=None):
102102
super(DefaultRewriter, self).__init__(rules_file, replay_mod)
103103
self.all_rewriters = copy.copy(self.DEFAULT_REWRITERS)
104104

105+
self.add_prefer_mod('raw', 'ir_')
105106
self.add_prefer_mod('raw', 'id_')
106107
self.add_prefer_mod('banner-only', 'bn_')
107108
self.add_prefer_mod('rewritten', replay_mod)

tests/test_integration.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,19 @@ def test_replay_resource(self, fmod):
138138

139139
def test_replay_redirect(self, fmod):
140140
resp = self.get('/pywb/2014{0}/http://www.iana.org/domains/example', fmod)
141-
assert resp.headers['Location'].startswith('/pywb/2014{0}/'.format(fmod))
141+
assert resp.headers['Location'] == '/pywb/2014{0}/http://www.iana.org/domains/reserved'.format(fmod)
142+
assert resp.status_code == 302
143+
144+
def test_replay_redirect_id(self):
145+
resp = self.get('/pywb/2014id_/http://www.iana.org/domains/example', fmod)
146+
print(resp.headers['Location'])
147+
assert resp.headers['Location'] == '/domains/reserved'
148+
assert resp.status_code == 302
149+
150+
def test_replay_redirect_ir(self):
151+
resp = self.get('/pywb/2014ir_/http://www.iana.org/domains/example', fmod)
152+
print(resp.headers['Location'])
153+
assert resp.headers['Location'] == '/pywb/2014ir_/http://www.iana.org/domains/reserved'
142154
assert resp.status_code == 302
143155

144156
def test_replay_fuzzy_1(self, fmod):
@@ -224,6 +236,17 @@ def test_replay_identity_1(self):
224236
# original unrewritten url present
225237
assert '"http://www.iana.org/domains/example"' in resp.text
226238

239+
def test_replay_identity_1_ir(self):
240+
resp = self.testapp.get('/pywb/20140127171251ir_/http://example.com/')
241+
242+
# no wb header insertion
243+
assert 'wombat.js' not in resp.text
244+
245+
assert resp.content_length == 1270, resp.content_length
246+
247+
# original unrewritten url present
248+
assert '"http://www.iana.org/domains/example"' in resp.text
249+
227250
def test_replay_identity_2_arcgz(self):
228251
resp = self.testapp.get('/pywb/20140216050221id_/http://arc.gz.test.example.com/')
229252

0 commit comments

Comments
 (0)