Skip to content

Commit a6be766

Browse files
authored
2.6.1 Release Work (#679)
* rules: add custom twitter video rewriting to capture non-chunked twitter video (max bitrate of 5000000) * autoescaping regression fix: don't escape URL in frame_insert.html, use as is * html rewriting: - don't rewrite 'data-' attributes, no longer necessary for best fidelity - do rewrite <link rel='alternate'> as main page (mp_) - update html rewriting test * feature: support customizing the static path used in pywb via 'static_prefix' config option (defaults to 'static') * update to latest wombat (3.3.4) * bump to 2.6.1, update CHANGES for 2.6.1
1 parent 96de80f commit a6be766

File tree

12 files changed

+65
-18
lines changed

12 files changed

+65
-18
lines changed

CHANGES.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1+
pywb 2.6.1 changelist
2+
~~~~~~~~~~~~~~~~~~~~~
3+
4+
* Domain-Specific Rewriting Rules: Rewrite twitter video to capture full videos.
5+
6+
* Disable rewriting ``data-`` attributes, better fidelity without rewriting, fixes `#676 <https://github.com/webrecorder/pywb/pull/676>`_
7+
8+
* Fix regression in autoescaping URL in frame_insert.html
9+
10+
* Feature: ability to set path used to serve static assets (default ``static``) via ``static_prefix`` config option.
11+
12+
* Update wombat.js 3.3.4 (includes various rewriting fixes)
13+
14+
115
pywb 2.6.0 changelist
216
~~~~~~~~~~~~~~~~~~~~~
317

pywb/apps/rewriterapp.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ def __init__(self, framed_replay=False, jinja_env=None, config=None, paths=None)
9797

9898
self.enable_memento = self.config.get('enable_memento')
9999

100+
self.static_prefix = self.config.get('static_prefix', 'static')
101+
100102
csp_header = self.config.get('csp-header', self.DEFAULT_CSP)
101103
if csp_header:
102104
self.csp_header = ('Content-Security-Policy', csp_header)
@@ -323,8 +325,9 @@ def render_content(self, wb_url, kwargs, environ):
323325
rel_prefix = self.get_rel_prefix(environ)
324326
full_prefix = host_prefix + rel_prefix
325327
environ['pywb.host_prefix'] = host_prefix
326-
pywb_static_prefix = host_prefix + environ.get('pywb.app_prefix', '') + environ.get(
327-
'pywb.static_prefix', '/static/')
328+
pywb_static_prefix = host_prefix + environ.get('pywb.app_prefix', '') + '/' + self.static_prefix
329+
environ['pywb.static_prefix'] = pywb_static_prefix
330+
pywb_static_prefix += '/'
328331
is_proxy = ('wsgiprox.proxy_host' in environ)
329332

330333
# if OPTIONS in proxy mode, just generate the proxy response

pywb/rewrite/html_rewriter.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -416,12 +416,6 @@ def _rewrite_tag_attrs(self, tag, tag_attrs, set_parsing_context=True):
416416
rw_mod = handler.get(attr_name)
417417
attr_value = self._rewrite_url(attr_value, rw_mod)
418418

419-
# special case: data- attrs, conditional rewrite
420-
elif attr_name and attr_value and attr_name.startswith('data-'):
421-
if attr_value.startswith(self.DATA_RW_PROTOCOLS):
422-
rw_mod = 'oe_'
423-
attr_value = self._rewrite_url(attr_value, rw_mod)
424-
425419
# special case: base tag
426420
elif (tag == 'base') and (attr_name == 'href') and attr_value:
427421
rw_mod = handler.get(attr_name)
@@ -469,7 +463,7 @@ def _rewrite_link_href(self, attr_value, tag_attrs, rw_mod):
469463
rw_mod = self.PRELOAD_TYPES.get(preload, rw_mod)
470464

471465
# for html imports with an optional as (google exclusive)
472-
elif rel == 'import':
466+
elif rel == 'import' or rel == 'alternate':
473467
rw_mod = 'mp_'
474468

475469
elif rel == 'stylesheet':

pywb/rewrite/rewrite_dash.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,29 @@ def rewrite_fb_dash(string, *args):
8686
string += json.dumps(best_ids)
8787
return string
8888

89+
def rewrite_tw_dash(string, *args):
    """Reduce a Twitter ``video_info`` JSON blob to its single best mp4 variant.

    The input is parsed as JSON and its ``"variants"`` list is scanned for
    entries whose ``content_type`` is ``video/mp4``. Among those, the one
    with the highest ``bitrate`` not exceeding 5000000 is kept and all other
    variants are dropped, so that only one non-chunked video is offered.

    :param string: JSON text containing a top-level ``"variants"`` list.
    :param args: extra positional arguments, accepted and ignored
        (presumably supplied by the rewrite-rule machinery -- confirm
        against the caller).
    :return: the re-serialized JSON. If no variant qualifies, the data is
        re-serialized with its variants untouched. If the input cannot be
        processed at all, ``string`` is returned unchanged.
    """
    # Local import: the module's import block is not guaranteed to
    # provide ``logging``.
    import logging

    max_bitrate = 5000000

    try:
        best_variant = None
        best_bitrate = 0

        data = json.loads(string)
        for variant in data["variants"]:
            # .get(): a variant without a content_type is simply skipped
            # instead of raising KeyError and aborting the whole rewrite.
            if variant.get("content_type") != "video/mp4":
                continue

            bitrate = variant.get("bitrate")
            # Strict '>' keeps the first variant when bitrates tie.
            if bitrate and best_bitrate < bitrate <= max_bitrate:
                best_variant = variant
                best_bitrate = bitrate

        if best_variant:
            data["variants"] = [best_variant]

        string = json.dumps(data)

    except Exception as e:
        # Best effort: a malformed blob must not break replay, so the
        # original text is served as is -- but record why.
        logging.getLogger(__name__).warning(
            "rewrite_tw_dash: leaving video_info unmodified: %s", e)

    return string
114+

pywb/rewrite/templateview.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ def render_to_string(self, env, **kwargs):
322322
kwargs.update(params)
323323

324324
kwargs['env'] = env
325-
kwargs['static_prefix'] = env.get('pywb.host_prefix', '') + env.get('pywb.app_prefix', '') + '/static'
325+
kwargs['static_prefix'] = env.get('pywb.static_prefix')
326326

327327

328328
return template.render(**kwargs)

pywb/rewrite/test/test_html_rewriter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,9 @@
138138
>>> parse('<meta http-equiv="Content-Security-Policy" content="default-src http://example.com" />')
139139
<meta http-equiv="Content-Security-Policy" _content="default-src http://example.com"/>
140140
141-
# Custom -data attribs
141+
# Don't rewrite custom data- attributes
142142
>>> parse('<div data-url="http://example.com/a/b/c.html" data-some-other-value="http://example.com/img.gif">')
143-
<div data-url="/web/20131226101010oe_/http://example.com/a/b/c.html" data-some-other-value="/web/20131226101010oe_/http://example.com/img.gif">
143+
<div data-url="http://example.com/a/b/c.html" data-some-other-value="http://example.com/img.gif">
144144
145145
# param tag -- rewrite conditionally if url
146146
>>> parse('<param value="http://example.com/"/>')

pywb/rules.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,16 @@ rules:
6767
- url_prefix: 'com,twitter)/i/videos/tweet'
6868

6969
fuzzy_lookup: '()'
70+
71+
- url_prefix: ['com,twitter,api)/2/', 'com,twitter)/i/api/2/', 'com,twitter)/i/api/graphql/']
72+
73+
rewrite:
74+
js_regexs:
75+
- match: 'video_info":(.*?}]})'
76+
group: 1
77+
function: 'pywb.rewrite.rewrite_dash:rewrite_tw_dash'
78+
79+
7080

7181

7282
# facebook rules

pywb/static/wombat.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pywb/templates/frame_insert.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818

1919
{{ banner_html }}
2020

21-
{% endautoescape %}
22-
2321
</head>
2422
<body style="margin: 0px; padding: 0px;">
2523

@@ -35,3 +33,5 @@
3533
</script>
3634
</body>
3735
</html>
36+
{% endautoescape %}
37+

pywb/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = '2.6.0'
1+
__version__ = '2.6.1'
22

33
if __name__ == '__main__':
44
print(__version__)

pywb/warcserver/test/testutils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def setup_class(cls):
111111
# Load expected link headers
112112
MementoOverrideTests.link_header_data = None
113113
with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh:
114-
MementoOverrideTests.link_header_data = yaml.load(fh)
114+
MementoOverrideTests.link_header_data = yaml.load(fh, Loader=yaml.Loader)
115115

116116
MementoOverrideTests.orig_get_timegate_links = MementoIndexSource.get_timegate_links
117117

0 commit comments

Comments
 (0)