Skip to content

Commit

Permalink
2.6.1 Release Work (#679)
Browse files Browse the repository at this point in the history
* rules: add custom twitter video rewriting to capture non-chunked twitter video (max bitrate of 5000000)

* autoescaping regression fix: don't escape URL in frame_insert.html, use as is

* html rewriting:
- don't rewrite 'data-' attributes, no longer necessary for best fidelity
- do rewrite <link rel='alternate'> as main page (mp_)
- update html rewriting test

* feature: support customizing the static path used in pywb via 'static_prefix' config option (defaults to 'static')

* update to latest wombat (3.3.4)

* bump to 2.6.1, update CHANGES for 2.6.1
  • Loading branch information
ikreymer authored Nov 12, 2021
1 parent 96de80f commit a6be766
Show file tree
Hide file tree
Showing 12 changed files with 65 additions and 18 deletions.
14 changes: 14 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
pywb 2.6.1 changelist
~~~~~~~~~~~~~~~~~~~~~

* Domain-Specific Rewriting Rules: Rewrite twitter video to capture full videos.

* Disable rewriting ``data-`` attributes, better fidelity without rewriting, fixes `#676 <https://github.com/webrecorder/pywb/pull/676>`_

* Fix regression in autoescaping URL in frame_insert.html

* Feature: ability to set path used to serve static assets (default ``static``) via ``static_prefix`` config option.

* Update wombat.js to 3.3.4 (includes various rewriting fixes)


pywb 2.6.0 changelist
~~~~~~~~~~~~~~~~~~~~~

Expand Down
7 changes: 5 additions & 2 deletions pywb/apps/rewriterapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ def __init__(self, framed_replay=False, jinja_env=None, config=None, paths=None)

self.enable_memento = self.config.get('enable_memento')

self.static_prefix = self.config.get('static_prefix', 'static')

csp_header = self.config.get('csp-header', self.DEFAULT_CSP)
if csp_header:
self.csp_header = ('Content-Security-Policy', csp_header)
Expand Down Expand Up @@ -323,8 +325,9 @@ def render_content(self, wb_url, kwargs, environ):
rel_prefix = self.get_rel_prefix(environ)
full_prefix = host_prefix + rel_prefix
environ['pywb.host_prefix'] = host_prefix
pywb_static_prefix = host_prefix + environ.get('pywb.app_prefix', '') + environ.get(
'pywb.static_prefix', '/static/')
pywb_static_prefix = host_prefix + environ.get('pywb.app_prefix', '') + '/' + self.static_prefix
environ['pywb.static_prefix'] = pywb_static_prefix
pywb_static_prefix += '/'
is_proxy = ('wsgiprox.proxy_host' in environ)

# if OPTIONS in proxy mode, just generate the proxy response
Expand Down
8 changes: 1 addition & 7 deletions pywb/rewrite/html_rewriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,12 +416,6 @@ def _rewrite_tag_attrs(self, tag, tag_attrs, set_parsing_context=True):
rw_mod = handler.get(attr_name)
attr_value = self._rewrite_url(attr_value, rw_mod)

# special case: data- attrs, conditional rewrite
elif attr_name and attr_value and attr_name.startswith('data-'):
if attr_value.startswith(self.DATA_RW_PROTOCOLS):
rw_mod = 'oe_'
attr_value = self._rewrite_url(attr_value, rw_mod)

# special case: base tag
elif (tag == 'base') and (attr_name == 'href') and attr_value:
rw_mod = handler.get(attr_name)
Expand Down Expand Up @@ -469,7 +463,7 @@ def _rewrite_link_href(self, attr_value, tag_attrs, rw_mod):
rw_mod = self.PRELOAD_TYPES.get(preload, rw_mod)

# for html imports with an optional as (google exclusive)
elif rel == 'import':
elif rel == 'import' or rel == 'alternate':
rw_mod = 'mp_'

elif rel == 'stylesheet':
Expand Down
26 changes: 26 additions & 0 deletions pywb/rewrite/rewrite_dash.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,29 @@ def rewrite_fb_dash(string, *args):
string += json.dumps(best_ids)
return string

def rewrite_tw_dash(string, *args):
    """Reduce a Twitter ``video_info`` JSON blob to a single MP4 variant.

    The JSON object in ``string`` is expected to carry a ``"variants"`` list.
    Among the entries whose ``"content_type"`` is ``"video/mp4"``, the one
    with the highest ``"bitrate"`` not exceeding 5000000 is kept and all
    other variants are dropped. If no entry qualifies, the variants list is
    left as is (the payload is still re-serialized).

    This is a best-effort rewrite: if the input is not valid JSON or does not
    have the expected shape, the problem is logged and the original string is
    returned unchanged.

    :param string: JSON text of the ``video_info`` object.
    :param args: ignored; accepted so extra positional arguments from the
        caller do not break the call.
    :return: the rewritten JSON text, or ``string`` unchanged on failure.
    """
    max_bitrate = 5000000

    try:
        data = json.loads(string)

        best_variant = None
        best_bitrate = 0

        for variant in data["variants"]:
            # Use .get(): a variant without "content_type" is simply not an
            # MP4 candidate and must not abort the whole rewrite.
            if variant.get("content_type") != "video/mp4":
                continue

            bitrate = variant.get("bitrate")
            if bitrate and best_bitrate < bitrate <= max_bitrate:
                best_variant = variant
                best_bitrate = bitrate

        if best_variant:
            data["variants"] = [best_variant]

        string = json.dumps(data)

    except Exception as e:
        # Deliberately broad: rewriting must never break replay of the
        # response, so fall through and return the input untouched.
        import logging
        logging.getLogger(__name__).warning(
            'rewrite_tw_dash: leaving video_info unmodified: %s', e)

    return string

2 changes: 1 addition & 1 deletion pywb/rewrite/templateview.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def render_to_string(self, env, **kwargs):
kwargs.update(params)

kwargs['env'] = env
kwargs['static_prefix'] = env.get('pywb.host_prefix', '') + env.get('pywb.app_prefix', '') + '/static'
kwargs['static_prefix'] = env.get('pywb.static_prefix')


return template.render(**kwargs)
Expand Down
4 changes: 2 additions & 2 deletions pywb/rewrite/test/test_html_rewriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,9 @@
>>> parse('<meta http-equiv="Content-Security-Policy" content="default-src http://example.com" />')
<meta http-equiv="Content-Security-Policy" _content="default-src http://example.com"/>
# Custom -data attribs
# Don't rewrite Custom -data attribs
>>> parse('<div data-url="http://example.com/a/b/c.html" data-some-other-value="http://example.com/img.gif">')
<div data-url="/web/20131226101010oe_/http://example.com/a/b/c.html" data-some-other-value="/web/20131226101010oe_/http://example.com/img.gif">
<div data-url="http://example.com/a/b/c.html" data-some-other-value="http://example.com/img.gif">
# param tag -- rewrite conditionally if url
>>> parse('<param value="http://example.com/"/>')
Expand Down
10 changes: 10 additions & 0 deletions pywb/rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,16 @@ rules:
- url_prefix: 'com,twitter)/i/videos/tweet'

fuzzy_lookup: '()'

- url_prefix: ['com,twitter,api)/2/', 'com,twitter)/i/api/2/', 'com,twitter)/i/api/graphql/']

rewrite:
js_regexs:
- match: 'video_info":(.*?}]})'
group: 1
function: 'pywb.rewrite.rewrite_dash:rewrite_tw_dash'




# facebook rules
Expand Down
2 changes: 1 addition & 1 deletion pywb/static/wombat.js

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pywb/templates/frame_insert.html
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@

{{ banner_html }}

{% endautoescape %}

</head>
<body style="margin: 0px; padding: 0px;">

Expand All @@ -35,3 +33,5 @@
</script>
</body>
</html>
{% endautoescape %}

2 changes: 1 addition & 1 deletion pywb/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '2.6.0'
__version__ = '2.6.1'

if __name__ == '__main__':
print(__version__)
2 changes: 1 addition & 1 deletion pywb/warcserver/test/testutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def setup_class(cls):
# Load expected link headers
MementoOverrideTests.link_header_data = None
with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh:
MementoOverrideTests.link_header_data = yaml.load(fh)
MementoOverrideTests.link_header_data = yaml.load(fh, Loader=yaml.Loader)

MementoOverrideTests.orig_get_timegate_links = MementoIndexSource.get_timegate_links

Expand Down
2 changes: 1 addition & 1 deletion wombat

0 comments on commit a6be766

Please sign in to comment.