Skip to content

Commit cb8b269

Browse files
N0taN3rd authored and ikreymer committed
improved the rewrite_html_full check in wombat: (#398)
- FullHTMLRegex: performs a case-insensitive check for <html, <body, <head and <!doctype html>. Updated rewrite_elem to: - rewrite meta tags that deliver CSP policies - check for additional attributes that could contain un-rewritten URLs (form.style, iframe.style). Made the check for full HTML into a regex.
1 parent 82f2dac commit cb8b269

File tree

1 file changed

+15
-4
lines changed

1 file changed

+15
-4
lines changed

pywb/static/wombat.js

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
137137
// Matches a CSS url(...) reference (global, case-insensitive). Capture groups:
//   1: the text url( with optional whitespace and any leading quote or backslash characters
//   2: the reference itself, one or more characters that are not a closing paren or a quote
//   3: any trailing quote or backslash characters, optional whitespace and the closing paren
var STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi;
138138
// Matches a CSS @import statement (global, case-insensitive). Capture groups:
//   1: the text @import, at least one whitespace character and any leading quote or backslash characters
//   2: the import target, one or more characters that are not a closing paren, a quote or a semicolon
//   3: any trailing quote or backslash characters, optional whitespace and an optional semicolon
var IMPORT_REGEX = /(@import\s+[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi;
139139
// Two alternatives, no flags (so matching is case-sensitive and not global):
//   a) optional whitespace, then a captured run of non-space characters followed by
//      whitespace and a numeric w or x descriptor, terminated by a comma
//   b) optional whitespace and a comma that is followed either by whitespace or
//      (via lookahead, not consumed) by http: or https:
// NOTE(review): presumably used to split srcset attribute values into candidates;
// the call site is not visible here, confirm before relying on this.
var SRCSET_REGEX = /\s*(\S*\s+[\d\.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/;
140+
// Case-insensitive test for markup that, after optional leading whitespace, begins
// with an html, head or body opening tag or with a doctype html declaration.
// rewrite_html uses it to decide whether to hand the string to rewrite_html_full.
// The regex has no g flag, so repeated .test() calls carry no lastIndex state.
var FullHTMLRegex = /^\s*<(?:html|head|body|!doctype html)/i;
140141

141142
function rwModForElement(elem, attrName) {
142143
// this function was created to help add in retrial of element attribute rewrite modifiers
@@ -1767,12 +1768,19 @@ var _WBWombat = function($wbwindow, wbinfo) {
17671768
return;
17681769
}
17691770

1770-
var changed;
1771+
var changed = false;
17711772
// we use a switch now cause perf and complexity
17721773
switch (elem.tagName) {
1774+
case 'META':
1775+
var maybeCSP = wb_getAttribute.call(elem, 'http-equiv');
1776+
if (maybeCSP && maybeCSP.toLowerCase() === 'content-security-policy') {
1777+
wb_setAttribute.call(elem, 'http-equiv', '_' + maybeCSP);
1778+
changed = true;
1779+
}
1780+
break;
17731781
case 'STYLE':
17741782
var new_content = rewrite_style(elem.textContent);
1775-
if (elem.textContent !== new_content) {
1783+
if (elem.textContent != new_content) {
17761784
elem.textContent = new_content;
17771785
changed = true;
17781786
if (wbUseAFWorker && elem.sheet != null) {
@@ -1799,10 +1807,12 @@ var _WBWombat = function($wbwindow, wbinfo) {
17991807
break;
18001808
case 'FORM':
18011809
changed = rewrite_attr(elem, "action", true);
1810+
changed = rewrite_attr(elem, 'style') || changed;
18021811
break;
18031812
case 'IFRAME':
18041813
case 'FRAME':
18051814
changed = rewrite_frame_src(elem, "src");
1815+
changed = rewrite_attr(elem, 'style') || changed;
18061816
break;
18071817
case 'SCRIPT':
18081818
changed = rewrite_script(elem);
@@ -1813,6 +1823,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
18131823
default:
18141824
if (elem instanceof SVGElement && elem.hasAttribute('filter')) {
18151825
changed = rewrite_attr(elem, 'filter');
1826+
changed = rewrite_attr(elem, 'style') || changed;
18161827
} else {
18171828
changed = rewrite_attr(elem, 'src');
18181829
changed = rewrite_attr(elem, 'srcset') || changed;
@@ -1839,7 +1850,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
18391850

18401851
var write_buff = "";
18411852

1842-
//============================================
1853+
//===========================================
18431854
function rewrite_html(string, check_end_tag) {
18441855
if (!string) {
18451856
return string;
@@ -1859,7 +1870,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
18591870
string = string.replace(/((id|class)=".*)WB_wombat_([^"]+)/, '$1$3');
18601871
}
18611872

1862-
if (!$wbwindow.HTMLTemplateElement || starts_with(string, ["<html", "<head", "<body"])) {
1873+
if (!$wbwindow.HTMLTemplateElement || FullHTMLRegex.test(string)) {
18631874
return rewrite_html_full(string, check_end_tag);
18641875
}
18651876

0 commit comments

Comments
 (0)