Skip to content

Commit 63d247a

Browse files
authored
FB + other dynamic site capture improvements (#249)
- Update to wabac.js 2.19.6 for improved FB rewriting rules
- Update to browsertrix-behaviors 0.6.4 to fix async fetch, even when behaviors aren't running on autopilot
- Don't truncate POST bodies if the URL is handled by custom rules
- Bump version to 0.12.6
1 parent e4932ff commit 63d247a

File tree

4 files changed

+46
-36
lines changed

4 files changed

+46
-36
lines changed

package.json

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "@webrecorder/archivewebpage",
33
"productName": "ArchiveWeb.page",
4-
"version": "0.12.5",
4+
"version": "0.12.6",
55
"main": "index.js",
66
"description": "Create Web Archives directly in your browser",
77
"repository": "https://github.com/webrecorder/archiveweb.page",
@@ -11,9 +11,9 @@
1111
"@fortawesome/fontawesome-free": "^5.13.0",
1212
"@ipld/car": "^5.3.1",
1313
"@webrecorder/awp-sw": "^0.4.4",
14-
"@webrecorder/wabac": "^2.19.4",
14+
"@webrecorder/wabac": "^2.19.6",
1515
"auto-js-ipfs": "^2.3.0",
16-
"browsertrix-behaviors": "^0.6.0",
16+
"browsertrix-behaviors": "^0.6.4",
1717
"btoa": "^1.2.1",
1818
"bulma": "^0.9.3",
1919
"client-zip": "^2.2.2",

src/recorder.js

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import { RequestResponseInfo } from "./requestresponseinfo.js";
22

3-
import { baseRules as baseDSRules, htmlRules as htmlDSRules } from "@webrecorder/wabac/src/rewrite";
3+
import { getCustomRewriter } from "@webrecorder/wabac/src/rewrite";
44
import { rewriteDASH, rewriteHLS } from "@webrecorder/wabac/src/rewrite/rewriteVideo";
55
import { Buffer } from "buffer";
66

@@ -1009,10 +1009,9 @@ class Recorder {
10091009
case "text/javascript":
10101010
case "application/javascript":
10111011
case "application/x-javascript": {
1012-
const rules = ct === "text/html" ? htmlDSRules : baseDSRules;
1013-
const rw = rules.getRewriter(url);
1012+
const rw = getCustomRewriter(url, ct === "text/html");
10141013

1015-
if (rw !== rules.defaultRewriter) {
1014+
if (rw) {
10161015
string = payload.toString();
10171016
newString = rw.rewrite(string, {live: true, save: extraOpts});
10181017
}

src/requestresponseinfo.js

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
"use strict";
22

3+
import { getCustomRewriter } from "@webrecorder/wabac/src/rewrite";
34
import { getStatusText } from "@webrecorder/wabac/src/utils";
45

56
import { postToGetUrl } from "warcio";
@@ -191,20 +192,23 @@ class RequestResponseInfo
191192
postData: this.postData || "",
192193
};
193194
if (postToGetUrl(convData)) {
194-
//this.requestBody = convData.requestBody;
195-
// truncate to avoid extra long URLs
196-
try {
197-
const url = new URL(convData.url);
198-
for (const [key, value] of url.searchParams.entries()) {
199-
if (value && value.length > MAX_ARG_LEN) {
200-
url.searchParams.set(key, value.slice(0, MAX_ARG_LEN));
195+
// if URL for custom rewriting, keep as is, otherwise truncate to avoid extra long URLs
196+
if (getCustomRewriter(this.url, mime === "text/html")) {
197+
this.url = convData.url;
198+
} else {
199+
try {
200+
const url = new URL(convData.url);
201+
for (const [key, value] of url.searchParams.entries()) {
202+
if (value && value.length > MAX_ARG_LEN) {
203+
url.searchParams.set(key, value.slice(0, MAX_ARG_LEN));
204+
}
201205
}
206+
convData.url = url.href;
207+
} catch (e) {
208+
//ignore
202209
}
203-
convData.url = url.href;
204-
} catch (e) {
205-
//ignore
210+
this.url = convData.url.slice(0, MAX_URL_LENGTH);
206211
}
207-
this.url = convData.url.slice(0, MAX_URL_LENGTH);
208212
}
209213
}
210214

yarn.lock

Lines changed: 25 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -984,21 +984,21 @@
984984
uuid "^9.0.0"
985985
warcio "^2.2.1"
986986

987-
"@webrecorder/wabac@^2.17.3", "@webrecorder/wabac@^2.18.1", "@webrecorder/wabac@^2.19.4":
988-
version "2.19.4"
989-
resolved "https://registry.yarnpkg.com/@webrecorder/wabac/-/wabac-2.19.4.tgz#6c91a65928413b8394f17b57f57a803dcb111dbe"
990-
integrity sha512-USWUoreSfgyeYYrC2/o2YYr4dCUSwgOSzbpdapqh90VQ4Fb0fjwPAiessBCH4rA5yd9QpOgWdkapDmXvLx6Bww==
987+
"@webrecorder/wabac@^2.17.3", "@webrecorder/wabac@^2.18.1", "@webrecorder/wabac@^2.19.6":
988+
version "2.19.6"
989+
resolved "https://registry.yarnpkg.com/@webrecorder/wabac/-/wabac-2.19.6.tgz#775078cc752eb29a15fc2835a1484c32f85661cb"
990+
integrity sha512-3DzrASJBfwaFjtpYhISaYidYD8JgOAeGqx1ciFKSWo7cXnTQWtfyNGQfJoPLunfuzZvKBoxakEdcuE+Hl8q4rA==
991991
dependencies:
992992
"@peculiar/asn1-ecc" "^2.3.4"
993993
"@peculiar/asn1-schema" "^2.3.3"
994994
"@peculiar/x509" "^1.9.2"
995-
"@webrecorder/wombat" "^3.7.11"
995+
"@webrecorder/wombat" "^3.7.12"
996996
acorn "^8.10.0"
997997
auto-js-ipfs "^2.1.1"
998998
base64-js "^1.5.1"
999999
brotli "^1.3.3"
10001000
buffer "^6.0.3"
1001-
fast-xml-parser "^4.4.0"
1001+
fast-xml-parser "^4.4.1"
10021002
hash-wasm "^4.9.0"
10031003
http-link-header "^1.1.3"
10041004
http-status-codes "^2.1.4"
@@ -1013,10 +1013,10 @@
10131013
stream-browserify "^3.0.0"
10141014
warcio "^2.2.1"
10151015

1016-
"@webrecorder/wombat@^3.7.11":
1017-
version "3.7.11"
1018-
resolved "https://registry.yarnpkg.com/@webrecorder/wombat/-/wombat-3.7.11.tgz#27539f52317b2d80af4f28d971d59b53bc0f2b96"
1019-
integrity sha512-WlGpKjHUpP2aZo/OrY5aduNX/TVdo+hSkzu9as/63wSQ4ZFWIqZ+pxYXci43hjV5oVjcMP4KALLq+V+Fuo8qSA==
1016+
"@webrecorder/wombat@^3.7.12":
1017+
version "3.7.12"
1018+
resolved "https://registry.yarnpkg.com/@webrecorder/wombat/-/wombat-3.7.12.tgz#b2328ebfcea4f8acafdf1f81dea1d10a576b0357"
1019+
integrity sha512-MqSUxzSiapTGuoPeh7FNIe6ZX//KiCIiSydByzFqujin/e1nG7pmw7x2JgGeyWPYH6hYN/RxrpBcqJRBmYtHRg==
10201020
dependencies:
10211021
warcio "^2.2.0"
10221022

@@ -1415,10 +1415,12 @@ browserslist@^4.21.10:
14151415
node-releases "^2.0.14"
14161416
update-browserslist-db "^1.0.13"
14171417

1418-
browsertrix-behaviors@^0.6.0:
1419-
version "0.6.0"
1420-
resolved "https://registry.yarnpkg.com/browsertrix-behaviors/-/browsertrix-behaviors-0.6.0.tgz#e16345e4b414b18e6441548d517d01b4316f744e"
1421-
integrity sha512-BdfEPHmDjhEIFrn80UKnwGT6HRgnmq2shNybu8BEfAHJQsqZdvP/VVKWvNGnWML1jjUKiwtvtkdFhtHedFQkzA==
1418+
browsertrix-behaviors@^0.6.4:
1419+
version "0.6.4"
1420+
resolved "https://registry.yarnpkg.com/browsertrix-behaviors/-/browsertrix-behaviors-0.6.4.tgz#33fe9a433108f2faac3a03af91aff940433e5b87"
1421+
integrity sha512-xaiO/VqqeSd5FnAkIKQINxC/q3Med33Lqw3LGxD4NBtkcMSh1Anz/+830QHVlQbp08nIPUXYV96hDrx1Uv0PmQ==
1422+
dependencies:
1423+
query-selector-shadow-dom "^1.0.1"
14221424

14231425
btoa@^1.2.1:
14241426
version "1.2.1"
@@ -2537,10 +2539,10 @@ fast-uri@^2.3.0:
25372539
resolved "https://registry.yarnpkg.com/fast-uri/-/fast-uri-2.3.0.tgz#bdae493942483d299e7285dcb4627767d42e2793"
25382540
integrity sha512-eel5UKGn369gGEWOqBShmFJWfq/xSJvsgDzgLYC845GneayWvXBf0lJCBn5qTABfewy1ZDPoaR5OZCP+kssfuw==
25392541

2540-
fast-xml-parser@^4.4.0:
2541-
version "4.4.0"
2542-
resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.4.0.tgz#341cc98de71e9ba9e651a67f41f1752d1441a501"
2543-
integrity sha512-kLY3jFlwIYwBNDojclKsNAC12sfD6NwW74QB2CoNGPvtVxjliYehVunB3HYyNi+n4Tt1dAcgwYvmKF/Z18flqg==
2542+
fast-xml-parser@^4.4.1:
2543+
version "4.4.1"
2544+
resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.4.1.tgz#86dbf3f18edf8739326447bcaac31b4ae7f6514f"
2545+
integrity sha512-xkjOecfnKGkSsOwtZ5Pz7Us/T6mrbPQrq0nh+aCO5V9nk5NLWmasAHumTKjiPJPWANe+kAZ84Jc8ooJkzZ88Sw==
25442546
dependencies:
25452547
strnum "^1.0.5"
25462548

@@ -4611,6 +4613,11 @@ qs@6.11.0:
46114613
dependencies:
46124614
side-channel "^1.0.4"
46134615

4616+
query-selector-shadow-dom@^1.0.1:
4617+
version "1.0.1"
4618+
resolved "https://registry.yarnpkg.com/query-selector-shadow-dom/-/query-selector-shadow-dom-1.0.1.tgz#1c7b0058eff4881ac44f45d8f84ede32e9a2f349"
4619+
integrity sha512-lT5yCqEBgfoMYpf3F2xQRK7zEr1rhIIZuceDK6+xRkJQ4NMbHTwXqk4NkwDwQMNqXgG9r9fyHnzwNVs6zV5KRw==
4620+
46144621
queue-microtask@^1.2.2:
46154622
version "1.2.3"
46164623
resolved "https://registry.yarnpkg.com/queue-microtask/-/queue-microtask-1.2.3.tgz#4929228bbc724dfac43e0efb058caf7b6cfb6243"

0 commit comments

Comments
 (0)