Skip to content

Commit 42d428a

Browse files
authored
facebook fix + configurable storage/cookie archiving (#277)
Support for configurable cookie / storage archiving from UI settings (from #276). Fix for Facebook archiving + replay using DASH rewriting + range-from-query replay via wabac.js 2.20.4. Fixes #272, #273.
1 parent 8d925d6 commit 42d428a

File tree

7 files changed

+239
-96
lines changed

7 files changed

+239
-96
lines changed

package.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
"dependencies": {
1414
"@fortawesome/fontawesome-free": "^5.13.0",
1515
"@ipld/car": "^5.3.1",
16-
"@webrecorder/awp-sw": "^0.5.0",
17-
"@webrecorder/wabac": "^2.20.3",
16+
"@webrecorder/awp-sw": "^0.5.2",
17+
"@webrecorder/wabac": "^2.20.5",
1818
"auto-js-ipfs": "^2.3.0",
1919
"browsertrix-behaviors": "^0.6.4",
2020
"btoa": "^1.2.1",
@@ -31,7 +31,7 @@
3131
"tsconfig-paths-webpack-plugin": "^4.1.0",
3232
"unused-filename": "^4.0.1",
3333
"uuid": "^8.3.2",
34-
"warcio": "^2.3.1"
34+
"warcio": "^2.4.2"
3535
},
3636
"devDependencies": {
3737
"@typescript-eslint/eslint-plugin": "^6.15.0",
@@ -64,7 +64,7 @@
6464
"webpack-extension-reloader": "^1.1.4"
6565
},
6666
"resolutions": {
67-
"@webrecorder/wabac": "^2.20.3"
67+
"@webrecorder/wabac": "^2.20.5"
6868
},
6969
"files": [
7070
"src/",

src/localstorage.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,10 @@ export function setLocalOption(name, value) {
1919
}
2020

2121
// ===========================================================================
22-
// @ts-expect-error - TS7006 - Parameter 'name' implicitly has an 'any' type.
23-
export function getLocalOption(name) {
22+
export function getLocalOption(name: string) : Promise<string | null> {
2423
// @ts-expect-error - TS2339 - Property 'chrome' does not exist on type 'Window & typeof globalThis'. | TS2339 - Property 'chrome' does not exist on type 'Window & typeof globalThis'.
2524
if (self.chrome?.storage) {
26-
return new Promise((resolve) => {
25+
return new Promise<string>((resolve) => {
2726
// @ts-expect-error - TS2339 - Property 'chrome' does not exist on type 'Window & typeof globalThis'.
2827
self.chrome.storage.local.get(name, (res) => {
2928
resolve(res[name]);
@@ -35,7 +34,7 @@ export function getLocalOption(name) {
3534
return Promise.resolve(localStorage.getItem(name));
3635
}
3736

38-
return Promise.reject();
37+
return Promise.reject(null);
3938
}
4039

4140
// ===========================================================================

src/recorder.ts

Lines changed: 98 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
import { RequestResponseInfo } from "./requestresponseinfo";
22

3-
import { getCustomRewriter, rewriteDASH, rewriteHLS } from "@webrecorder/wabac";
3+
import {
4+
getCustomRewriter,
5+
rewriteDASH,
6+
rewriteHLS,
7+
removeRangeAsQuery,
8+
} from "@webrecorder/wabac";
49

510
import { Buffer } from "buffer";
611

@@ -15,6 +20,7 @@ import {
1520
BEHAVIOR_PAUSED,
1621
BEHAVIOR_DONE,
1722
} from "./consts";
23+
import { getLocalOption } from "./localstorage";
1824

1925
// @ts-expect-error - TS2554 - Expected 0 arguments, but got 1.
2026
const encoder = new TextEncoder("utf-8");
@@ -34,9 +40,26 @@ function sleep(time) {
3440
return new Promise((resolve) => setTimeout(() => resolve(), time));
3541
}
3642

43+
type FetchEntry = {
44+
url: string;
45+
headers?: Headers;
46+
rangeReplaced?: boolean;
47+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
48+
sessions?: any[];
49+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
50+
pageInfo?: any;
51+
52+
rangeRemoved?: boolean;
53+
doRangeCheck?: boolean;
54+
redirectOnly?: boolean;
55+
};
56+
3757
// ===========================================================================
3858
class Recorder {
39-
recordStorage = true;
59+
archiveStorage = false;
60+
archiveCookies = false;
61+
62+
_fetchQueue: FetchEntry[] = [];
4063

4164
constructor() {
4265
// @ts-expect-error - TS2339 - Property 'flatMode' does not exist on type 'Recorder'.
@@ -79,8 +102,7 @@ class Recorder {
79102

80103
// @ts-expect-error - TS2339 - Property '_fetchPending' does not exist on type 'Recorder'.
81104
this._fetchPending = new Map();
82-
// @ts-expect-error - TS2339 - Property '_fetchQueue' does not exist on type 'Recorder'.
83-
this._fetchQueue = [];
105+
84106
// @ts-expect-error - TS2339 - Property '_fetchUrls' does not exist on type 'Recorder'.
85107
this._fetchUrls = new Set();
86108

@@ -128,6 +150,13 @@ class Recorder {
128150
this.defaultFetchOpts = {
129151
redirect: "manual",
130152
};
153+
154+
this.initOpts();
155+
}
156+
157+
async initOpts() {
158+
this.archiveCookies = (await getLocalOption("archiveCookies") === "1");
159+
this.archiveStorage = (await getLocalOption("archiveStorage") === "1");
131160
}
132161

133162
// @ts-expect-error - TS7006 - Parameter 'autorun' implicitly has an 'any' type.
@@ -860,7 +889,7 @@ class Recorder {
860889
// @ts-expect-error - TS7006 - Parameter 'url' implicitly has an 'any' type. | TS7006 - Parameter 'sessions' implicitly has an 'any' type.
861890
handleWindowOpen(url, sessions) {
862891
// @ts-expect-error - TS2339 - Property 'pageInfo' does not exist on type 'Recorder'.
863-
const headers = { Referer: this.pageInfo.url };
892+
const headers = new Headers({ Referer: this.pageInfo.url });
864893
this.doAsyncFetch({ url, headers, redirectOnly: true }, sessions);
865894
}
866895

@@ -1450,8 +1479,12 @@ class Recorder {
14501479
//this._fetchPending.set(requestId, pending);
14511480

14521481
try {
1453-
// @ts-expect-error - TS2339 - Property 'pageInfo' does not exist on type 'Recorder'.
1454-
const data = reqresp.toDBRecord(reqresp.payload, this.pageInfo);
1482+
const data = reqresp.toDBRecord(
1483+
reqresp.payload,
1484+
// @ts-expect-error - TS2339 - Property 'pageInfo' does not exist on type 'Recorder'.
1485+
this.pageInfo,
1486+
this.archiveCookies,
1487+
);
14551488

14561489
// top-level URL is a non-GET request
14571490
if (
@@ -1513,7 +1546,7 @@ class Recorder {
15131546
// eslint-disable-next-line @typescript-eslint/no-explicit-any
15141547
async getStorage(sessions: any) {
15151548
// check if recording storage is allowed
1516-
if (!this.recordStorage) {
1549+
if (!this.archiveStorage) {
15171550
return null;
15181551
}
15191552

@@ -1576,7 +1609,7 @@ class Recorder {
15761609

15771610
reqresp.fillResponseRedirect(params);
15781611
// @ts-expect-error - TS2339 - Property 'pageInfo' does not exist on type 'Recorder'.
1579-
data = reqresp.toDBRecord(null, this.pageInfo);
1612+
data = reqresp.toDBRecord(null, this.pageInfo, this.archiveCookies);
15801613
}
15811614

15821615
reqresp.fillRequest(params);
@@ -1629,14 +1662,14 @@ class Recorder {
16291662
for (const { value } of params.events) {
16301663
if (value.indexOf('"kLoad"') > 0) {
16311664
const { url } = JSON.parse(value);
1632-
this.doAsyncFetch({ url }, sessions);
1665+
this.doAsyncFetch({ url, doRangeCheck: true }, sessions);
16331666
break;
16341667
}
16351668
}
16361669
}
16371670

16381671
// @ts-expect-error - TS7006 - Parameter 'request' implicitly has an 'any' type. | TS7006 - Parameter 'resp' implicitly has an 'any' type.
1639-
async attemptFetchRedirect(request, resp) {
1672+
async attemptFetchRedirect(request: FetchEntry, resp) {
16401673
if (request.redirectOnly && resp.type === "opaqueredirect") {
16411674
const abort = new AbortController();
16421675
// @ts-expect-error - TS2345 - Argument of type '{ abort: AbortController; }' is not assignable to parameter of type 'RequestInit'.
@@ -1671,11 +1704,19 @@ class Recorder {
16711704
}
16721705

16731706
// @ts-expect-error - TS7006 - Parameter 'request' implicitly has an 'any' type. | TS7006 - Parameter 'sessions' implicitly has an 'any' type.
1674-
doAsyncFetch(request, sessions) {
1707+
doAsyncFetch(request: FetchEntry, sessions) {
16751708
if (!request || !this.isValidUrl(request.url)) {
16761709
return;
16771710
}
16781711

1712+
if (request.doRangeCheck) {
1713+
const url = removeRangeAsQuery(request.url);
1714+
if (url) {
1715+
request.url = url;
1716+
request.rangeRemoved = true;
1717+
}
1718+
}
1719+
16791720
// @ts-expect-error - TS2339 - Property '_fetchUrls' does not exist on type 'Recorder'.
16801721
if (this._fetchUrls.has(request.url)) {
16811722
console.log("Skipping, already fetching: " + request.url);
@@ -1686,15 +1727,13 @@ class Recorder {
16861727
request.pageInfo = this.pageInfo;
16871728
request.sessions = sessions;
16881729

1689-
// @ts-expect-error - TS2339 - Property '_fetchQueue' does not exist on type 'Recorder'.
16901730
this._fetchQueue.push(request);
16911731

16921732
this.doBackgroundFetch();
16931733
}
16941734

16951735
async doBackgroundFetch() {
16961736
if (
1697-
// @ts-expect-error - TS2339 - Property '_fetchQueue' does not exist on type 'Recorder'.
16981737
!this._fetchQueue.length ||
16991738
// @ts-expect-error - TS2339 - Property '_fetchPending' does not exist on type 'Recorder'.
17001739
this._fetchPending.size >= MAX_CONCURRENT_FETCH ||
@@ -1704,8 +1743,10 @@ class Recorder {
17041743
return;
17051744
}
17061745

1707-
// @ts-expect-error - TS2339 - Property '_fetchQueue' does not exist on type 'Recorder'.
17081746
const request = this._fetchQueue.shift();
1747+
if (!request) {
1748+
return;
1749+
}
17091750

17101751
// @ts-expect-error - TS2339 - Property '_fetchUrls' does not exist on type 'Recorder'.
17111752
if (this._fetchUrls.has(request.url)) {
@@ -1732,11 +1773,9 @@ class Recorder {
17321773
// @ts-expect-error - TS2339 - Property 'defaultFetchOpts' does not exist on type 'Recorder'.
17331774
const opts = { ...this.defaultFetchOpts };
17341775

1735-
if (request.getRequestHeadersDict) {
1736-
opts.headers = request.getRequestHeadersDict().headers;
1737-
opts.headers.delete("range");
1738-
} else if (request.headers) {
1776+
if (request.headers) {
17391777
opts.headers = request.headers;
1778+
opts.headers.delete("range");
17401779
}
17411780

17421781
let resp = await fetch(request.url, opts);
@@ -1779,8 +1818,12 @@ class Recorder {
17791818
// @ts-expect-error - TS2339 - Property 'payload' does not exist on type 'RequestResponseInfo'.
17801819
reqresp.payload = new Uint8Array(payload);
17811820

1782-
// @ts-expect-error - TS2339 - Property 'payload' does not exist on type 'RequestResponseInfo'.
1783-
const data = reqresp.toDBRecord(reqresp.payload, request.pageInfo);
1821+
const data = reqresp.toDBRecord(
1822+
// @ts-expect-error - TS2339 - Property 'payload' does not exist on type 'RequestResponseInfo'.
1823+
reqresp.payload,
1824+
request.pageInfo,
1825+
this.archiveCookies,
1826+
);
17841827

17851828
if (data) {
17861829
await this.commitResource(data, request.pageInfo);
@@ -1813,9 +1856,36 @@ class Recorder {
18131856
let payload;
18141857

18151858
if (reqresp.status === 206) {
1816-
sleep(500).then(() => this.doAsyncFetch(reqresp, sessions));
1859+
sleep(500).then(() =>
1860+
this.doAsyncFetch(
1861+
{
1862+
url: reqresp.url,
1863+
headers: reqresp.getRequestHeadersDict().headers,
1864+
},
1865+
sessions,
1866+
),
1867+
);
18171868
reqresp.payload = null;
18181869
return null;
1870+
} else {
1871+
const changedUrl = removeRangeAsQuery(reqresp.url);
1872+
1873+
if (changedUrl) {
1874+
reqresp.url = changedUrl;
1875+
this.removeReqResp(reqresp.requestId);
1876+
sleep(500).then(() =>
1877+
this.doAsyncFetch(
1878+
{
1879+
url: changedUrl,
1880+
headers: reqresp.getRequestHeadersDict().headers,
1881+
rangeRemoved: true,
1882+
},
1883+
sessions,
1884+
),
1885+
);
1886+
reqresp.payload = null;
1887+
return null;
1888+
}
18191889
}
18201890

18211891
if (!this.noResponseForStatus(reqresp.status)) {
@@ -1888,9 +1958,13 @@ class Recorder {
18881958
if (reqresp.payload) {
18891959
// @ts-expect-error - TS2571 - Object is of type 'unknown'.
18901960
console.log(`Committing Finished ${id} - ${reqresp.url}`);
1891-
18921961
// @ts-expect-error - TS2571 - Object is of type 'unknown'. | TS2571 - Object is of type 'unknown'.
1893-
const data = reqresp.toDBRecord(reqresp.payload, pageInfo);
1962+
const data = reqresp.toDBRecord(
1963+
// @ts-expect-error - TS2571 - Object is of type 'unknown'. | TS2571 - Object is of type 'unknown'.
1964+
reqresp.payload,
1965+
pageInfo,
1966+
this.archiveCookies,
1967+
);
18941968

18951969
if (data) {
18961970
// @ts-expect-error - TS2554 - Expected 2 arguments, but got 1.

src/requestresponseinfo.ts

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ const encoder = new TextEncoder();
1616

1717
// ===========================================================================
1818
class RequestResponseInfo {
19+
extraOpts: Record<string, string>;
20+
1921
// @ts-expect-error - TS7006 - Parameter 'requestId' implicitly has an 'any' type.
2022
constructor(requestId) {
2123
// @ts-expect-error - TS2339 - Property '_created' does not exist on type 'RequestResponseInfo'.
@@ -70,7 +72,6 @@ class RequestResponseInfo {
7072
// @ts-expect-error - TS2339 - Property 'resourceType' does not exist on type 'RequestResponseInfo'.
7173
this.resourceType = null;
7274

73-
// @ts-expect-error - TS2339 - Property 'extraOpts' does not exist on type 'RequestResponseInfo'.
7475
this.extraOpts = {};
7576
}
7677

@@ -212,7 +213,7 @@ class RequestResponseInfo {
212213
}
213214

214215
// @ts-expect-error - TS7006 - Parameter 'payload' implicitly has an 'any' type. | TS7006 - Parameter 'pageInfo' implicitly has an 'any' type.
215-
toDBRecord(payload, pageInfo) {
216+
toDBRecord(payload, pageInfo, allowCookies) {
216217
// don't save 304 (todo: turn into 'revisit' style entry?)
217218
// extra check for 206, should already be skipped
218219
if (
@@ -257,7 +258,11 @@ class RequestResponseInfo {
257258
const cookie = reqHeaders.headers.get("cookie");
258259

259260
if (cookie) {
260-
respHeaders.headersDict["x-wabac-preset-cookie"] = cookie;
261+
if (allowCookies) {
262+
respHeaders.headersDict["x-wabac-preset-cookie"] = cookie;
263+
} else {
264+
reqHeaders.headers.delete("cookie");
265+
}
261266
}
262267

263268
// @ts-expect-error - TS2339 - Property 'url' does not exist on type 'RequestResponseInfo'.
@@ -312,7 +317,6 @@ class RequestResponseInfo {
312317
mime,
313318
respHeaders: respHeaders.headersDict,
314319
reqHeaders: reqHeaders.headersDict,
315-
// @ts-expect-error - TS2339 - Property 'extraOpts' does not exist on type 'RequestResponseInfo'.
316320
extraOpts: this.extraOpts,
317321
};
318322

0 commit comments

Comments
 (0)