diff --git a/package.json b/package.json index 86922a14..fb4c0089 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "browsertrix-crawler", - "version": "1.6.0", + "version": "1.6.1", "main": "browsertrix-crawler", "type": "module", "repository": "https://github.com/webrecorder/browsertrix-crawler", diff --git a/src/crawler.ts b/src/crawler.ts index 61c7516f..2b46b6b9 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -1497,6 +1497,10 @@ self.__bx_behaviors.selectMainBehavior(); } } + if (await this.crawlState.isCrawlPaused()) { + interrupt = InterruptReason.CrawlPaused; + } + if (interrupt) { this.uploadAndDeleteLocal = true; this.gracefulFinishOnInterrupt(interrupt); @@ -1859,12 +1863,9 @@ self.__bx_behaviors.selectMainBehavior(); if (isFinished || (await this.crawlState.isCrawlCanceled())) { return; } - // if stopped, won't get anymore data - if (await this.crawlState.isCrawlStopped()) { - // possibly restarted after committing, so assume done here! - if ((await this.crawlState.numDone()) > 0) { - return; - } + // possibly restarted after committing, so assume done here! + if ((await this.crawlState.numDone()) > 0) { + return; } // fail crawl otherwise logger.fatal("No WARC Files, assuming crawl failed"); diff --git a/src/util/constants.ts b/src/util/constants.ts index 0f75df73..d6185d4e 100644 --- a/src/util/constants.ts +++ b/src/util/constants.ts @@ -88,4 +88,5 @@ export enum InterruptReason { DiskUtilization = 4, BrowserCrashed = 5, SignalInterrupted = 6, + CrawlPaused = 7, } diff --git a/src/util/state.ts b/src/util/state.ts index b1922b44..30e280ff 100644 --- a/src/util/state.ts +++ b/src/util/state.ts @@ -521,6 +521,14 @@ return inx; return false; } + async isCrawlPaused() { + if ((await this.redis.get(`${this.key}:paused`)) === "1") { + return true; + } + + return false; + } + async isCrawlCanceled() { return (await this.redis.get(`${this.key}:canceled`)) === "1"; }