From 41c2cadccb9df98b2d917e3e0ab607d63fd6c318 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 28 May 2024 08:42:55 +0000 Subject: [PATCH] Consider disk usage of collDir instead of default /crawls --- src/crawler.ts | 6 +++++- src/util/storage.ts | 3 ++- tests/storage.test.js | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/crawler.ts b/src/crawler.ts index fe085bd58..f929ae2d9 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -1114,7 +1114,11 @@ self.__bx_behaviors.selectMainBehavior(); if (this.params.diskUtilization) { // Check that disk usage isn't already or soon to be above threshold - const diskUtil = await checkDiskUtilization(this.params, size); + const diskUtil = await checkDiskUtilization( + this.collDir, + this.params, + size, + ); if (diskUtil.stop === true) { interrupt = true; } diff --git a/src/util/storage.ts b/src/util/storage.ts index f89d38601..b37f2aafb 100644 --- a/src/util/storage.ts +++ b/src/util/storage.ts @@ -202,6 +202,7 @@ export async function getDirSize(dir: string) { } export async function checkDiskUtilization( + collDir: string, // TODO: Fix this the next time the file is edited. 
// eslint-disable-next-line @typescript-eslint/no-explicit-any params: Record<string, any>, @@ -209,7 +210,7 @@ export async function checkDiskUtilization( dfOutput = null, ) { const diskUsage: Record<string, string> = await getDiskUsage( - "/crawls", + collDir, dfOutput, ); const usedPercentage = parseInt(diskUsage["Use%"].slice(0, -1)); diff --git a/tests/storage.test.js b/tests/storage.test.js index a5c7f7839..215f22c4b 100644 --- a/tests/storage.test.js +++ b/tests/storage.test.js @@ -29,6 +29,7 @@ grpcfuse 1000000 285000 715000 28% /crawls`; // with combineWARC + generateWACZ, projected is 285k + 4 * 5k = 310k = 31% // does not exceed 90% threshold const returnValue = await checkDiskUtilization( + '/crawls', params, 5000 * 1024, mockDfOutput, @@ -55,6 +56,7 @@ grpcfuse 100000 85000 15000 85% /crawls`; // with generateWACZ, projected is 85k + 3k x 2 = 91k = 91% // exceeds 90% threshold const returnValue = await checkDiskUtilization( + '/crawls', params, 3000 * 1024, mockDfOutput,