diff --git a/src/crawler.ts b/src/crawler.ts index fe085bd5..f929ae2d 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -1114,7 +1114,11 @@ self.__bx_behaviors.selectMainBehavior(); if (this.params.diskUtilization) { // Check that disk usage isn't already or soon to be above threshold - const diskUtil = await checkDiskUtilization(this.params, size); + const diskUtil = await checkDiskUtilization( + this.collDir, + this.params, + size, + ); if (diskUtil.stop === true) { interrupt = true; } diff --git a/src/util/storage.ts b/src/util/storage.ts index f89d3860..b37f2aaf 100644 --- a/src/util/storage.ts +++ b/src/util/storage.ts @@ -202,6 +202,7 @@ export async function getDirSize(dir: string) { } export async function checkDiskUtilization( + collDir: string, // TODO: Fix this the next time the file is edited. // eslint-disable-next-line @typescript-eslint/no-explicit-any params: Record, @@ -209,7 +210,7 @@ export async function checkDiskUtilization( dfOutput = null, ) { const diskUsage: Record = await getDiskUsage( - "/crawls", + collDir, dfOutput, ); const usedPercentage = parseInt(diskUsage["Use%"].slice(0, -1)); diff --git a/tests/storage.test.js b/tests/storage.test.js index a5c7f783..215f22c4 100644 --- a/tests/storage.test.js +++ b/tests/storage.test.js @@ -29,6 +29,7 @@ grpcfuse 1000000 285000 715000 28% /crawls`; // with combineWARC + generateWACZ, projected is 285k + 4 * 5k = 310k = 31% // does not exceed 90% threshold const returnValue = await checkDiskUtilization( + '/crawls', params, 5000 * 1024, mockDfOutput, @@ -55,6 +56,7 @@ grpcfuse 100000 85000 15000 85% /crawls`; // with generateWACZ, projected is 85k + 3k x 2 = 91k = 91% // exceeds 90% threshold const returnValue = await checkDiskUtilization( + '/crawls', params, 3000 * 1024, mockDfOutput,