Commit 0b31423

fix: Fix read file fn which uses double the memory (#886)

* fix: Fix read file fn which uses double the memory
* Development snapshot
* Development snapshot

1 parent 1455243, commit 0b31423

3 files changed: +37 -11 lines
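For context on the commit title: the doubled memory came from the previous read implementation, which collected every stream chunk into an array and then merged them with Buffer.concat, so the full-size copy and all the chunks existed at once. A rough sketch of that pre-fix pattern (illustrative only, not code from this repository):

import fs from 'fs';

// Pre-fix pattern: every chunk stays referenced in `chunks` while
// Buffer.concat allocates a second, full-size copy, so peak memory is
// roughly 2x the file size at the end of the read.
const readByConcat = (filepath: string): Promise<Buffer> =>
  new Promise((resolve, reject) => {
    const chunks: Buffer[] = [];
    fs.createReadStream(filepath)
      .on('data', (chunk) => chunks.push(chunk as Buffer))
      .on('end', () => resolve(Buffer.concat(chunks)))
      .on('error', reject);
  });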

src/fs.ts (2 additions, 1 deletion)

@@ -5,5 +5,6 @@ export default {
   existsSync: fs.existsSync,
   readFile: util.promisify(fs.readFile),
   watchFile: fs.watchFile,
-  createReadStream: fs.createReadStream,
+  createReadStream: fs.createReadStream,
+  stat: util.promisify(fs.stat),
 };
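The only change here is exposing a promisified fs.stat on the wrapper module; src/index.ts uses its size field to decide how to read the database. A minimal usage sketch, assuming a hypothetical file path (the 512 MB figure matches LARGE_FILE_THRESHOLD in the diff below):

import fs from './fs';

// stat() resolves with fs.Stats; `size` is the file length in bytes.
const pickStrategy = async (filepath: string): Promise<'readFile' | 'stream'> => {
  const stats = await fs.stat(filepath);
  return stats.size < 512 * 1024 * 1024 ? 'readFile' : 'stream';
};

// pickStrategy('/path/to/GeoLite2-City.mmdb')  // path is illustrative only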

src/index.test.ts (4 additions, 3 deletions)

@@ -19,6 +19,7 @@ describe('index', () => {
       watchHandler = cb;
     });
     sandbox.spy(fs, 'createReadStream');
+    sandbox.spy(fs, 'readFile');
   });
   afterEach(() => {
     sandbox.restore();
@@ -51,17 +52,17 @@ describe('index', () => {
     const lookup = await maxmind.open(dbPath, options);
     assert(lookup.get('2001:230::'));
     assert((fs.watchFile as SinonSpy).calledOnce);
-    assert((fs.createReadStream as SinonSpy).calledOnce);
+    assert((fs.readFile as SinonSpy).calledOnce);
   });

   it('should work with auto updates', async () => {
     const options = { watchForUpdates: true };
     const lookup = await maxmind.open(dbPath, options);
     assert(lookup.get('2001:230::'));
     assert((fs.watchFile as SinonSpy).calledOnce);
-    assert((fs.createReadStream as SinonSpy).calledOnce);
+    assert((fs.readFile as SinonSpy).calledOnce);
     await watchHandler();
-    assert((fs.createReadStream as SinonSpy).calledTwice);
+    assert((fs.readFile as SinonSpy).calledTwice);
   });

   it('should work with auto updates and call specified hook', async () => {
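The test updates follow directly from the fix: the small fixture database now goes through fs.readFile rather than fs.createReadStream, so the assertions spy on fs.readFile instead. The sandbox/spy mechanics these tests rely on look roughly like this (a hedged sketch, not the actual test file):

import sinon, { SinonSpy } from 'sinon';
import fs from './fs';

const sandbox = sinon.createSandbox();

// spy() wraps the real method, records calls, and still calls through,
// so maxmind.open() behaves normally while the test counts invocations.
sandbox.spy(fs, 'readFile');

// ... exercise maxmind.open(dbPath, options) here ...

const calledOnce = (fs.readFile as SinonSpy).calledOnce;

// restore() reinstates the original, unwrapped methods after each test.
sandbox.restore();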

src/index.ts (31 additions, 7 deletions)

@@ -6,6 +6,9 @@ import ip from './ip';
 import isGzip from './is-gzip';
 import utils from './utils';

+const LARGE_FILE_THRESHOLD = 512 * 1024 * 1024;
+const STREAM_WATERMARK = 8 * 1024 * 1024;
+
 type Callback = () => void;

 export interface OpenOpts {
@@ -17,27 +20,48 @@ export interface OpenOpts {
   watchForUpdatesHook?: Callback;
 }

-const readFile = async (filepath: string): Promise<Buffer> => {
-  return new Promise((resolve, reject) => {
-    const chunks: Buffer[] = [];
+/**
+ * Read large file in chunks.
+ *
+ * Reason it's not used for all file sizes is that it's slower than fs.readFile and uses
+ * a bit more memory due to the buffer operations.
+ *
+ * Node seems to have a limit of 2GB for fs.readFileSync, so we need to use streams for
+ * larger files.
+ *
+ * @param filepath
+ * @param size
+ * @returns
+ */
+const readLargeFile = async (filepath: string, size: number): Promise<Buffer> =>
+  new Promise((resolve, reject) => {
+    let buffer = Buffer.allocUnsafe(size);
+    let offset = 0;
     const stream = fs.createReadStream(filepath, {
-      highWaterMark: 64 * 1024 * 1024, // 64 MB chunks
+      highWaterMark: STREAM_WATERMARK,
    });

     stream.on('data', (chunk: Buffer) => {
-      chunks.push(chunk);
+      chunk.copy(buffer, offset);
+      offset += chunk.length;
     });

     stream.on('end', () => {
-      resolve(Buffer.concat(chunks));
+      stream.close();
+      resolve(buffer);
     });

     stream.on('error', (err) => {
       reject(err);
     });
   });
-};

+const readFile = async (filepath: string): Promise<Buffer> => {
+  const fstat = await fs.stat(filepath);
+  return fstat.size < LARGE_FILE_THRESHOLD
+    ? fs.readFile(filepath)
+    : readLargeFile(filepath, fstat.size);
+};

 export const open = async <T extends Response>(
   filepath: string,
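Because readLargeFile copies every chunk into a buffer preallocated from the fs.stat size, each byte of the Buffer.allocUnsafe region is overwritten before it is returned, and peak memory stays close to the file size rather than roughly doubling. A small standalone check along these lines (the highWaterMark mirrors the diff; the file path is illustrative only) can confirm the streamed result matches a plain read byte for byte:

import assert from 'assert';
import fs from 'fs';

const readPreallocated = (filepath: string): Promise<Buffer> =>
  new Promise((resolve, reject) => {
    const size = fs.statSync(filepath).size;
    // allocUnsafe skips zero-filling; safe here because copy() below
    // overwrites every byte before the buffer is handed back.
    const buffer = Buffer.allocUnsafe(size);
    let offset = 0;
    fs.createReadStream(filepath, { highWaterMark: 8 * 1024 * 1024 })
      .on('data', (chunk) => {
        (chunk as Buffer).copy(buffer, offset);
        offset += chunk.length;
      })
      .on('end', () => resolve(buffer))
      .on('error', reject);
  });

// Usage sketch: compare against a one-shot read (path is a placeholder).
readPreallocated('./some-database.mmdb').then((streamed) => {
  assert(streamed.equals(fs.readFileSync('./some-database.mmdb')));
});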
