Skip to content

Commit 23eae21

Browse files
authored
ci(md): Add caching to md-export script (#14096)
Should cut down build times significantly after the first run, if the cache persists in Vercel. Locally, my times went from 60 seconds down to 8 seconds.
1 parent 6556d91 commit 23eae21

File tree

1 file changed

+59
-24
lines changed

1 file changed

+59
-24
lines changed

scripts/generate-md-exports.mjs

Lines changed: 59 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
#!/usr/bin/env node
22

3-
import {fileURLToPath} from 'url';
4-
53
import {selectAll} from 'hast-util-select';
6-
import {existsSync} from 'node:fs';
7-
import {mkdir, opendir, readFile, rm, writeFile} from 'node:fs/promises';
4+
import {createHash} from 'node:crypto';
5+
import {constants as fsConstants, existsSync} from 'node:fs';
6+
import {copyFile, mkdir, opendir, readFile, rm, writeFile} from 'node:fs/promises';
87
import {cpus} from 'node:os';
98
import * as path from 'node:path';
9+
import {fileURLToPath} from 'node:url';
1010
import {isMainThread, parentPort, Worker, workerData} from 'node:worker_threads';
1111
import rehypeParse from 'rehype-parse';
1212
import rehypeRemark from 'rehype-remark';
@@ -15,6 +15,16 @@ import remarkStringify from 'remark-stringify';
1515
import {unified} from 'unified';
1616
import {remove} from 'unist-util-remove';
1717

18+
/**
 * Log the outcome of one worker's batch of HTML→markdown conversion tasks.
 *
 * @param {{id: number, success: number, failedTasks: Array<{sourcePath: string, targetPath: string, error: Error}>}} data
 *   Result object produced by `processTaskList`.
 * @returns {boolean} `true` when the batch had failures, `false` otherwise.
 */
function taskFinishHandler(data) {
  if (data.failedTasks.length === 0) {
    console.log(`✅ Worker[${data.id}]: ${data.success} files successfully.`);
    return false;
  }
  // BUG FIX: the previous code assigned `hasErrors = true` here, but no
  // `hasErrors` binding exists at module scope — the only one is `let`-declared
  // inside the Promise executor in `createWork`, out of scope for this handler.
  // In an ES module (strict mode) that assignment throws a ReferenceError at
  // runtime. Report failure through the return value instead; callers that
  // need to track errors (e.g. the worker 'message' handler) should use it.
  console.error(`❌ Worker[${data.id}]: ${data.failedTasks.length} files failed:`);
  console.error(data.failedTasks);
  return true;
}
27+
1828
async function createWork() {
1929
let root = process.cwd();
2030
while (!existsSync(path.join(root, 'package.json'))) {
@@ -27,6 +37,13 @@ async function createWork() {
2737
const INPUT_DIR = path.join(root, '.next', 'server', 'app');
2838
const OUTPUT_DIR = path.join(root, 'public', 'md-exports');
2939

40+
const CACHE_VERSION = 1;
41+
const CACHE_DIR = path.join(root, '.next', 'cache', 'md-exports', `v${CACHE_VERSION}`);
42+
const noCache = !existsSync(CACHE_DIR);
43+
if (noCache) {
44+
await mkdir(CACHE_DIR, {recursive: true});
45+
}
46+
3047
console.log(`🚀 Starting markdown generation from: ${INPUT_DIR}`);
3148
console.log(`📁 Output directory: ${OUTPUT_DIR}`);
3249

@@ -63,40 +80,54 @@ async function createWork() {
6380
console.log(`📄 Converting ${numFiles} files with ${numWorkers} workers...`);
6481

6582
const selfPath = fileURLToPath(import.meta.url);
66-
const workerPromises = new Array(numWorkers - 1).fill(null).map((_, idx) => {
83+
const workerPromises = new Array(numWorkers - 1).fill(null).map((_, id) => {
6784
return new Promise((resolve, reject) => {
68-
const worker = new Worker(selfPath, {workerData: workerTasks[idx]});
69-
let hasErrors = false;
70-
worker.on('message', data => {
71-
if (data.failedTasks.length === 0) {
72-
console.log(`✅ Worker[${idx}]: ${data.success} files successfully.`);
73-
} else {
74-
hasErrors = true;
75-
console.error(`❌ Worker[${idx}]: ${data.failedTasks.length} files failed:`);
76-
console.error(data.failedTasks);
77-
}
85+
const worker = new Worker(selfPath, {
86+
workerData: {id, noCache, cacheDir: CACHE_DIR, tasks: workerTasks[id]},
7887
});
88+
let hasErrors = false;
89+
worker.on('message', taskFinishHandler);
7990
worker.on('error', reject);
8091
worker.on('exit', code => {
8192
if (code !== 0) {
82-
reject(new Error(`Worker[${idx}] stopped with exit code ${code}`));
93+
reject(new Error(`Worker[${id}] stopped with exit code ${code}`));
8394
} else {
84-
hasErrors ? reject(new Error(`Worker[${idx}] had some errors.`)) : resolve();
95+
hasErrors ? reject(new Error(`Worker[${id}] had some errors.`)) : resolve();
8596
}
8697
});
8798
});
8899
});
89100
// The main thread can also process tasks -- That's 65% more bullet per bullet! -Cave Johnson
90-
workerPromises.push(processTaskList(workerTasks[workerTasks.length - 1]));
101+
workerPromises.push(
102+
processTaskList({
103+
noCache,
104+
cacheDir: CACHE_DIR,
105+
tasks: workerTasks[workerTasks.length - 1],
106+
id: workerTasks.length - 1,
107+
}).then(taskFinishHandler)
108+
);
91109

92110
await Promise.all(workerPromises);
93111

94112
console.log(`📄 Generated ${numFiles} markdown files from HTML.`);
95113
console.log('✅ Markdown export generation complete!');
96114
}
97115

98-
async function genMDFromHTML(source, target) {
116+
const md5 = data => createHash('md5').update(data).digest('hex');
117+
118+
async function genMDFromHTML(source, target, {cacheDir, noCache}) {
99119
const text = await readFile(source, {encoding: 'utf8'});
120+
const hash = md5(text);
121+
const cacheFile = path.join(cacheDir, hash);
122+
if (!noCache) {
123+
try {
124+
await copyFile(cacheFile, target, fsConstants.COPYFILE_FICLONE);
125+
return;
126+
} catch {
127+
// pass
128+
}
129+
}
130+
100131
await writeFile(
101132
target,
102133
String(
@@ -125,22 +156,26 @@ async function genMDFromHTML(source, target) {
125156
.process(text)
126157
)
127158
);
159+
await copyFile(target, cacheFile, fsConstants.COPYFILE_FICLONE);
128160
}
129161

130-
/**
 * Convert a batch of HTML files to markdown, one task at a time.
 *
 * @param {{id: number, tasks: Array<{sourcePath: string, targetPath: string}>, cacheDir: string, noCache: boolean}} work
 *   Worker payload: batch id, the conversion tasks, and cache settings
 *   forwarded to `genMDFromHTML`.
 * @returns {Promise<{id: number, success: number, failedTasks: Array}>}
 *   Summary of the batch: how many files converted cleanly and which failed.
 */
async function processTaskList({id, tasks, cacheDir, noCache}) {
  const conversionOptions = {cacheDir, noCache};
  const failures = [];
  for (const task of tasks) {
    try {
      await genMDFromHTML(task.sourcePath, task.targetPath, conversionOptions);
    } catch (error) {
      // Collect the failure and keep going — one bad page must not abort the batch.
      failures.push({sourcePath: task.sourcePath, targetPath: task.targetPath, error});
    }
  }
  return {id, success: tasks.length - failures.length, failedTasks: failures};
}
141176

142-
/**
 * Worker-thread entry point: run the assigned batch and post the summary
 * back to the main thread.
 *
 * @param {{id: number, tasks: Array, cacheDir: string, noCache: boolean}} work
 *   Payload received via `workerData`, passed through to `processTaskList`.
 */
async function doWork(work) {
  const result = await processTaskList(work);
  parentPort.postMessage(result);
}
145180

146181
if (isMainThread) {

0 commit comments

Comments
 (0)