1
1
#!/usr/bin/env node
2
2
3
- import { fileURLToPath } from 'url' ;
4
-
5
3
import { selectAll } from 'hast-util-select' ;
6
- import { existsSync } from 'node:fs' ;
7
- import { mkdir , opendir , readFile , rm , writeFile } from 'node:fs/promises' ;
4
+ import { createHash } from 'node:crypto' ;
5
+ import { constants as fsConstants , existsSync } from 'node:fs' ;
6
+ import { copyFile , mkdir , opendir , readFile , rm , writeFile } from 'node:fs/promises' ;
8
7
import { cpus } from 'node:os' ;
9
8
import * as path from 'node:path' ;
9
+ import { fileURLToPath } from 'node:url' ;
10
10
import { isMainThread , parentPort , Worker , workerData } from 'node:worker_threads' ;
11
11
import rehypeParse from 'rehype-parse' ;
12
12
import rehypeRemark from 'rehype-remark' ;
@@ -15,6 +15,16 @@ import remarkStringify from 'remark-stringify';
15
15
import { unified } from 'unified' ;
16
16
import { remove } from 'unist-util-remove' ;
17
17
18
/**
 * Prints the outcome of one finished task list to the console.
 *
 * Used both as the `message` listener for worker threads and as the
 * continuation of the main thread's own task list, so it must not depend on
 * any variable that only exists inside a caller's closure.
 *
 * @param {{id: number, success: number, failedTasks: Array<object>}} data
 *   Result summary: the worker id, the number of files converted, and the
 *   list of tasks that threw.
 * @returns {boolean} `true` when at least one task failed, `false` otherwise.
 */
function taskFinishHandler(data) {
  const hasErrors = data.failedTasks.length > 0;
  if (!hasErrors) {
    console.log(`✅ Worker[${data.id}]: ${data.success} files successfully.`);
    return false;
  }

  // Flag the whole run as failed without throwing: an exception raised from a
  // worker `message` listener would surface as an uncaught error and hide the
  // report printed below.
  process.exitCode = 1;
  console.error(`❌ Worker[${data.id}]: ${data.failedTasks.length} files failed:`);
  console.error(data.failedTasks);
  return true;
}
27
+
18
28
async function createWork ( ) {
19
29
let root = process . cwd ( ) ;
20
30
while ( ! existsSync ( path . join ( root , 'package.json' ) ) ) {
@@ -27,6 +37,13 @@ async function createWork() {
27
37
const INPUT_DIR = path . join ( root , '.next' , 'server' , 'app' ) ;
28
38
const OUTPUT_DIR = path . join ( root , 'public' , 'md-exports' ) ;
29
39
40
+ const CACHE_VERSION = 1 ;
41
+ const CACHE_DIR = path . join ( root , '.next' , 'cache' , 'md-exports' , `v${ CACHE_VERSION } ` ) ;
42
+ const noCache = ! existsSync ( CACHE_DIR ) ;
43
+ if ( noCache ) {
44
+ await mkdir ( CACHE_DIR , { recursive : true } ) ;
45
+ }
46
+
30
47
console . log ( `🚀 Starting markdown generation from: ${ INPUT_DIR } ` ) ;
31
48
console . log ( `📁 Output directory: ${ OUTPUT_DIR } ` ) ;
32
49
@@ -63,40 +80,54 @@ async function createWork() {
63
80
console . log ( `📄 Converting ${ numFiles } files with ${ numWorkers } workers...` ) ;
64
81
65
82
const selfPath = fileURLToPath ( import . meta. url ) ;
66
- const workerPromises = new Array ( numWorkers - 1 ) . fill ( null ) . map ( ( _ , idx ) => {
83
+ const workerPromises = new Array ( numWorkers - 1 ) . fill ( null ) . map ( ( _ , id ) => {
67
84
return new Promise ( ( resolve , reject ) => {
68
- const worker = new Worker ( selfPath , { workerData : workerTasks [ idx ] } ) ;
69
- let hasErrors = false ;
70
- worker . on ( 'message' , data => {
71
- if ( data . failedTasks . length === 0 ) {
72
- console . log ( `✅ Worker[${ idx } ]: ${ data . success } files successfully.` ) ;
73
- } else {
74
- hasErrors = true ;
75
- console . error ( `❌ Worker[${ idx } ]: ${ data . failedTasks . length } files failed:` ) ;
76
- console . error ( data . failedTasks ) ;
77
- }
85
+ const worker = new Worker ( selfPath , {
86
+ workerData : { id, noCache, cacheDir : CACHE_DIR , tasks : workerTasks [ id ] } ,
78
87
} ) ;
88
+ let hasErrors = false ;
89
+ worker . on ( 'message' , taskFinishHandler ) ;
79
90
worker . on ( 'error' , reject ) ;
80
91
worker . on ( 'exit' , code => {
81
92
if ( code !== 0 ) {
82
- reject ( new Error ( `Worker[${ idx } ] stopped with exit code ${ code } ` ) ) ;
93
+ reject ( new Error ( `Worker[${ id } ] stopped with exit code ${ code } ` ) ) ;
83
94
} else {
84
- hasErrors ? reject ( new Error ( `Worker[${ idx } ] had some errors.` ) ) : resolve ( ) ;
95
+ hasErrors ? reject ( new Error ( `Worker[${ id } ] had some errors.` ) ) : resolve ( ) ;
85
96
}
86
97
} ) ;
87
98
} ) ;
88
99
} ) ;
89
100
// The main thread can also process tasks -- That's 65% more bullet per bullet! -Cave Johnson
90
- workerPromises . push ( processTaskList ( workerTasks [ workerTasks . length - 1 ] ) ) ;
101
+ workerPromises . push (
102
+ processTaskList ( {
103
+ noCache,
104
+ cacheDir : CACHE_DIR ,
105
+ tasks : workerTasks [ workerTasks . length - 1 ] ,
106
+ id : workerTasks . length - 1 ,
107
+ } ) . then ( taskFinishHandler )
108
+ ) ;
91
109
92
110
await Promise . all ( workerPromises ) ;
93
111
94
112
console . log ( `📄 Generated ${ numFiles } markdown files from HTML.` ) ;
95
113
console . log ( '✅ Markdown export generation complete!' ) ;
96
114
}
97
115
98
- async function genMDFromHTML ( source , target ) {
116
/**
 * Returns the hex-encoded MD5 digest of `data`.
 * The digest is used as a content-derived cache file name, not for security.
 */
const md5 = data => {
  const hasher = createHash('md5');
  hasher.update(data);
  return hasher.digest('hex');
};
117
+
118
+ async function genMDFromHTML ( source , target , { cacheDir, noCache} ) {
99
119
const text = await readFile ( source , { encoding : 'utf8' } ) ;
120
+ const hash = md5 ( text ) ;
121
+ const cacheFile = path . join ( cacheDir , hash ) ;
122
+ if ( ! noCache ) {
123
+ try {
124
+ await copyFile ( cacheFile , target , fsConstants . COPYFILE_FICLONE ) ;
125
+ return ;
126
+ } catch {
127
+ // pass
128
+ }
129
+ }
130
+
100
131
await writeFile (
101
132
target ,
102
133
String (
@@ -125,22 +156,26 @@ async function genMDFromHTML(source, target) {
125
156
. process ( text )
126
157
)
127
158
) ;
159
+ await copyFile ( target , cacheFile , fsConstants . COPYFILE_FICLONE ) ;
128
160
}
129
161
130
/**
 * Converts every task in `tasks` one after another and summarizes the result.
 *
 * A task that throws does not stop the run; it is recorded together with the
 * error it raised and the loop moves on to the next task.
 *
 * @param {object} work
 * @param {number} work.id Identifier echoed back in the result.
 * @param {Array<{sourcePath: string, targetPath: string}>} work.tasks
 * @param {string} work.cacheDir Forwarded to `genMDFromHTML`.
 * @param {boolean} work.noCache Forwarded to `genMDFromHTML`.
 * @returns {Promise<{id: number, success: number, failedTasks: Array<object>}>}
 */
async function processTaskList({id, tasks, cacheDir, noCache}) {
  const failedTasks = [];
  let success = 0;

  for (const task of tasks) {
    const {sourcePath, targetPath} = task;
    try {
      await genMDFromHTML(sourcePath, targetPath, {cacheDir, noCache});
      success += 1;
    } catch (error) {
      failedTasks.push({sourcePath, targetPath, error});
    }
  }

  return {id, success, failedTasks};
}
141
176
142
/**
 * Worker-thread entry point: processes the given work description and posts
 * the resulting summary back through `parentPort`.
 *
 * @param {object} work Work description handed to `processTaskList`.
 */
async function doWork(work) {
  const summary = await processTaskList(work);
  parentPort.postMessage(summary);
}
145
180
146
181
if ( isMainThread ) {
0 commit comments