Skip to content

Commit 7e4ec3e

Browse files
committed
Rewrite docs linter
1. I want to set the stage to deal with #2898 properly.
2. `request` was deprecated years ago, so I decided that it's better to just move to native Node.js APIs in its place.
3. `glob` was outdated, and it's easier to just toss it than to upgrade across a major version.
4. I switched to using Marked's "lexer" directly so I'm not fussing with the complexity of renderers. This of course necessitated a more complex file processor, as its "lexer" is really an AST parser.

I also decided to go a few steps further:
- Drop the cache to simplify everything. I might reverse this later, but just caching URLs per-page should be enough to prevent the world from crashing down.
- Drop some more dependencies, so I don't have to come back to this later nearly as quickly.
- Upgrade to a more modern language version in the scripts.
- Update Marked. It was super outdated.
- Add line and column numbers to the warnings. That took quite a bit of work, thanks to a missing Marked feature plus a bug in Marked.
1 parent f6f0928 commit 7e4ec3e

12 files changed

+1189
-315
lines changed

.eslintrc.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@ module.exports = {
44
"extends": "../.eslintrc.js",
55
"env": {
66
"browser": null,
7+
"node": true,
8+
"es2022": true,
79
},
810
"parserOptions": {
9-
"ecmaVersion": 2019,
11+
"ecmaVersion": 2022,
1012
},
1113
"rules": {
1214
"no-process-env": "off",

_improve-rejection-crashing.js

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
"use strict"
2+
3+
// Last-resort handler for promise rejections nobody caught: flag the process as failed, print
// whatever diagnostics the rejection reason carries, then stop.
process.on("unhandledRejection", (e) => {
	process.exitCode = 1

	// A rejection reason can be any value, including `null`/`undefined` (e.g. `Promise.reject()`),
	// so probe it with `?.`; otherwise the original reason would be replaced by a `TypeError`.
	// Only reasons that carry both `stdout` and `stderr` get the detailed report below;
	// everything else is rethrown untouched.
	if (!e?.stdout || !e?.stderr) throw e

	console.error(e.stack)

	if (e.stdout?.length) {
		console.error(e.stdout.toString("utf-8"))
	}

	if (e.stderr?.length) {
		console.error(e.stderr.toString("utf-8"))
	}

	// Exit right away; the exit status comes from `process.exitCode` set above.
	// eslint-disable-next-line no-process-exit
	process.exit()
})

_lint-docs/decode-response.js

Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
// Disabling this globally as I use it a lot to speed up common operations and cut down on
2+
// duplicate comparisons.
3+
/* eslint-disable no-bitwise */
4+
"use strict"
5+
6+
// Code points for the Windows-1252 bytes 0x80 through 0x9F, indexed by `byte & 0x1F`.
// The entries 0x81, 0x8D, 0x8F, 0x90 and 0x9D simply map to their own byte value.
const win1252Map = [
	// 0x80 - 0x87
	0x20AC, 0x81, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
	// 0x88 - 0x8F
	0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x8D, 0x017D, 0x8F,
	// 0x90 - 0x97
	0x90, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
	// 0x98 - 0x9F
	0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x9D, 0x017E, 0x0178,
]
40+
41+
/**
 * Decodes raw bytes to a string using one of the encoding names produced by `detectEncoding`.
 *
 * The input buffer is never modified.
 *
 * @param {Buffer} buffer Bytes to decode.
 * @param {string} encoding `"utf16be"` and `"win1252"` are handled here; any other value is
 * passed straight to `buffer.toString()`.
 * @returns {string} The decoded text.
 */
function decode(buffer, encoding) {
	switch (encoding) {
		case "utf16be": {
			// Node has no big-endian UTF-16 decoder, so byte-swap and decode as little-endian.
			// `swap16()` works in place and throws on odd lengths, so run it on a private copy
			// with any dangling trailing byte dropped (the "utf16le" decoder ignores such a
			// byte as well).
			const evenLength = buffer.length & ~1
			return Buffer.from(buffer.subarray(0, evenLength)).swap16().toString("utf16le")
		}

		case "win1252": {
			// Only the bytes 0x80-0x9F differ from Latin-1. Without any of them the built-in
			// decoder gives the right answer directly.
			const firstSpecial = buffer.findIndex((value) => (value & 0xE0) === 0x80)
			if (firstSpecial < 0) return buffer.toString("latin1")

			// Otherwise widen every byte to one UTF-16 code unit, remapping the special range.
			// The units are written explicitly as little-endian so the result does not depend
			// on the host's byte order.
			const widened = Buffer.alloc(buffer.length * 2)
			for (let i = 0; i < buffer.length; i++) {
				const value = buffer[i]
				const codeUnit = (value & 0xE0) === 0x80 ? win1252Map[value & 0x1F] : value
				widened.writeUInt16LE(codeUnit, i * 2)
			}
			return widened.toString("utf16le")
		}

		default:
			return buffer.toString(encoding)
	}
}
70+
71+
// Ref: https://encoding.spec.whatwg.org/#concept-encoding-get
// Each entry pairs an internal encoding name with one label that selects it. Labels are stored
// upper-cased; lookups fold the candidate name to match.
/** @type {Array<["utf8" | "utf16le" | "utf16be" | "win1252", string]>} */
const encodingMap = [
	["utf8", "UNICODE11UTF8"],
	["utf8", "UNICODE20UTF8"],
	["utf8", "UNICODE-1-1-UTF-8"],
	["utf8", "UTF8"],
	["utf8", "UTF-8"],
	["utf8", "X-UNICODE20UTF8"],
	["win1252", "ANSI_X3.4-1968"],
	["win1252", "ASCII"],
	["win1252", "CP1252"],
	["win1252", "CP819"],
	["win1252", "CSISOLATIN1"],
	["win1252", "IBM819"],
	["win1252", "ISO-8859-1"],
	["win1252", "ISO-IR-100"],
	["win1252", "ISO8859-1"],
	["win1252", "ISO88591"],
	["win1252", "ISO_8859-1"],
	["win1252", "ISO_8859-1:1987"],
	["win1252", "L1"],
	["win1252", "LATIN1"],
	["win1252", "US-ASCII"],
	["win1252", "WINDOWS-1252"],
	["win1252", "X-CP1252"],
	["utf16be", "UNICODEFFFE"],
	["utf16be", "UTF-16BE"],
	["utf16le", "CSUNICODE"],
	["utf16le", "ISO-10646-UCS-2"],
	["utf16le", "UCS-2"],
	["utf16le", "UNICODE"],
	["utf16le", "UNICODEFEFF"],
	["utf16le", "UTF-16"],
	["utf16le", "UTF-16LE"],
]

/**
 * Resolves an encoding label (for example the `charset` value of a `Content-Type` header) to
 * the internal encoding name listed for it in `encodingMap`.
 *
 * The comparison is ASCII case-insensitive.
 *
 * @param {string} name The label to look up.
 * @returns {"utf8" | "utf16le" | "utf16be" | "win1252" | undefined} The matching encoding, or
 * `undefined` when the label is not in the table.
 */
function extractNamedEncoding(name) {
	// Fold ASCII letters only. A full Unicode `toUpperCase()` would let unrelated characters
	// collapse onto ASCII ones (U+017F upper-cases to "S", for instance).
	const label = name.replace(/[a-z]/g, (letter) => letter.toUpperCase())
	const entry = encodingMap.find(([, expected]) => expected === label)
	return entry?.[0]
}
123+
124+
/**
 * Tests whether a byte value is ASCII whitespace: tab (0x09), line feed (0x0A), form feed
 * (0x0C), carriage return (0x0D) or space (0x20).
 *
 * @param {number | undefined} ch Byte value; `undefined` (an out-of-range buffer read) is
 * accepted and is never whitespace.
 * @returns {boolean}
 */
function isAsciiWhitespace(ch) {
	// Plain comparisons on purpose: a shifted-bitmask test needs care at both ends of the range
	// (0x20 itself must be included, and 0 must not wrap the shift count around to bit 31).
	return ch === 0x09 || ch === 0x0A || ch === 0x0C || ch === 0x0D || ch === 0x20
}
136+
137+
/**
 * Tests whether the bytes of `buffer` starting at index `i` match the pattern `sequence`
 * without reading at or past `end`.
 *
 * Pattern rules:
 * - a space matches one or more ASCII whitespace bytes,
 * - ASCII letters match case-insensitively,
 * - every other character must equal the byte exactly.
 *
 * @param {Uint8Array} buffer Bytes to inspect.
 * @param {number} i Index of the first byte to compare.
 * @param {number} end Exclusive upper bound for the match. A negative value (such as a failed
 * `indexOf`) or `null`/`undefined` means "up to the end of the buffer".
 * @param {string} sequence Pattern whose characters are all in the byte range.
 * @returns {boolean} `true` only when the whole pattern was matched before `end`.
 */
function startsWith(buffer, i, end, sequence) {
	const limit = end == null || end < 0 ? buffer.length : Math.min(end, buffer.length)

	// Upper-cases ASCII letters and leaves every other value alone, so both sides of a
	// comparison can be normalized the same way.
	const foldCase = (code) => {
		const upper = code & ~0x20
		return upper >= 0x41 && upper <= 0x5A ? upper : code
	}

	let pos = i
	for (let j = 0; j < sequence.length; j++) {
		// Running out of input before the pattern is consumed is a mismatch, not a match.
		if (pos >= limit) return false

		const expected = sequence.charCodeAt(j)
		if (expected === 0x20) {
			if (!isAsciiWhitespace(buffer[pos])) return false
			// Swallow the rest of the whitespace run, leaving `pos` on the first byte after it.
			pos++
			while (pos < limit && isAsciiWhitespace(buffer[pos])) pos++
		} else {
			if (foldCase(expected) !== foldCase(buffer[pos])) return false
			pos++
		}
	}

	return true
}
154+
155+
// Every literal `<meta>` attribute layout that `extractMetaEncoding` tries, each paired with
// the encoding it selects. For every label this yields the `charset=` forms first and the
// `http-equiv=` forms second, covering unquoted, double-quoted and single-quoted values and
// both the `>` and `/>` tag endings.
const metasToCheck = encodingMap.flatMap(([encoding, label]) => {
	const quotes = ["", "\"", "'"]
	const closers = [">", "/>"]
	const patterns = []

	for (const closer of closers) {
		for (const quote of quotes) {
			patterns.push([encoding, `charset=${quote}${label}${quote}${closer}`])
		}
	}

	for (const closer of closers) {
		for (const valueQuote of quotes) {
			for (const nameQuote of quotes) {
				const equiv = `http-equiv=${nameQuote}content-type${nameQuote}`
				const content = `content=${valueQuote}${label}${valueQuote}`
				patterns.push([encoding, `${equiv} ${content}${closer}`])
			}
		}
	}

	return patterns
})
181+
182+
/**
 * Returns the encoding of the first `metasToCheck` pattern that `startsWith` reports as a
 * match at index `i` of `buffer`, or `undefined` when none of them match.
 */
function extractMetaEncoding(buffer, i, end) {
	// Exceptionally lazy and not quite fully correct
	const hit = metasToCheck.find((candidate) => startsWith(buffer, i, end, candidate[1]))
	return hit === undefined ? undefined : hit[0]
}
189+
190+
/**
191+
* @returns {"utf8" | "utf16le" | "utf16be" | "win1252"}
192+
*/
193+
function detectEncoding(headers, prefix) {
194+
// This follows the HTML spec to the extent Node supports the various encodings. I'm *not*,
195+
// however, going to bend over backwards to support obscure encodings.
196+
// https://html.spec.whatwg.org/multipage/parsing.html#prescan-a-byte-stream-to-determine-its-encoding
197+
198+
if (startsWith(prefix, 0, prefix.length, "\xEF\xBB\xBF")) return "utf8"
199+
if (startsWith(prefix, 0, prefix.length, "\xFE\xFF")) return "utf16le"
200+
if (startsWith(prefix, 0, prefix.length, "\xFF\xFE")) return "utf16be"
201+
202+
const contentType = headers["content-type"]
203+
if (contentType) {
204+
const result = (/;\s*charset="?([\w-]+)"?/i).exec(contentType)
205+
if (result) {
206+
const encoding = extractNamedEncoding(result[1])
207+
if (encoding) return encoding
208+
}
209+
}
210+
211+
if (startsWith(prefix, 0, prefix.length, "\x3c\x00\x3F\x00\x78\x00")) return "utf16le"
212+
if (startsWith(prefix, 0, prefix.length, "\x00\x3c\x00\x3F\x00\x78")) return "utf16be"
213+
214+
for (let i = 0, end = prefix.indexOf("<!--", 0, "latin1"); i < prefix.length;) {
215+
if (i === end) {
216+
i = prefix.indexOf("-->", i + 4, "latin1")
217+
if (i < 0) return undefined
218+
i += 3
219+
end = prefix.indexOf("<!--", i, "latin1")
220+
} else if (prefix[i] === 0x3C) {
221+
i++
222+
if (i === prefix.length) return "win1252"
223+
224+
if (startsWith(prefix, i, end, "meta ")) {
225+
const encoding = extractMetaEncoding(prefix, i, end)
226+
if (encoding) return encoding
227+
} else if (prefix[i] === 0x21 || prefix[i] === 0x2F || prefix[i] === 0x3F) {
228+
i = prefix.indexOf(0x3E, i)
229+
if (i < 0) return "win1252"
230+
i++
231+
}
232+
}
233+
}
234+
235+
return "win1252"
236+
}
237+
238+
/**
 * Decodes a response body to a string. The encoding is chosen by `detectEncoding` from the
 * response headers and at most the first 1024 bytes of the body.
 */
function decodeResponse(headers, body) {
	const sniffWindow = body.subarray(0, 1024)
	const encoding = detectEncoding(headers, sniffWindow)
	return decode(body, encoding)
}
241+
242+
module.exports = {
243+
decodeResponse,
244+
}

0 commit comments

Comments
 (0)