From c97691c8efc3cf72862593059f22a451e7b0a79e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Wed, 6 Nov 2024 10:43:20 -0500 Subject: [PATCH 1/6] perf: avoid large sparse (holey) array in bidiMirroringGlyphMap #80 introduced a performance regression: the Bidi_Mirroring_Glyph output (built from bidiMirroringGlyphMap) was regenerated inside the property iteration, once per iterated item, instead of once after the iteration. This commit fixes that as well. --- scripts/utils.js | 69 ++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/scripts/utils.js b/scripts/utils.js index cdd42f0..bc9314b 100644 --- a/scripts/utils.js +++ b/scripts/utils.js @@ -84,7 +84,13 @@ const writeFiles = function(options) { return; } const dirMap = {}; - const bidiMirroringGlyphMap = []; + /** + * A list of flatten (x, y) pairs, + * where x is a codepoint, y := codepoint(z) - x, + * where z is the BidiMirroringGlyph of character(x) and codepoint(z) > x + * @type number[] + */ + const bidiMirroringGlyphFlatPairs = []; const auxMap = {}; Object.keys(map).forEach(function(item) { const codePoints = map[item]; @@ -112,10 +118,11 @@ const writeFiles = function(options) { ) ) { if (type == 'Bidi_Mirroring_Glyph') { - const shortName = item.codePointAt(0); + const toCodepoint = item.codePointAt(0); codePoints.toArray().forEach(function(codePoint) { - console.assert(!bidiMirroringGlyphMap[codePoint]); - bidiMirroringGlyphMap[codePoint] = shortName; + if (codePoint < toCodepoint) { + bidiMirroringGlyphFlatPairs.push(codePoint, toCodepoint - codePoint); + } }); } else { if (!auxMap[type]) { @@ -124,34 +131,7 @@ const writeFiles = function(options) { auxMap[type].push([item, codePoints]); } } - if (isNamesCanon) { - return; - } - - if (type == 'Bidi_Mirroring_Glyph') { - const dir = path.resolve( - __dirname, '..', - 'output', 'unicode-' + version, type - ); - if (!hasKey(dirMap, type)) { - dirMap[type] = []; - } - fs.mkdirSync(dir, { recursive: true }); - // `Bidi_Mirroring_Glyph/index.js` - // Note: `Bidi_Mirroring_Glyph` doesn’t have 
repeated strings; don’t gzip. - const flatPairs = bidiMirroringGlyphMap - .flatMap((a, b) => a < b ? [a, b - a] : []); - const output = [ - `const chr=String.fromCodePoint`, - `const pair=(t,u,v)=>[t?u+v:v,chr(t?u:u+v)]`, - `module.exports=new Map(${ - jsesc(flatPairs) - }.map((v,i,a)=>pair(i&1,a[i^1],v)))` - ].join(';'); - fs.writeFileSync( - path.resolve(dir, 'index.js'), - output - ); + if (isNamesCanon || type == 'Bidi_Mirroring_Glyph') { return; } @@ -211,6 +191,31 @@ const writeFiles = function(options) { `module.exports=${ symbolsExports }` ); }); + if (options.type == 'Bidi_Mirroring_Glyph') { + const type = options.type; + const dir = path.resolve( + __dirname, '..', + 'output', 'unicode-' + version, type + ); + if (!hasKey(dirMap, type)) { + dirMap[type] = []; + } + fs.mkdirSync(dir, { recursive: true }); + // `Bidi_Mirroring_Glyph/index.js` + // Note: `Bidi_Mirroring_Glyph` doesn’t have repeated strings; don’t gzip. + const output = [ + `const chr=String.fromCodePoint`, + `const pair=(t,u,v)=>[t?u+v:v,chr(t?u:u+v)]`, + `module.exports=new Map(${ + JSON.stringify(bidiMirroringGlyphFlatPairs) + }.map((v,i,a)=>pair(i&1,a[i^1],v)))` + ].join(';'); + fs.writeFileSync( + path.resolve(dir, 'index.js'), + output + ); + return; + } Object.keys(auxMap).forEach(function(type) { const dir = path.resolve( __dirname, '..', From 89544f1ceae6ce92fc6cebf0da33cd522a0d1ff2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Wed, 6 Nov 2024 11:31:55 -0500 Subject: [PATCH 2/6] perf: use native JSON.stringify for gzipped data --- scripts/utils.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/utils.js b/scripts/utils.js index bc9314b..21baff8 100644 --- a/scripts/utils.js +++ b/scripts/utils.js @@ -11,7 +11,7 @@ const gzipInline = function(data) { if (data instanceof Map) { return `new Map(${ gzipInline([...data]) })`; } - const json = jsesc(data, { 'json': true }); + const json = JSON.stringify(data); const gzipBuffer = 
zlib.gzipSync(json); const str = gzipBuffer.toString('base64'); return `JSON.parse(require('zlib').gunzipSync(Buffer.from('${ str }','base64')))`; From 1f4e8ab8c5b3b914b1d495baf029cd93657e1ce3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Thu, 7 Nov 2024 08:26:23 -0500 Subject: [PATCH 3/6] minor cleanup --- scripts/generate-data.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/generate-data.js b/scripts/generate-data.js index c50f089..9405dcc 100644 --- a/scripts/generate-data.js +++ b/scripts/generate-data.js @@ -2,7 +2,6 @@ const resources = require('../data/resources.js'); const generateData = require('../index.js'); -const utils = require('../scripts/utils.js'); // ----------------------------------------------------------------------------- @@ -28,7 +27,9 @@ const complicatedWorkThatTakesTime = (resource, callback) => { console.log('[%s] Worker %d \u2192 Unicode v%s', getTime(), cluster.worker.id, version); + console.groupCollapsed(); generateData(version); + console.groupEnd(); complicatedWorkThatTakesTime( resource.slice(1), @@ -40,7 +41,7 @@ const complicatedWorkThatTakesTime = (resource, callback) => { } }; -if (cluster.isMaster) { +if (cluster.isPrimary) { for (let index = 0; index < numCPUs; index++) { cluster.fork(); From c6f9130c3e4b29f668094d7567e7a9d137c372a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Thu, 7 Nov 2024 09:16:28 -0500 Subject: [PATCH 4/6] build: exit with non-zero code if worker throws --- scripts/generate-data.js | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/scripts/generate-data.js b/scripts/generate-data.js index 9405dcc..51e62dc 100644 --- a/scripts/generate-data.js +++ b/scripts/generate-data.js @@ -44,7 +44,13 @@ const complicatedWorkThatTakesTime = (resource, callback) => { if (cluster.isPrimary) { for (let index = 0; index < numCPUs; index++) { - cluster.fork(); + const worker = 
cluster.fork(); + worker.on('message', (error) => { + for (const id in cluster.workers) { + cluster.workers[id].kill(); + } + throw new Error(`Worker ${worker.id} encountered an error: ${error}`); + }) } cluster.on('online', (worker) => { @@ -77,4 +83,14 @@ if (cluster.isPrimary) { }); }); + process.on('uncaughtException', (error) => { + console.error(error); + process.send(error.message); + }); + + process.on('unhandledRejection', (error) => { + console.error(error); + process.send(error.message || error); + }); + } From f7fdf25aab5040a480b81e4dd86f0485d211d5be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Thu, 7 Nov 2024 09:19:31 -0500 Subject: [PATCH 5/6] test: throw an error --- scripts/parse-blocks-scripts-properties.js | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/parse-blocks-scripts-properties.js b/scripts/parse-blocks-scripts-properties.js index 2f33520..78433cb 100644 --- a/scripts/parse-blocks-scripts-properties.js +++ b/scripts/parse-blocks-scripts-properties.js @@ -88,6 +88,7 @@ const parseBlocksScriptsProperties = function(type, version) { const parseDerivedBinaryProperties = function(version) { if (version === '3.1.1' || version === '3.1.0' || version === '3.0.1' || version === '3.0.0' || parseInt(version.split('.')[0], 10) < 3) { + throw new Error("Unexpected data"); // Unicode <= 3.1.1 does not provide derived-binary-properties, // so we should derive Bidi_Mirrored from the UnicodeData const source = utils.readDataFile(version, 'database'); From 89937b012c112bd1ae6120467dcea30b334d0194 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Thu, 7 Nov 2024 09:20:58 -0500 Subject: [PATCH 6/6] Revert "test: throw an error" This reverts commit f7fdf25aab5040a480b81e4dd86f0485d211d5be. 
--- scripts/parse-blocks-scripts-properties.js | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/parse-blocks-scripts-properties.js b/scripts/parse-blocks-scripts-properties.js index 78433cb..2f33520 100644 --- a/scripts/parse-blocks-scripts-properties.js +++ b/scripts/parse-blocks-scripts-properties.js @@ -88,7 +88,6 @@ const parseBlocksScriptsProperties = function(type, version) { const parseDerivedBinaryProperties = function(version) { if (version === '3.1.1' || version === '3.1.0' || version === '3.0.1' || version === '3.0.0' || parseInt(version.split('.')[0], 10) < 3) { - throw new Error("Unexpected data"); // Unicode <= 3.1.1 does not provide derived-binary-properties, // so we should derive Bidi_Mirrored from the UnicodeData const source = utils.readDataFile(version, 'database');