Skip to content

Commit 9d0d443

Browse files
authored
Generate all plurals script (#5141)
* Generate all plurals script
1 parent 3aeedd2 commit 9d0d443

File tree

1 file changed

+167
-0
lines changed

1 file changed

+167
-0
lines changed

scripts/generateAllPlurals.mjs

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
/*
2+
* Copyright 2023 Adobe. All rights reserved.
3+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License. You may obtain a copy
5+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
6+
*
7+
* Unless required by applicable law or agreed to in writing, software distributed under
8+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9+
* OF ANY KIND, either express or implied. See the License for the specific language
10+
* governing permissions and limitations under the License.
11+
*/
12+
13+
/* Scrapes the CLDR language plural rules chart at https://www.unicode.org/cldr/charts/43/supplemental/language_plural_rules.html#comparison
14+
* and generates a list of all possible values needed between all locales for plural rules.
15+
* Our NumberParser uses it to generate all literal strings, e.g. the units in "1 foot" and "2 feet"; other locales have more than two plural forms.
16+
*/
17+
import {JSDOM} from 'jsdom';
18+
19+
/**
 * Reads the numeric range described by every cell of a header row.
 *
 * Each cell's text is trimmed and split on '-': "2-16" yields [2, 16], a
 * single value such as "1" yields [1, 1], and a blank cell yields an empty
 * array so that positions stay aligned with the table columns. A trailing
 * ".x" placeholder (e.g. "0.x") is replaced with ".1" before conversion.
 *
 * @param {Element} row - Row whose child cells are walked via
 *   firstElementChild / nextElementSibling.
 * @returns {Array<Array<number>>} One entry per cell: [] or [start, end].
 */
function getRange(row) {
  const range = [];

  // A plain for-loop (rather than do/while) so a row without cells yields []
  // instead of dereferencing null.
  for (let th = row.firstElementChild; th; th = th.nextElementSibling) {
    // Trim first: Number('\n') is 0, so an untrimmed whitespace-only cell would
    // otherwise be recorded as [0, 0], and trailing whitespace would stop the
    // /\.x$/ replacement from matching.
    const [start, end = start] = th.textContent.trim().split('-');

    if (start === '') {
      range.push([]);
      continue;
    }

    range.push([
      Number(start.replace(/\.x$/, '.1')),
      Number(end.replace(/\.x$/, '.1')),
    ]);
  }

  return range;
}
41+
42+
/**
 * Collects the `title` of every direct SPAN child of a table cell.
 *
 * @param {Element} cell - Cell whose child elements are walked via
 *   firstElementChild / nextElementSibling.
 * @returns {string[]} Titles of the SPAN children, in document order; empty
 *   when the cell has no child elements.
 */
function getLocales(cell) {
  const locales = [];

  // Guarded loop: a cell with no child elements has a null firstElementChild,
  // which a do/while would dereference.
  for (let element = cell.firstElementChild; element; element = element.nextElementSibling) {
    if (element.tagName === 'SPAN') {
      locales.push(element.title);
    }
  }

  return locales;
}
55+
56+
/**
 * Maps each plural category to the header ranges covered by its cells.
 *
 * Every cell's `title` names a category and its `colspan` (default 1) says how
 * many consecutive header columns it covers. Columns are counted from index 1
 * of `range`, i.e. the first entry of `range` is never assigned.
 *
 * @param {Element} tr - Row whose cells carry `title` and optional `colspan`.
 * @param {string[]} categories - Category names to pre-seed with empty lists.
 * @param {Array} range - Per-column ranges, as returned by getRange.
 * @returns {Object<string, Array>} Category name -> list of range entries.
 */
function getPlurals(tr, categories, range) {
  const rules = Object.fromEntries(categories.map((key) => [key, []]));

  let index = 1;

  // Guarded loop so that a row without cells returns the empty rule lists.
  for (let td = tr.firstElementChild; td; td = td.nextElementSibling) {
    const category = td.title;

    const colspan = td.hasAttribute('colspan')
      ? Number(td.getAttribute('colspan'))
      : 1;
    // Every cell covers at least one column, including when colspan is 0 or
    // not a number.
    const columns = colspan >= 1 ? colspan : 1;

    // The page may use a category that `categories` does not list; create the
    // bucket on demand instead of failing on `undefined.push`.
    if (!Object.hasOwn(rules, category)) {
      rules[category] = [];
    }

    for (let covered = 0; covered < columns; covered++) {
      rules[category].push(range[index]);
      index++;
    }
  }

  return rules;
}
79+
80+
/**
 * Extracts, from the integer and fraction comparison tables, the set of
 * numbers that start the first range of each multi-column plural rule.
 *
 * Each table is read as a sequence of groups: a header row (first cell is a
 * TH) supplies the column ranges, a row whose second cell has class "l"
 * supplies the group's languages, and any other row supplies the rules.
 *
 * @param {object} dom - JSDOM instance; only `dom.window.HTMLTableCellElement`
 *   is read.
 * @param {Element} integerTable - Table processed first.
 * @param {Element} fractionTable - Table processed second.
 * @returns {Array<number>} Unique values, integer-table values first.
 */
function extractTable(dom, integerTable, fractionTable) {
  // Appends a finished group's rules to every language of that group.
  function mergeGroup(results, group) {
    for (const language of group.languages) {
      if (!results[language]) {
        results[language] = {};
      }

      for (const [rule, ranges] of Object.entries(group.rules)) {
        // Default to [] so a rule new to an existing language does not pick
        // up a stray `undefined` entry.
        results[language][rule] = [...(results[language][rule] ?? []), ...ranges];
      }
    }
  }

  // Builds language -> rule -> list of ranges for one table.
  function extract(table) {
    const tbody = table?.querySelector('tbody');
    if (!tbody) {
      throw new Error('Plural rules table is missing or has no <tbody>');
    }

    const results = {};
    let current = null;

    for (let tr = tbody.firstElementChild; tr; tr = tr.nextElementSibling) {
      const firstCell = tr.firstElementChild;
      if (!firstCell) {
        continue;
      }

      if (firstCell.tagName === 'TH') {
        // A header row closes the previous group and opens a new one.
        if (current) {
          mergeGroup(results, current);
        }

        current = {
          range: getRange(tr),
          languages: [],
          rules: {},
        };
      } else if (!current) {
        // Rows ahead of the first header have no ranges to map onto.
        continue;
      } else if (
        tr.children[1] instanceof dom.window.HTMLTableCellElement &&
        tr.children[1].classList.contains('l')
      ) {
        current.languages = getLocales(tr.children[1]);
      } else {
        // The first language of the group supplies the category names.
        const [lang] = current.languages;
        const { pluralCategories } = new Intl.PluralRules(lang).resolvedOptions();

        current.rules = getPlurals(tr, pluralCategories, current.range);
      }
    }

    // The last group is not followed by a header row, so merge it here;
    // otherwise its languages would be dropped from the results.
    if (current) {
      mergeGroup(results, current);
    }

    return results;
  }

  // Adds the start of the first range of every rule that covers more than one
  // column. NOTE(review): the `> 1` threshold is kept from the original;
  // confirm that single-column rules are meant to be skipped.
  function collectFirstStarts(results, values) {
    for (const rules of Object.values(results)) {
      for (const ranges of Object.values(rules)) {
        if (ranges.length > 1) {
          const start = ranges[0]?.[0];
          // Blank or unparsable header cells give undefined/NaN; keep them
          // out of the generated list.
          if (Number.isFinite(start)) {
            values.add(start);
          }
        }
      }
    }
  }

  const values = new Set();

  collectFirstStarts(extract(integerTable), values);
  collectFirstStarts(extract(fractionTable), values);

  return Array.from(values);
}
157+
158+
/**
 * Downloads the CLDR plural rules chart, parses it with JSDOM, and prints the
 * values extracted from its two `.pluralComp` tables.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the HTTP request fails or the expected tables are absent.
 */
async function main() {
  const response = await fetch('https://www.unicode.org/cldr/charts/43/supplemental/language_plural_rules.html#comparison');
  // fetch only rejects on network errors; an HTTP error page would otherwise
  // be parsed as if it were the chart.
  if (!response.ok) {
    throw new Error(`Failed to download plural rules chart: ${response.status} ${response.statusText}`);
  }

  const data = await response.text();
  const dom = new JSDOM(data);

  const [integerTable, fractionTable] = dom.window.document.querySelectorAll('.pluralComp');
  if (!integerTable || !fractionTable) {
    throw new Error('Expected two ".pluralComp" tables in the plural rules chart');
  }

  const values = extractTable(dom, integerTable, fractionTable);
  console.log(values);
}

main().catch((error) => {
  // Report the failure and signal it to the caller through the exit code.
  console.error(error);
  process.exitCode = 1;
});

0 commit comments

Comments
 (0)