Skip to content

Commit ba88b63

Browse files
author
yihao03
committed
Measure translation execution time and refactor segment handling to use arrays for easier management
1 parent e45ede2 commit ba88b63

File tree

1 file changed

+50
-17
lines changed

1 file changed

+50
-17
lines changed

i18n/controllers/recurTranslate.ts

Lines changed: 50 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ const createParser = () =>
2525
(sax as any).createStream(true, { trim: false }, { strictEntities: true });
2626

2727
async function translate(language: string, filePath: string): Promise<void> {
28+
const startTime = new Date().getTime();
2829
try {
2930
// Pipe the XML file into the parser.
3031
const input_dir = fileURLToPath(
@@ -45,6 +46,9 @@ async function translate(language: string, filePath: string): Promise<void> {
4546
console.log(`Translation saved to ${output_path}`);
4647
} catch (parseErr) {
4748
console.error("Error parsing XML:", parseErr);
49+
} finally {
50+
const elapsed = new Date().getTime() - startTime;
51+
console.log(filePath + " took " + elapsed / 1000.0 + " seconds");
4852
}
4953
}
5054

@@ -59,7 +63,7 @@ async function recursivelyTranslate(
5963
return await translateChunk(ori); // translate the chunk
6064
}
6165

62-
let subTranslated = "";
66+
let subTranslated: string[] = [];
6367
// continue splitting the chunk
6468
// Create a SAX parser in strict mode to split source into chunks.
6569
await new Promise<void>((resolve, reject) => {
@@ -71,6 +75,10 @@ async function recursivelyTranslate(
7175
let subIsRecording = false;
7276

7377
subParser.on("opentag", node => {
78+
if (node.name === "WRAPPER") {
79+
return;
80+
}
81+
7482
subCurrentDepth++;
7583

7684
// If we're at depth 2, this is the start of a new segment.
@@ -95,17 +103,14 @@ async function recursivelyTranslate(
95103
subSegments[subSegments.length - 1][0]
96104
) {
97105
subSegments[subSegments.length - 1][1] += text;
98-
subSegments[subSegments.length - 1][0] = true;
99-
} else {
100-
if (
106+
} else if (
101107
text.trim() !== "" ||
102108
text.trim() === "," ||
103109
text.trim() === "."
104110
) {
105111
subSegments.push([false, text]);
106112
} else {
107113
subSegments.push([true, text]);
108-
}
109114
}
110115
}
111116
});
@@ -117,16 +122,36 @@ async function recursivelyTranslate(
117122
});
118123

119124
subParser.on("closetag", tagName => {
125+
if (tagName === "WRAPPER") {
126+
return;
127+
}
128+
120129
if (subIsRecording) {
121130
subCurrentSegment += `</${tagName}>`;
122131
}
123132

124133
if (subCurrentDepth === 2) {
125134
// We are closing a segment element.
126-
if (tagName === "LATEXINLINE") {
135+
if (
136+
tagName === "LATEXINLINE" ||
137+
tagName === "LATEX" ||
138+
tagName === "SNIPPET" ||
139+
tagName === "SCHEMEINLINE"
140+
) {
127141
subSegments.push([false, subCurrentSegment]);
128142
} else {
143+
if (
144+
subSegments.length > 0 &&
145+
subSegments[subSegments.length - 1][0] &&
146+
(subSegments[subSegments.length - 1][1].length +
147+
subCurrentSegment.length) <
148+
MAXLEN
149+
) {
150+
console.log("Merging segments");
151+
subSegments[subSegments.length - 1][1] += subCurrentSegment;
152+
} else {
129153
subSegments.push([true, subCurrentSegment]);
154+
}
130155
}
131156
subCurrentSegment = "";
132157
subIsRecording = false;
@@ -151,20 +176,20 @@ async function recursivelyTranslate(
151176
subParser.on("end", async () => {
152177
for (const segment of subSegments) {
153178
if (segment[0]) {
154-
subTranslated += await helper(segment[1], false);
179+
subTranslated.push(await helper(segment[1], false));
155180
} else {
156-
subTranslated += segment[1];
181+
subTranslated.push(segment[1]);
157182
}
158183
}
159184
resolve();
160185
});
161186

162187
subParser.on("error", reject);
163188

164-
Readable.from(ori).pipe(subParser);
189+
Readable.from("<WRAPPER>" + ori + "</WRAPPER>").pipe(subParser);
165190
});
166191

167-
return subTranslated;
192+
return subTranslated.join("");
168193
}
169194

170195
// Create a SAX parser in strict mode to split source into chunks.
@@ -173,7 +198,7 @@ async function recursivelyTranslate(
173198
// const assistant = await createAssistant(language, ai);
174199
const assistant_id = "asst_BLVYfog5DpWrbu3fW3o2oD4r";
175200
const thread = await ai.beta.threads.create();
176-
let translated = "";
201+
let translated: String[] = [];
177202

178203
try {
179204
await new Promise<void>((resolve, reject) => {
@@ -250,9 +275,9 @@ async function recursivelyTranslate(
250275
parser.on("end", async () => {
251276
for (const segment of segments) {
252277
if (segment[0]) {
253-
translated += await helper(segment[1], false);
278+
translated.push(await helper(segment[1], false));
254279
} else {
255-
translated += segment[1];
280+
translated.push(segment[1]);
256281
}
257282
}
258283
console.log(`Done translating all segments.`);
@@ -264,14 +289,19 @@ async function recursivelyTranslate(
264289
fs.createReadStream(path).pipe(parser);
265290
});
266291

267-
return translated;
292+
return translated.join("");
268293
} catch (parseErr) {
269294
console.error("Error parsing XML:", parseErr);
270-
return translated + "<!-- Error parsing this section -->";
295+
return translated.join("") + "<!-- Error parsing this section -->";
271296
}
272297

273298
async function translateChunk(chunk: string): Promise<string> {
299+
if (chunk.trim() === "" || chunk.trim() === "," || chunk.trim() === ".") {
300+
return chunk;
301+
}
302+
274303
let translatedChunk = "";
304+
console.log("Translating chunk of length: " + chunk.length + "\n" + chunk);
275305

276306
try {
277307
await ai.beta.threads.messages.create(thread.id, {
@@ -364,7 +394,10 @@ async function recursivelyTranslate(
364394
return translatedChunk;
365395
} catch (err) {
366396
console.log(`Error occured while translating ${path}:\n ` + err);
367-
return translatedChunk + "<!-- Error translating this section -->";
397+
return (
398+
translatedChunk +
399+
`<!-- Error occured while translating this section-->\n<!-- Error: ${err.length < 50 ? err : err.subString(0, 50) + "..."}-->`
400+
);
368401
}
369402
}
370403
}
@@ -393,4 +426,4 @@ function strongEscapeXML(str: string): string {
393426
.replace(/>/g, "&gt;")
394427
.replace(/"/g, "&quot;")
395428
.replace(/'/g, "&apos;");
396-
}
429+
}

0 commit comments

Comments
 (0)