Skip to content

Commit e45ede2

Browse files
author
yihao03
committed
Refactor recursion logic in translate function to improve orphaned text handling and error reporting
1 parent f0316dc commit e45ede2

File tree

1 file changed

+26
-18
lines changed

1 file changed

+26
-18
lines changed

i18n/controllers/recurTranslate.ts

Lines changed: 26 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@ import dotenv from "dotenv";
66
import sax from "sax";
77
import { Readable } from "stream";
88
import { fileURLToPath } from "url";
9-
import { strict } from "assert";
109

1110
dotenv.config();
1211

@@ -20,9 +19,10 @@ const ai = new OpenAI({
2019
baseURL: process.env.AI_BASEURL
2120
});
2221

23-
const MAXLEN = 5000;
22+
const MAXLEN = 3000;
2423

25-
const createParser = () => (sax as any).createStream(true, { trim: false }, { strictEntities: true });
24+
const createParser = () =>
25+
(sax as any).createStream(true, { trim: false }, { strictEntities: true });
2626

2727
async function translate(language: string, filePath: string): Promise<void> {
2828
try {
@@ -92,18 +92,20 @@ async function recursivelyTranslate(
9292
} else {
9393
if (
9494
subSegments.length > 0 &&
95-
subSegments[subSegments.length - 1][1] != undefined
95+
subSegments[subSegments.length - 1][0]
9696
) {
9797
subSegments[subSegments.length - 1][1] += text;
9898
subSegments[subSegments.length - 1][0] = true;
99-
100-
// if (text == "\n " || text == "\r\n " || text == ", \n" || text == ", \r\n") {
101-
// subSegments.push([false, text]);
102-
// } else {
103-
// subSegments.push([true, text]);
104-
// }
10599
} else {
106-
subSegments.push([true, text]);
100+
if (
101+
text.trim() !== "" ||
102+
text.trim() === "," ||
103+
text.trim() === "."
104+
) {
105+
subSegments.push([false, text]);
106+
} else {
107+
subSegments.push([true, text]);
108+
}
107109
}
108110
}
109111
});
@@ -121,7 +123,11 @@ async function recursivelyTranslate(
121123

122124
if (subCurrentDepth === 2) {
123125
// We are closing a segment element.
124-
subSegments.push([true, subCurrentSegment]);
126+
if (tagName === "LATEXINLINE") {
127+
subSegments.push([false, subCurrentSegment]);
128+
} else {
129+
subSegments.push([true, subCurrentSegment]);
130+
}
125131
subCurrentSegment = "";
126132
subIsRecording = false;
127133
}
@@ -336,18 +342,17 @@ async function recursivelyTranslate(
336342
clean.on("error", error => {
337343
console.log(
338344
"error encountered when validating XML: " +
339-
error +
340-
"\nvalidating section: " +
341-
chunk.substring(0, 100) +
342-
"..."
345+
error + "\nfile: " + path +
346+
"\n section: " +
347+
(safeText.length > 50 ? safeText.substring(0, 100) + "..." : safeText )
343348
);
344349

345350
// Attempt to recover using the internal parser
346351
try {
347352
clean._parser.resume();
348353
} catch (e) {
349354
console.log("Failed to resume parser:", e);
350-
reject();
355+
reject(e);
351356
}
352357
});
353358

@@ -375,7 +380,10 @@ function formatAttributes(attrs) {
375380
}
376381

377382
function escapeXML(str: string): string {
378-
return str.replace(/&(?!(?:amp;|lt;|gt;|apos;|quot;))/g, "&amp;");
383+
return str
384+
.replace(/&(?!(?:amp;|lt;|gt;|apos;|quot;))/g, "&amp;")
385+
.replace(/<([^a-zA-Z\/])/g, "&lt;$1") // Fix lone < characters
386+
.replace(/([^a-zA-Z0-9"'\s\/])>/g, "$1&gt;"); // Fix lone > characters
379387
}
380388

381389
function strongEscapeXML(str: string): string {

0 commit comments

Comments
 (0)