@@ -25,6 +25,7 @@ const createParser = () =>
25
25
( sax as any ) . createStream ( true , { trim : false } , { strictEntities : true } ) ;
26
26
27
27
async function translate ( language : string , filePath : string ) : Promise < void > {
28
+ const startTime = new Date ( ) . getTime ( ) ;
28
29
try {
29
30
// Pipe the XML file into the parser.
30
31
const input_dir = fileURLToPath (
@@ -45,6 +46,9 @@ async function translate(language: string, filePath: string): Promise<void> {
45
46
console . log ( `Translation saved to ${ output_path } ` ) ;
46
47
} catch ( parseErr ) {
47
48
console . error ( "Error parsing XML:" , parseErr ) ;
49
+ } finally {
50
+ const elapsed = new Date ( ) . getTime ( ) - startTime ;
51
+ console . log ( filePath + " took " + elapsed / 1000.0 + " seconds" ) ;
48
52
}
49
53
}
50
54
@@ -59,7 +63,7 @@ async function recursivelyTranslate(
59
63
return await translateChunk ( ori ) ; // translate the chunk
60
64
}
61
65
62
- let subTranslated = "" ;
66
+ let subTranslated : string [ ] = [ ] ;
63
67
// continue splitting the chunk
64
68
// Create a SAX parser in strict mode to split source into chunks.
65
69
await new Promise < void > ( ( resolve , reject ) => {
@@ -71,6 +75,10 @@ async function recursivelyTranslate(
71
75
let subIsRecording = false ;
72
76
73
77
subParser . on ( "opentag" , node => {
78
+ if ( node . name === "WRAPPER" ) {
79
+ return ;
80
+ }
81
+
74
82
subCurrentDepth ++ ;
75
83
76
84
// If we're at depth 2, this is the start of a new segment.
@@ -95,17 +103,14 @@ async function recursivelyTranslate(
95
103
subSegments [ subSegments . length - 1 ] [ 0 ]
96
104
) {
97
105
subSegments [ subSegments . length - 1 ] [ 1 ] += text ;
98
- subSegments [ subSegments . length - 1 ] [ 0 ] = true ;
99
- } else {
100
- if (
106
+ } else if (
101
107
text . trim ( ) !== "" ||
102
108
text . trim ( ) === "," ||
103
109
text . trim ( ) === "."
104
110
) {
105
111
subSegments . push ( [ false , text ] ) ;
106
112
} else {
107
113
subSegments . push ( [ true , text ] ) ;
108
- }
109
114
}
110
115
}
111
116
} ) ;
@@ -117,16 +122,36 @@ async function recursivelyTranslate(
117
122
} ) ;
118
123
119
124
subParser . on ( "closetag" , tagName => {
125
+ if ( tagName === "WRAPPER" ) {
126
+ return ;
127
+ }
128
+
120
129
if ( subIsRecording ) {
121
130
subCurrentSegment += `</${ tagName } >` ;
122
131
}
123
132
124
133
if ( subCurrentDepth === 2 ) {
125
134
// We are closing a segment element.
126
- if ( tagName === "LATEXINLINE" ) {
135
+ if (
136
+ tagName === "LATEXINLINE" ||
137
+ tagName === "LATEX" ||
138
+ tagName === "SNIPPET" ||
139
+ tagName === "SCHEMEINLINE"
140
+ ) {
127
141
subSegments . push ( [ false , subCurrentSegment ] ) ;
128
142
} else {
143
+ if (
144
+ subSegments . length > 0 &&
145
+ subSegments [ subSegments . length - 1 ] [ 0 ] &&
146
+ ( subSegments [ subSegments . length - 1 ] [ 1 ] . length +
147
+ subCurrentSegment . length ) <
148
+ MAXLEN
149
+ ) {
150
+ console . log ( "Merging segments" ) ;
151
+ subSegments [ subSegments . length - 1 ] [ 1 ] += subCurrentSegment ;
152
+ } else {
129
153
subSegments . push ( [ true , subCurrentSegment ] ) ;
154
+ }
130
155
}
131
156
subCurrentSegment = "" ;
132
157
subIsRecording = false ;
@@ -151,20 +176,20 @@ async function recursivelyTranslate(
151
176
subParser . on ( "end" , async ( ) => {
152
177
for ( const segment of subSegments ) {
153
178
if ( segment [ 0 ] ) {
154
- subTranslated += await helper ( segment [ 1 ] , false ) ;
179
+ subTranslated . push ( await helper ( segment [ 1 ] , false ) ) ;
155
180
} else {
156
- subTranslated += segment [ 1 ] ;
181
+ subTranslated . push ( segment [ 1 ] ) ;
157
182
}
158
183
}
159
184
resolve ( ) ;
160
185
} ) ;
161
186
162
187
subParser . on ( "error" , reject ) ;
163
188
164
- Readable . from ( ori ) . pipe ( subParser ) ;
189
+ Readable . from ( "<WRAPPER>" + ori + "</WRAPPER>" ) . pipe ( subParser ) ;
165
190
} ) ;
166
191
167
- return subTranslated ;
192
+ return subTranslated . join ( "" ) ;
168
193
}
169
194
170
195
// Create a SAX parser in strict mode to split source into chunks.
@@ -173,7 +198,7 @@ async function recursivelyTranslate(
173
198
// const assistant = await createAssistant(language, ai);
174
199
const assistant_id = "asst_BLVYfog5DpWrbu3fW3o2oD4r" ;
175
200
const thread = await ai . beta . threads . create ( ) ;
176
- let translated = "" ;
201
+ let translated : String [ ] = [ ] ;
177
202
178
203
try {
179
204
await new Promise < void > ( ( resolve , reject ) => {
@@ -250,9 +275,9 @@ async function recursivelyTranslate(
250
275
parser . on ( "end" , async ( ) => {
251
276
for ( const segment of segments ) {
252
277
if ( segment [ 0 ] ) {
253
- translated += await helper ( segment [ 1 ] , false ) ;
278
+ translated . push ( await helper ( segment [ 1 ] , false ) ) ;
254
279
} else {
255
- translated += segment [ 1 ] ;
280
+ translated . push ( segment [ 1 ] ) ;
256
281
}
257
282
}
258
283
console . log ( `Done translating all segments.` ) ;
@@ -264,14 +289,19 @@ async function recursivelyTranslate(
264
289
fs . createReadStream ( path ) . pipe ( parser ) ;
265
290
} ) ;
266
291
267
- return translated ;
292
+ return translated . join ( "" ) ;
268
293
} catch ( parseErr ) {
269
294
console . error ( "Error parsing XML:" , parseErr ) ;
270
- return translated + "<!-- Error parsing this section -->" ;
295
+ return translated . join ( "" ) + "<!-- Error parsing this section -->" ;
271
296
}
272
297
273
298
async function translateChunk ( chunk : string ) : Promise < string > {
299
+ if ( chunk . trim ( ) === "" || chunk . trim ( ) === "," || chunk . trim ( ) === "." ) {
300
+ return chunk ;
301
+ }
302
+
274
303
let translatedChunk = "" ;
304
+ console . log ( "Translating chunk of length: " + chunk . length + "\n" + chunk ) ;
275
305
276
306
try {
277
307
await ai . beta . threads . messages . create ( thread . id , {
@@ -364,7 +394,10 @@ async function recursivelyTranslate(
364
394
return translatedChunk ;
365
395
} catch ( err ) {
366
396
console . log ( `Error occured while translating ${ path } :\n ` + err ) ;
367
- return translatedChunk + "<!-- Error translating this section -->" ;
397
+ return (
398
+ translatedChunk +
399
+ `<!-- Error occured while translating this section-->\n<!-- Error: ${ err . length < 50 ? err : err . subString ( 0 , 50 ) + "..." } -->`
400
+ ) ;
368
401
}
369
402
}
370
403
}
@@ -393,4 +426,4 @@ function strongEscapeXML(str: string): string {
393
426
. replace ( / > / g, ">" )
394
427
. replace ( / " / g, """ )
395
428
. replace ( / ' / g, "'" ) ;
396
- }
429
+ }
0 commit comments