@@ -6,7 +6,6 @@ import dotenv from "dotenv";
6
6
import sax from "sax" ;
7
7
import { Readable } from "stream" ;
8
8
import { fileURLToPath } from "url" ;
9
- import { strict } from "assert" ;
10
9
11
10
dotenv . config ( ) ;
12
11
@@ -20,9 +19,10 @@ const ai = new OpenAI({
20
19
baseURL : process . env . AI_BASEURL
21
20
} ) ;
22
21
23
// Size limit referenced by the rest of this file.
// NOTE(review): presumably the maximum number of characters sent to the model
// per translation chunk — confirm against the code that reads it.
const MAXLEN = 3000;

/**
 * Creates a fresh sax stream parser in strict mode.
 *
 * `sax.createStream(strict, options)` accepts exactly two arguments, so every
 * option has to live in the single options object. Passing `strictEntities`
 * as a separate third argument (as this code previously did) is silently
 * ignored, which leaves strict entity handling switched off.
 *
 * Options:
 *  - `trim: false`           keep whitespace in text nodes untouched.
 *  - `strictEntities: true`  recognise only the five predefined XML entities.
 *
 * The `any` cast is kept because callers in this file reach into parser
 * internals (e.g. `_parser`) that the public typings do not expose.
 *
 * @returns a new sax stream; each call yields an independent parser.
 */
const createParser = () =>
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  (sax as any).createStream(true, { trim: false, strictEntities: true });
26
26
27
27
async function translate ( language : string , filePath : string ) : Promise < void > {
28
28
try {
@@ -92,18 +92,20 @@ async function recursivelyTranslate(
92
92
} else {
93
93
if (
94
94
subSegments . length > 0 &&
95
- subSegments [ subSegments . length - 1 ] [ 1 ] != undefined
95
+ subSegments [ subSegments . length - 1 ] [ 0 ]
96
96
) {
97
97
subSegments [ subSegments . length - 1 ] [ 1 ] += text ;
98
98
subSegments [ subSegments . length - 1 ] [ 0 ] = true ;
99
-
100
- // if (text == "\n " || text == "\r\n " || text == ", \n" || text == ", \r\n") {
101
- // subSegments.push([false, text]);
102
- // } else {
103
- // subSegments.push([true, text]);
104
- // }
105
99
} else {
106
- subSegments . push ( [ true , text ] ) ;
100
+ if (
101
+ text . trim ( ) !== "" ||
102
+ text . trim ( ) === "," ||
103
+ text . trim ( ) === "."
104
+ ) {
105
+ subSegments . push ( [ false , text ] ) ;
106
+ } else {
107
+ subSegments . push ( [ true , text ] ) ;
108
+ }
107
109
}
108
110
}
109
111
} ) ;
@@ -121,7 +123,11 @@ async function recursivelyTranslate(
121
123
122
124
if ( subCurrentDepth === 2 ) {
123
125
// We are closing a segment element.
124
- subSegments . push ( [ true , subCurrentSegment ] ) ;
126
+ if ( tagName === "LATEXINLINE" ) {
127
+ subSegments . push ( [ false , subCurrentSegment ] ) ;
128
+ } else {
129
+ subSegments . push ( [ true , subCurrentSegment ] ) ;
130
+ }
125
131
subCurrentSegment = "" ;
126
132
subIsRecording = false ;
127
133
}
@@ -336,18 +342,17 @@ async function recursivelyTranslate(
336
342
clean . on ( "error" , error => {
337
343
console . log (
338
344
"error encountered when validating XML: " +
339
- error +
340
- "\nvalidating section: " +
341
- chunk . substring ( 0 , 100 ) +
342
- "..."
345
+ error + "\nfile: " + path +
346
+ "\n section: " +
347
+ ( safeText . length > 50 ? safeText . substring ( 0 , 100 ) + "..." : safeText )
343
348
) ;
344
349
345
350
// Attempt to recover using the internal parser
346
351
try {
347
352
clean . _parser . resume ( ) ;
348
353
} catch ( e ) {
349
354
console . log ( "Failed to resume parser:" , e ) ;
350
- reject ( ) ;
355
+ reject ( e ) ;
351
356
}
352
357
} ) ;
353
358
@@ -375,7 +380,10 @@ function formatAttributes(attrs) {
375
380
}
376
381
377
382
/**
 * Escapes characters that would make otherwise tag-shaped text invalid XML,
 * while leaving real markup and already-escaped entities alone.
 *
 * Three passes, in order:
 *  1. `&` becomes `&amp;` unless it already starts one of the five predefined
 *     entities (`&amp;`, `&lt;`, `&gt;`, `&apos;`, `&quot;`).
 *  2. `<` becomes `&lt;` when the next character cannot start a tag name or a
 *     closing tag (i.e. is not an ASCII letter or `/`).
 *  3. `>` becomes `&gt;` when the previous character cannot end a tag
 *     (i.e. is not an ASCII letter, digit, quote, whitespace or `/`).
 *
 * Look-around assertions are used instead of capture groups so the neighbouring
 * character is inspected but not consumed; this way runs such as `<<` or `>>`
 * have every bracket escaped rather than every other one.
 *
 * A `<` at the very end of the string and a `>` at the very start are left
 * untouched, because there is no neighbouring character to judge them by.
 *
 * @param str raw text that may mix XML markup with stray `&`, `<` and `>`.
 * @returns the text with stray characters replaced by XML entities.
 */
function escapeXML(str: string): string {
  return str
    .replace(/&(?!(?:amp;|lt;|gt;|apos;|quot;))/g, "&amp;")
    .replace(/<(?=[^a-zA-Z\/])/g, "&lt;") // lone "<" characters
    .replace(/(?<=[^a-zA-Z0-9"'\s\/])>/g, "&gt;"); // lone ">" characters
}
380
388
381
389
function strongEscapeXML ( str : string ) : string {
0 commit comments