41
41
import java .util .ArrayDeque ;
42
42
import java .util .ArrayList ;
43
43
import java .util .HashMap ;
44
+ import java .util .Locale ;
44
45
import java .util .Map ;
45
46
import java .util .Optional ;
46
47
@@ -480,10 +481,11 @@ private void performTextTranslation(final String textToTranslate, final CustomLo
480
481
//tokenization
481
482
long time = System .currentTimeMillis ();
482
483
TokenizerResult input = null ;
484
+ String correctedSubText = correctText (textSplit .get (i ), inputLanguage .getLocale ());
483
485
if (mode == MADLAD_CACHE ) {
484
- input = tokenizer .tokenize (inputLanguage .getCode (), outputLanguage .getCode (), textSplit . get ( i ) );
486
+ input = tokenizer .tokenize (inputLanguage .getCode (), outputLanguage .getCode (), correctedSubText );
485
487
} else { //if mode == NLLB_CACHE
486
- input = tokenizer .tokenize (getNllbLanguageCode (inputLanguage .getCode ()), getNllbLanguageCode (outputLanguage .getCode ()), textSplit . get ( i ) );
488
+ input = tokenizer .tokenize (getNllbLanguageCode (inputLanguage .getCode ()), getNllbLanguageCode (outputLanguage .getCode ()), correctedSubText );
487
489
}
488
490
android .util .Log .i ("performance" , "Tokenization done in: " + (System .currentTimeMillis () - time ) + "ms" );
489
491
//encoder execution
@@ -508,10 +510,11 @@ private void performTextTranslation(final String textToTranslate, final CustomLo
508
510
executeCacheDecoderGreedy (input , encoderResult , completeOutput , outputLanguage , new TranslateListener () {
509
511
@ Override
510
512
public void onTranslatedText (String text , long resultID , boolean isFinal , CustomLocale languageOfText ) {
513
+ //we return the partial results
511
514
String outputText ;
512
515
if (joinedStringOutput [0 ].equals ("" )){
513
516
outputText = joinedStringOutput [0 ] + text ;
514
- }else {
517
+ } else {
515
518
outputText = joinedStringOutput [0 ] + " " + text ;
516
519
}
517
520
if (saveResults ) {
@@ -527,6 +530,7 @@ public void onTranslatedText(String text, long resultID, boolean isFinal, Custom
527
530
528
531
@ Override
529
532
public void onFailure (int [] reasons , long value ) {
533
+ //we do not return the partial results and notify an error
530
534
if (responseListener != null ) {
531
535
mainHandler .post (() -> responseListener .onFailure (reasons , value ));
532
536
} else {
@@ -1114,6 +1118,41 @@ public long getCurrentResultID(){
1114
1118
return currentResultID ;
1115
1119
}
1116
1120
1121
+ private String correctText (String text , Locale locale ){
1122
+ String correctedText = text ;
1123
+ String language = locale .getLanguage ();
1124
+ //we add an eventual period if missing (or in general a terminator symbol)
1125
+ if (!language .equals ("th" )) {
1126
+ correctedText = correctedText .trim (); //we remove eventual white space from both ends of the text
1127
+ if (correctedText .length () >= 2 ) {
1128
+ if (!Character .isLetterOrDigit (correctedText .charAt (correctedText .length () - 1 ))) {
1129
+ return correctedText ;
1130
+ }
1131
+ return correctedText + getSentenceTerminator (locale );
1132
+ }
1133
+ }
1134
+ return text ;
1135
+ }
1136
+
1137
+ private static String getSentenceTerminator (Locale locale ) {
1138
+ // Assuming most languages use a period (.)
1139
+ // Add custom cases for specific languages as needed
1140
+ String language = locale .getLanguage ();
1141
+ switch (language ) {
1142
+ case "zh" : // Chinese
1143
+ case "ja" : // Japanese
1144
+ case "ko" : // Korean
1145
+ return "。" ; // Ideographic full stop
1146
+ case "hi" : // Hindi
1147
+ return "।" ;
1148
+ case "my" : // Burmese
1149
+ return "။" ; // Burmese full stop
1150
+ // Add other cases as needed for more languages
1151
+ default :
1152
+ return "." ;
1153
+ }
1154
+ }
1155
+
1117
1156
1118
1157
private void initializeNllbLanguagesCodes (Context context ){
1119
1158
DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory .newInstance ();
0 commit comments