@@ -274,7 +274,7 @@ fn prepare_pre_prompt(
274
274
275
275
#[ allow( clippy:: too_many_arguments) ]
276
276
fn tokenize_input (
277
- inputs : EncodingInput ,
277
+ mut inputs : EncodingInput ,
278
278
add_special_tokens : bool ,
279
279
max_input_length : usize ,
280
280
truncate_params : Option < TruncationParams > ,
@@ -288,9 +288,12 @@ fn tokenize_input(
288
288
let input_chars = inputs. count_chars ( ) ;
289
289
let limit = max_input_length * MAX_CHAR_MULTIPLIER ;
290
290
if input_chars > limit {
291
- return Err ( TextEmbeddingsError :: Validation ( format ! (
292
- "`inputs` must have less than {limit} characters. Given: {input_chars}"
293
- ) ) ) ;
291
+ if truncate_params. is_none ( ) {
292
+ return Err ( TextEmbeddingsError :: Validation ( format ! (
293
+ "`inputs` must have less than {limit} characters. Given: {input_chars}"
294
+ ) ) ) ;
295
+ }
296
+ inputs. apply_limit ( limit) ;
294
297
}
295
298
296
299
let encoding = match inputs {
@@ -426,6 +429,25 @@ impl EncodingInput {
426
429
EncodingInput :: Ids ( v) => v. len ( ) ,
427
430
}
428
431
}
432
+
433
+ fn apply_limit ( & mut self , limit : usize ) {
434
+ let truncate_string = |s : & mut String , limit : usize | {
435
+ if s. is_char_boundary ( limit) {
436
+ s. truncate ( limit)
437
+ }
438
+ } ;
439
+
440
+ match self {
441
+ EncodingInput :: Single ( s) => {
442
+ truncate_string ( s, limit) ;
443
+ }
444
+ EncodingInput :: Dual ( s1, s2) => {
445
+ truncate_string ( s1, limit / 2 ) ;
446
+ truncate_string ( s2, limit / 2 ) ;
447
+ }
448
+ EncodingInput :: Ids ( _) => { }
449
+ }
450
+ }
429
451
}
430
452
431
453
impl From < String > for EncodingInput {
0 commit comments