@@ -151,6 +151,8 @@ async fn predict(
151
151
) )
152
152
} ;
153
153
154
+ let truncate = req. truncate . unwrap_or ( info. auto_truncate ) ;
155
+
154
156
let ( response, metadata) = match req. inputs {
155
157
PredictInput :: Single ( inputs) => {
156
158
metrics:: increment_counter!( "te_request_count" , "method" => "single" ) ;
@@ -159,7 +161,7 @@ async fn predict(
159
161
let permit = infer. try_acquire_permit ( ) . map_err ( ErrorResponse :: from) ?;
160
162
let ( prompt_tokens, tokenization, queue, inference, predictions) = predict_inner (
161
163
inputs,
162
- req . truncate . unwrap_or ( info . auto_truncate ) ,
164
+ truncate,
163
165
req. raw_scores ,
164
166
infer. 0 ,
165
167
info. 0 ,
@@ -208,7 +210,7 @@ async fn predict(
208
210
let local_info = info. clone ( ) ;
209
211
futures. push ( predict_inner (
210
212
input,
211
- req . truncate . unwrap_or ( info . auto_truncate ) ,
213
+ truncate,
212
214
req. raw_scores ,
213
215
local_infer. 0 ,
214
216
local_info. 0 ,
@@ -342,6 +344,8 @@ async fn rerank(
342
344
) )
343
345
} ;
344
346
347
+ let truncate = req. truncate . unwrap_or ( info. auto_truncate ) ;
348
+
345
349
let ( response, metadata) = {
346
350
metrics:: increment_counter!( "te_request_count" , "method" => "batch" ) ;
347
351
@@ -370,7 +374,7 @@ async fn rerank(
370
374
futures. push ( rerank_inner (
371
375
req. query . clone ( ) ,
372
376
text. clone ( ) ,
373
- req . truncate . unwrap_or ( info . auto_truncate ) ,
377
+ truncate,
374
378
req. raw_scores ,
375
379
local_infer. 0 ,
376
380
) )
@@ -470,6 +474,8 @@ async fn embed(
470
474
let span = tracing:: Span :: current ( ) ;
471
475
let start_time = Instant :: now ( ) ;
472
476
477
+ let truncate = req. truncate . unwrap_or ( info. auto_truncate ) ;
478
+
473
479
let ( response, metadata) = match req. inputs {
474
480
Input :: Single ( input) => {
475
481
metrics:: increment_counter!( "te_request_count" , "method" => "single" ) ;
@@ -478,12 +484,7 @@ async fn embed(
478
484
479
485
let permit = infer. try_acquire_permit ( ) . map_err ( ErrorResponse :: from) ?;
480
486
let response = infer
481
- . embed_pooled (
482
- input,
483
- req. truncate . unwrap_or ( info. auto_truncate ) ,
484
- req. normalize ,
485
- permit,
486
- )
487
+ . embed_pooled ( input, truncate, req. normalize , permit)
487
488
. await
488
489
. map_err ( ErrorResponse :: from) ?;
489
490
@@ -536,7 +537,6 @@ async fn embed(
536
537
for input in inputs {
537
538
compute_chars += input. count_chars ( ) ;
538
539
539
- let truncate = req. truncate . unwrap_or ( info. auto_truncate ) ;
540
540
let local_infer = infer. clone ( ) ;
541
541
futures. push ( async move {
542
542
let permit = local_infer. acquire_permit ( ) . await ;
@@ -631,6 +631,7 @@ async fn embed_sparse(
631
631
}
632
632
sparse_values
633
633
} ;
634
+ let truncate = req. truncate . unwrap_or ( info. auto_truncate ) ;
634
635
635
636
let ( response, metadata) = match req. inputs {
636
637
Input :: Single ( input) => {
@@ -640,7 +641,7 @@ async fn embed_sparse(
640
641
641
642
let permit = infer. try_acquire_permit ( ) . map_err ( ErrorResponse :: from) ?;
642
643
let response = infer
643
- . embed_sparse ( input, req . truncate . unwrap_or ( info . auto_truncate ) , permit)
644
+ . embed_sparse ( input, truncate, permit)
644
645
. await
645
646
. map_err ( ErrorResponse :: from) ?;
646
647
@@ -693,7 +694,6 @@ async fn embed_sparse(
693
694
for input in inputs {
694
695
compute_chars += input. count_chars ( ) ;
695
696
696
- let truncate = req. truncate . unwrap_or ( info. auto_truncate ) ;
697
697
let local_infer = infer. clone ( ) ;
698
698
futures. push ( async move {
699
699
let permit = local_infer. acquire_permit ( ) . await ;
@@ -779,6 +779,8 @@ async fn embed_all(
779
779
let span = tracing:: Span :: current ( ) ;
780
780
let start_time = Instant :: now ( ) ;
781
781
782
+ let truncate = req. truncate . unwrap_or ( info. auto_truncate ) ;
783
+
782
784
let ( response, metadata) = match req. inputs {
783
785
Input :: Single ( input) => {
784
786
metrics:: increment_counter!( "te_request_count" , "method" => "single" ) ;
@@ -787,7 +789,7 @@ async fn embed_all(
787
789
788
790
let permit = infer. try_acquire_permit ( ) . map_err ( ErrorResponse :: from) ?;
789
791
let response = infer
790
- . embed_all ( input, req . truncate . unwrap_or ( info . auto_truncate ) , permit)
792
+ . embed_all ( input, truncate, permit)
791
793
. await
792
794
. map_err ( ErrorResponse :: from) ?;
793
795
@@ -840,7 +842,6 @@ async fn embed_all(
840
842
for input in inputs {
841
843
compute_chars += input. count_chars ( ) ;
842
844
843
- let truncate = req. truncate . unwrap_or ( info. auto_truncate ) ;
844
845
let local_infer = infer. clone ( ) ;
845
846
futures. push ( async move {
846
847
let permit = local_infer. acquire_permit ( ) . await ;
@@ -925,6 +926,8 @@ async fn openai_embed(
925
926
let span = tracing:: Span :: current ( ) ;
926
927
let start_time = Instant :: now ( ) ;
927
928
929
+ let truncate = info. auto_truncate ;
930
+
928
931
let ( embeddings, metadata) = match req. input {
929
932
Input :: Single ( input) => {
930
933
metrics:: increment_counter!( "te_request_count" , "method" => "single" ) ;
@@ -933,7 +936,7 @@ async fn openai_embed(
933
936
934
937
let permit = infer. try_acquire_permit ( ) . map_err ( ErrorResponse :: from) ?;
935
938
let response = infer
936
- . embed_pooled ( input, false , true , permit)
939
+ . embed_pooled ( input, truncate , true , permit)
937
940
. await
938
941
. map_err ( ErrorResponse :: from) ?;
939
942
@@ -993,7 +996,9 @@ async fn openai_embed(
993
996
let local_infer = infer. clone ( ) ;
994
997
futures. push ( async move {
995
998
let permit = local_infer. acquire_permit ( ) . await ;
996
- local_infer. embed_pooled ( input, false , true , permit) . await
999
+ local_infer
1000
+ . embed_pooled ( input, truncate, true , permit)
1001
+ . await
997
1002
} )
998
1003
}
999
1004
let results = join_all ( futures)
0 commit comments