Skip to content

Commit 3336a63

Browse files
committed
update: grpc
1 parent dd87f5c commit 3336a63

File tree

4 files changed

+13
-13
lines changed

4 files changed

+13
-13
lines changed

core/src/infer.rs

Lines changed: 9 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -151,20 +151,16 @@ impl Infer {
151151
panic!("unexpected enum variant")
152152
};
153153

154-
// Timings
155154
let total_time = start_time.elapsed();
156155

157-
// Metrics
158-
let counter = metrics::counter!("te_embed_success");
159-
counter.increment(1);
160-
let histogram = metrics::histogram!("te_embed_duration");
161-
histogram.record(total_time.as_secs_f64());
162-
let histogram = metrics::histogram!("te_embed_tokenization_duration");
163-
histogram.record(response.metadata.tokenization.as_secs_f64());
164-
let histogram = metrics::histogram!("te_embed_queue_duration");
165-
histogram.record(response.metadata.queue.as_secs_f64());
166-
let histogram = metrics::histogram!("te_embed_inference_duration");
167-
histogram.record(response.metadata.inference.as_secs_f64());
156+
metrics::counter!("te_embed_success").increment(1);
157+
metrics::histogram!("te_embed_duration").record(total_time.as_secs_f64());
158+
metrics::histogram!("te_embed_tokenization_duration")
159+
.record(response.metadata.tokenization.as_secs_f64());
160+
metrics::histogram!("te_embed_queue_duration")
161+
.record(response.metadata.queue.as_secs_f64());
162+
metrics::histogram!("te_embed_inference_duration")
163+
.record(response.metadata.inference.as_secs_f64());
168164

169165
Ok(response)
170166
}
@@ -250,7 +246,7 @@ impl Infer {
250246
if let Some(dimensions) = dimensions {
251247
if dimensions == 0 {
252248
metrics::counter!("te_request_failure", "err" => "validation").increment(1);
253-
let message = "`dimensions` should be always positive".to_string();
249+
let message = "`dimensions` should be positive".to_string();
254250
tracing::error!("{message}");
255251
return Err(TextEmbeddingsError::Validation(message));
256252
}

proto/tei.proto

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,7 @@ message EmbedRequest {
8080
bool normalize = 3;
8181
TruncationDirection truncation_direction = 4;
8282
optional string prompt_name = 5;
83+
optional uint32 dimensions = 6;
8384
}
8485

8586
message EmbedResponse {

router/src/grpc/server.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,7 @@ impl TextEmbeddingsService {
9191
truncation_direction,
9292
request.prompt_name,
9393
request.normalize,
94+
request.dimensions.map(|v| v as usize),
9495
permit,
9596
)
9697
.await

router/src/http/types.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -433,6 +433,8 @@ pub(crate) struct EmbedRequest {
433433
#[schema(default = "true", example = "true")]
434434
pub normalize: bool,
435435

436+
/// The number of dimensions the resulting output embeddings should have. If not set, the original
437+
/// shape of the representation will be returned.
436438
#[schema(default = "null", example = "null", nullable = true)]
437439
pub dimensions: Option<usize>,
438440
}

0 commit comments

Comments
 (0)