Skip to content

Commit 2828127

Browse files
v0.6.0
1 parent 85e44a2 commit 2828127

File tree

13 files changed

+54
-54
lines changed

13 files changed

+54
-54
lines changed

.github/workflows/build_75.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -109,10 +109,10 @@
109109
flavor: |
110110
latest=false
111111
tags: |
112-
type=semver,pattern=turing-{{version}}+grpc
113-
type=semver,pattern=turing-{{major}}.{{minor}}+grpc
114-
type=raw,value=turing-latest+grpc
115-
type=raw,value=turing-sha-${{ env.GITHUB_SHA_SHORT }}+grpc
112+
type=semver,pattern=turing-{{version}}-grpc
113+
type=semver,pattern=turing-{{major}}.{{minor}}-grpc
114+
type=raw,value=turing-latest-grpc
115+
type=raw,value=turing-sha-${{ env.GITHUB_SHA_SHORT }}-grpc
116116
- name: Build and push Docker image
117117
id: build-and-push-75-grpc
118118
uses: docker/build-push-action@v4

.github/workflows/build_80.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -108,10 +108,10 @@
108108
flavor: |
109109
latest=false
110110
tags: |
111-
type=semver,pattern={{version}}+grpc
112-
type=semver,pattern={{major}}.{{minor}}+grpc
113-
type=raw,value=latest+grpc
114-
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}+grpc
111+
type=semver,pattern={{version}}-grpc
112+
type=semver,pattern={{major}}.{{minor}}-grpc
113+
type=raw,value=latest-grpc
114+
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-grpc
115115
- name: Build and push Docker image
116116
id: build-and-push-80-grpc
117117
uses: docker/build-push-action@v4

.github/workflows/build_86.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -108,10 +108,10 @@
108108
flavor: |
109109
latest=false
110110
tags: |
111-
type=semver,pattern=86-{{version}}+grpc
112-
type=semver,pattern=86-{{major}}.{{minor}}+grpc
113-
type=raw,value=86-latest+grpc
114-
type=raw,value=86-sha-${{ env.GITHUB_SHA_SHORT }}+grpc
111+
type=semver,pattern=86-{{version}}-grpc
112+
type=semver,pattern=86-{{major}}.{{minor}}-grpc
113+
type=raw,value=86-latest-grpc
114+
type=raw,value=86-sha-${{ env.GITHUB_SHA_SHORT }}-grpc
115115
- name: Build and push Docker image
116116
id: build-and-push-86-grpc
117117
uses: docker/build-push-action@v4

.github/workflows/build_89.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -108,10 +108,10 @@
108108
flavor: |
109109
latest=false
110110
tags: |
111-
type=semver,pattern=89-{{version}}+grpc
112-
type=semver,pattern=89-{{major}}.{{minor}}+grpc
113-
type=raw,value=89-latest+grpc
114-
type=raw,value=89-sha-${{ env.GITHUB_SHA_SHORT }}+grpc
111+
type=semver,pattern=89-{{version}}-grpc
112+
type=semver,pattern=89-{{major}}.{{minor}}-grpc
113+
type=raw,value=89-latest-grpc
114+
type=raw,value=89-sha-${{ env.GITHUB_SHA_SHORT }}-grpc
115115
- name: Build and push Docker image
116116
id: build-and-push-89-grpc
117117
uses: docker/build-push-action@v4

.github/workflows/build_90.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -108,10 +108,10 @@
108108
flavor: |
109109
latest=false
110110
tags: |
111-
type=semver,pattern=hopper-{{version}}+grpc
112-
type=semver,pattern=hopper-{{major}}.{{minor}}+grpc
113-
type=raw,value=hopper-latest+grpc
114-
type=raw,value=hopper-sha-${{ env.GITHUB_SHA_SHORT }}+grpc
111+
type=semver,pattern=hopper-{{version}}-grpc
112+
type=semver,pattern=hopper-{{major}}.{{minor}}-grpc
113+
type=raw,value=hopper-latest-grpc
114+
type=raw,value=hopper-sha-${{ env.GITHUB_SHA_SHORT }}-grpc
115115
- name: Build and push Docker image
116116
id: build-and-push-90-grpc
117117
uses: docker/build-push-action@v4

.github/workflows/build_cpu.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -107,10 +107,10 @@
107107
flavor: |
108108
latest=false
109109
tags: |
110-
type=semver,pattern=cpu-{{version}}+grpc
111-
type=semver,pattern=cpu-{{major}}.{{minor}}+grpc
112-
type=raw,value=cpu-latest+grpc
113-
type=raw,value=cpu-sha-${{ env.GITHUB_SHA_SHORT }}+grpc
110+
type=semver,pattern=cpu-{{version}}-grpc
111+
type=semver,pattern=cpu-{{major}}.{{minor}}-grpc
112+
type=raw,value=cpu-latest-grpc
113+
type=raw,value=cpu-sha-${{ env.GITHUB_SHA_SHORT }}-grpc
114114
- name: Build and push Docker image
115115
id: build-and-push-cpu-grpc
116116
uses: docker/build-push-action@v4

Cargo.lock

Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ members = [
1111
resolver = "2"
1212

1313
[workspace.package]
14-
version = "0.5.0"
14+
version = "0.6.0"
1515
edition = "2021"
1616
authors = ["Olivier Dehaene"]
1717
homepage = "https://github.com/huggingface/text-embeddings-inference"

README.md

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ model=BAAI/bge-large-en-v1.5
102102
revision=refs/pr/5
103103
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
104104

105-
docker run --gpus all -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:0.5 --model-id $model --revision $revision
105+
docker run --gpus all -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:0.6 --model-id $model --revision $revision
106106
```
107107

108108
And then you can make requests like
@@ -245,13 +245,13 @@ Text Embeddings Inference ships with multiple Docker images that you can use to
245245

246246
| Architecture | Image |
247247
|-------------------------------------|-------------------------------------------------------------------------|
248-
| CPU | ghcr.io/huggingface/text-embeddings-inference:cpu-0.5 |
248+
| CPU | ghcr.io/huggingface/text-embeddings-inference:cpu-0.6 |
249249
| Volta | NOT SUPPORTED |
250-
| Turing (T4, RTX 2000 series, ...) | ghcr.io/huggingface/text-embeddings-inference:turing-0.5 (experimental) |
251-
| Ampere 80 (A100, A30) | ghcr.io/huggingface/text-embeddings-inference:0.5 |
252-
| Ampere 86 (A10, A40, ...) | ghcr.io/huggingface/text-embeddings-inference:86-0.5 |
253-
| Ada Lovelace (RTX 4000 series, ...) | ghcr.io/huggingface/text-embeddings-inference:89-0.5 |
254-
| Hopper (H100) | ghcr.io/huggingface/text-embeddings-inference:hopper-0.5 (experimental) |
250+
| Turing (T4, RTX 2000 series, ...) | ghcr.io/huggingface/text-embeddings-inference:turing-0.6 (experimental) |
251+
| Ampere 80 (A100, A30) | ghcr.io/huggingface/text-embeddings-inference:0.6 |
252+
| Ampere 86 (A10, A40, ...) | ghcr.io/huggingface/text-embeddings-inference:86-0.6 |
253+
| Ada Lovelace (RTX 4000 series, ...) | ghcr.io/huggingface/text-embeddings-inference:89-0.6 |
254+
| Hopper (H100) | ghcr.io/huggingface/text-embeddings-inference:hopper-0.6 (experimental) |
255255

256256
**Warning**: Flash Attention is turned off by default for the Turing image as it suffers from precision issues.
257257
You can turn Flash Attention v1 ON by using the `USE_FLASH_ATTENTION=True` environment variable.
@@ -280,7 +280,7 @@ model=<your private model>
280280
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
281281
token=<your cli READ token>
282282

283-
docker run --gpus all -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:0.5 --model-id $model
283+
docker run --gpus all -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:0.6 --model-id $model
284284
```
285285

286286
### Using Re-rankers models
@@ -298,7 +298,7 @@ model=BAAI/bge-reranker-large
298298
revision=refs/pr/4
299299
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
300300

301-
docker run --gpus all -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:0.5 --model-id $model --revision $revision
301+
docker run --gpus all -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:0.6 --model-id $model --revision $revision
302302
```
303303

304304
And then you can rank the similarity between a query and a list of texts with:
@@ -318,7 +318,7 @@ You can also use classic Sequence Classification models like `SamLowe/roberta-ba
318318
model=SamLowe/roberta-base-go_emotions
319319
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
320320

321-
docker run --gpus all -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:0.5 --model-id $model
321+
docker run --gpus all -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:0.6 --model-id $model
322322
```
323323

324324
Once you have deployed the model you can use the `predict` endpoint to get the emotions most associated with an input:
@@ -340,14 +340,14 @@ by setting the address to an OTLP collector with the `--otlp-endpoint` argument.
340340
`text-embeddings-inference` offers a gRPC API as an alternative to the default HTTP API for high performance
341341
deployments. The API protobuf definition can be found [here](https://github.com/huggingface/text-embeddings-inference/blob/main/proto/tei.proto).
342342

343-
You can use the gRPC API by adding the `+grpc` tag to any TEI Docker image. For example:
343+
You can use the gRPC API by adding the `-grpc` tag to any TEI Docker image. For example:
344344

345345
```shell
346346
model=BAAI/bge-large-en-v1.5
347347
revision=refs/pr/5
348348
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
349349

350-
docker run --gpus all -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:0.5+grpc --model-id $model --revision $revision
350+
docker run --gpus all -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingface/text-embeddings-inference:0.6-grpc --model-id $model --revision $revision
351351
```
352352

353353
```shell

docs/openapi.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
"license": {
1010
"name": "HFOIL"
1111
},
12-
"version": "0.5.0"
12+
"version": "0.6.0"
1313
},
1414
"paths": {
1515
"/embed": {

0 commit comments

Comments (0)