Skip to content

Commit 5188718

Browse files
feat: add 89 target
1 parent 1135306 commit 5188718

File tree

3 files changed

+221
-15
lines changed

3 files changed

+221
-15
lines changed

.github/workflows/build_89.yaml

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
# Builds the CUDA image for compute capability 8.9 on an ephemeral self-hosted
# EC2 runner: start the runner, build (and, outside of pull requests, push) the
# image, then always stop the runner.
name: Build and push Cuda RTX 4000 series docker image to registry

on:
  workflow_dispatch:
  push:
    branches:
      - 'main'
    tags:
      - 'v*'
  pull_request:
    paths:
      # This workflow's own file, so that PRs editing it are validated by it.
      - ".github/workflows/build_89.yaml"
      # - "integration-tests/**"
      - "backends/**"
      - "core/**"
      - "router/**"
      - "Cargo.lock"
      - "rust-toolchain.toml"
      - "Dockerfile"
      # The build step below uses `file: Dockerfile-cuda`.
      - "Dockerfile-cuda"
    branches:
      - 'main'

jobs:
  start-runner:
    name: Start self-hosted EC2 runner
    runs-on: ubuntu-latest
    env:
      AWS_REGION: us-east-1
      EC2_AMI_ID: ami-03cfed9ea28f4b002
      EC2_INSTANCE_TYPE: r5.8xlarge
      EC2_SUBNET_ID: subnet-931b34f5,subnet-ecb993cd,subnet-943dc2d8,subnet-45371f1a,subnet-ee93e0df,subnet-fddc3dfc
      EC2_SECURITY_GROUP: sg-030175c435ac141d6
    # Consumed by `build-and-push-image` (runs-on) and `stop-runner` (teardown).
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}
      - name: Start EC2 runner
        id: start-ec2-runner
        uses: philschmid/philschmid-ec2-github-runner@main
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ${{ env.EC2_AMI_ID }}
          ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }}
          subnet-id: ${{ env.EC2_SUBNET_ID }}
          security-group-id: ${{ env.EC2_SECURITY_GROUP }}
          aws-resource-tags: > # optional, requires additional permissions
            [
                {"Key": "Name", "Value": "ec2-tei-github-runner"},
                {"Key": "GitHubRepository", "Value": "${{ github.repository }}"}
            ]

  build-and-push-image:
    # One in-flight build per PR branch (or per run when there is no head ref);
    # a newer run in the same group cancels the older one.
    concurrency:
      group: ${{ github.workflow }}-${{ github.job }}-89-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
    needs: start-runner # required to start the main job when the runner is ready
    runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
    permissions:
      contents: write
      packages: write
      # This is used to complete the identity challenge
      # with sigstore/fulcio when running outside of PRs.
      id-token: write
      security-events: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Initialize Docker Buildx
        uses: docker/setup-buildx-action@v2.0.0
        with:
          install: true
      # Re-export the Actions cache endpoint and token so they can be passed to
      # the Docker build as build args below.
      - name: Configure sccache
        uses: actions/github-script@v6
        with:
          script: |
            core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4.4.1
      - name: Tailscale
        uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
        with:
          authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
      - name: Login to GitHub Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # Not gated on the event type: the internal registry also hosts the build
      # cache referenced by cache-from/cache-to below.
      - name: Login to internal Container Registry
        uses: docker/login-action@v2.1.0
        with:
          username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }}
          password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }}
          registry: registry.internal.huggingface.tech
      - name: Extract metadata (tags, labels) for Docker
        id: meta-89
        uses: docker/metadata-action@v4.3.0
        with:
          images: |
            registry.internal.huggingface.tech/api-inference/text-embeddings-inference
            ghcr.io/huggingface/text-embeddings-inference
          tags: |
            type=semver,pattern=89-{{version}}
            type=semver,pattern=89-{{major}}.{{minor}}
            type=raw,value=89-latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
            type=raw,value=89-sha-${{ env.GITHUB_SHA_SHORT }}
      - name: Build and push Docker image
        id: build-and-push-89
        uses: docker/build-push-action@v4
        with:
          context: .
          file: Dockerfile-cuda
          push: ${{ github.event_name != 'pull_request' }}
          platforms: 'linux/amd64'
          # GIT_SHA uses the `github` context: default runner variables such as
          # GITHUB_SHA are not exposed through the `env` context.
          build-args: |
            SCCACHE_GHA_ENABLED=on
            ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
            ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
            CUDA_COMPUTE_CAP=89
            GIT_SHA=${{ github.sha }}
            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
          tags: ${{ steps.meta-89.outputs.tags }}
          labels: ${{ steps.meta-89.outputs.labels }}
          # `mode` is an export-side option, so it is only set on cache-to.
          cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-89
          cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-89,mode=max

  stop-runner:
    name: Stop self-hosted EC2 runner
    needs:
      - start-runner
      - build-and-push-image
    runs-on: ubuntu-latest
    env:
      AWS_REGION: us-east-1
    if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}
      - name: Stop EC2 runner
        uses: philschmid/philschmid-ec2-github-runner@main
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ needs.start-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}

README.md

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -219,14 +219,15 @@ Options:
219219

220220
Text Embeddings Inference ships with multiple Docker images that you can use to target a specific backend:
221221

222-
| Architecture | Image |
223-
|-----------------------------------|-------------------------------------------------------------|
224-
| CPU | ghcr.io/huggingface/text-embeddings-inference:cpu-latest |
225-
| Volta | NOT SUPPORTED |
226-
| Turing (T4, RTX 2000 series, ...) | ghcr.io/huggingface/text-embeddings-inference:turing-latest |
227-
| Ampere 80 (A100, A30) | ghcr.io/huggingface/text-embeddings-inference:latest |
228-
| Ampere 86 (A10, A40, ...) | ghcr.io/huggingface/text-embeddings-inference:86-latest |
229-
| Hopper (H100) | ghcr.io/huggingface/text-embeddings-inference:hopper-latest |
222+
| Architecture | Image |
223+
|-------------------------------------|-------------------------------------------------------------|
224+
| CPU | ghcr.io/huggingface/text-embeddings-inference:cpu-latest |
225+
| Volta | NOT SUPPORTED |
226+
| Turing (T4, RTX 2000 series, ...) | ghcr.io/huggingface/text-embeddings-inference:turing-latest |
227+
| Ampere 80 (A100, A30) | ghcr.io/huggingface/text-embeddings-inference:latest |
228+
| Ampere 86 (A10, A40, ...) | ghcr.io/huggingface/text-embeddings-inference:86-latest |
229+
| Ada Lovelace (RTX 4000 series, ...) | ghcr.io/huggingface/text-embeddings-inference:89-latest |
230+
| Hopper (H100) | ghcr.io/huggingface/text-embeddings-inference:hopper-latest |
230231

231232
### API documentation
232233

@@ -350,6 +351,9 @@ runtime_compute_cap=80
350351
# Example for A10
351352
runtime_compute_cap=86
352353

354+
# Example for Ada Lovelace (RTX 4000 series, ...)
355+
runtime_compute_cap=89
356+
353357
# Example for H100
354358
runtime_compute_cap=90
355359

backends/candle/src/compute_cap.rs

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,59 @@ lazy_static! {
1818
pub static ref COMPILE_COMPUTE_CAP: usize = env!("CUDA_COMPUTE_CAP").parse::<usize>().unwrap();
1919
}
2020

21+
/// Returns `true` when a binary compiled for `compile_compute_cap` is accepted
/// on a device reporting `runtime_compute_cap`.
///
/// Accepted `(runtime, compile)` combinations:
///
/// - `(75, 75)`
/// - runtime `80..=89` with compile `80`
/// - runtime `86..=89` with compile `80..=86`
/// - `(89, 89)`
/// - `(90, 90)`
///
/// Every other pairing is rejected. In particular, a binary is never accepted
/// on a runtime capability lower than the one it was compiled for.
fn compute_cap_matching(runtime_compute_cap: usize, compile_compute_cap: usize) -> bool {
    matches!(
        (runtime_compute_cap, compile_compute_cap),
        (75, 75) | (80..=89, 80) | (86..=89, 80..=86) | (89, 89) | (90, 90)
    )
}
31+
2132
pub fn incompatible_compute_cap() -> bool {
2233
let compile_compute_cap = *COMPILE_COMPUTE_CAP;
2334
let runtime_compute_cap = *RUNTIME_COMPUTE_CAP;
35+
!compute_cap_matching(runtime_compute_cap, compile_compute_cap)
36+
}
2437

25-
match (runtime_compute_cap, compile_compute_cap) {
26-
(75, 75) => false,
27-
(80, 80) => false,
28-
(86, 80..=86) => false,
29-
(90, 90) => false,
30-
(_, _) => true,
38+
#[cfg(test)]
mod tests {
    use crate::compute_cap::compute_cap_matching;

    /// `(runtime, compile)` pairs that must be accepted.
    const ACCEPTED: &[(usize, usize)] = &[
        // Identical capabilities.
        (75, 75),
        (80, 80),
        (86, 86),
        (89, 89),
        (90, 90),
        // Newer runtime running a binary compiled for an older capability.
        (86, 80),
        (89, 80),
        (89, 86),
    ];

    /// `(runtime, compile)` pairs that must be rejected.
    const REJECTED: &[(usize, usize)] = &[
        (75, 80),
        (75, 86),
        (75, 89),
        (75, 90),
        (80, 75),
        (80, 86),
        (80, 89),
        (80, 90),
        (86, 75),
        (86, 89),
        (86, 90),
        (89, 75),
        (89, 90),
        (90, 75),
        (90, 80),
        (90, 86),
        (90, 89),
    ];

    #[test]
    fn test_compute_cap() {
        for &(runtime, compile) in ACCEPTED {
            assert!(
                compute_cap_matching(runtime, compile),
                "expected runtime {} / compile {} to match",
                runtime,
                compile
            );
        }

        for &(runtime, compile) in REJECTED {
            assert!(
                !compute_cap_matching(runtime, compile),
                "expected runtime {} / compile {} not to match",
                runtime,
                compile
            );
        }
    }
}

0 commit comments

Comments
 (0)