Skip to content

Commit d3e5b5a

Browse files
feat: add grpc router (#90)
1 parent e10e10e commit d3e5b5a

28 files changed

+3220
-1555
lines changed

.github/workflows/build_75.yaml

Lines changed: 35 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@
7777
tags: |
7878
type=semver,pattern=turing-{{version}}
7979
type=semver,pattern=turing-{{major}}.{{minor}}
80-
type=raw,value=turing-latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
80+
type=raw,value=turing-latest
8181
type=raw,value=turing-sha-${{ env.GITHUB_SHA_SHORT }}
8282
- name: Build and push Docker image
8383
id: build-and-push-75
@@ -99,3 +99,37 @@
9999
labels: ${{ steps.meta-75.outputs.labels }}
100100
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-75,mode=max
101101
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-75,mode=max
102+
- name: Extract metadata (tags, labels) for Docker
103+
id: meta-75-grpc
104+
uses: docker/metadata-action@v4.3.0
105+
with:
106+
images: |
107+
registry.internal.huggingface.tech/api-inference/text-embeddings-inference
108+
ghcr.io/huggingface/text-embeddings-inference
109+
flavor: |
110+
latest=false
111+
tags: |
112+
type=semver,pattern=turing-{{version}}+grpc
113+
type=semver,pattern=turing-{{major}}.{{minor}}+grpc
114+
type=raw,value=turing-latest+grpc
115+
type=raw,value=turing-sha-${{ env.GITHUB_SHA_SHORT }}+grpc
116+
- name: Build and push Docker image
117+
id: build-and-push-75-grpc
118+
uses: docker/build-push-action@v4
119+
with:
120+
context: .
121+
target: grpc
122+
file: Dockerfile-cuda
123+
push: ${{ github.event_name != 'pull_request' }}
124+
platforms: 'linux/amd64'
125+
build-args: |
126+
SCCACHE_GHA_ENABLED=on
127+
ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
128+
ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
129+
CUDA_COMPUTE_CAP=75
130+
GIT_SHA=${{ env.GITHUB_SHA }}
131+
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
132+
DEFAULT_USE_FLASH_ATTENTION=False
133+
tags: ${{ steps.meta-75-grpc.outputs.tags }}
134+
labels: ${{ steps.meta-75-grpc.outputs.labels }}
135+
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-75,mode=max

.github/workflows/build_80.yaml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,36 @@
9898
labels: ${{ steps.meta-80.outputs.labels }}
9999
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-80,mode=max
100100
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-80,mode=max
101+
- name: Extract metadata (tags, labels) for Docker
102+
id: meta-80-grpc
103+
uses: docker/metadata-action@v4.3.0
104+
with:
105+
images: |
106+
registry.internal.huggingface.tech/api-inference/text-embeddings-inference
107+
ghcr.io/huggingface/text-embeddings-inference
108+
flavor: |
109+
latest=false
110+
tags: |
111+
type=semver,pattern={{version}}+grpc
112+
type=semver,pattern={{major}}.{{minor}}+grpc
113+
type=raw,value=latest+grpc
114+
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}+grpc
115+
- name: Build and push Docker image
116+
id: build-and-push-80-grpc
117+
uses: docker/build-push-action@v4
118+
with:
119+
context: .
120+
target: grpc
121+
file: Dockerfile-cuda
122+
push: ${{ github.event_name != 'pull_request' }}
123+
platforms: 'linux/amd64'
124+
build-args: |
125+
SCCACHE_GHA_ENABLED=on
126+
ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
127+
ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
128+
CUDA_COMPUTE_CAP=80
129+
GIT_SHA=${{ env.GITHUB_SHA }}
130+
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
131+
tags: ${{ steps.meta-80-grpc.outputs.tags }}
132+
labels: ${{ steps.meta-80-grpc.outputs.labels }}
133+
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-80,mode=max

.github/workflows/build_86.yaml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,37 @@
9898
labels: ${{ steps.meta-86.outputs.labels }}
9999
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-86,mode=max
100100
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-86,mode=max
101+
- name: Extract metadata (tags, labels) for Docker
102+
id: meta-86-grpc
103+
uses: docker/metadata-action@v4.3.0
104+
with:
105+
images: |
106+
registry.internal.huggingface.tech/api-inference/text-embeddings-inference
107+
ghcr.io/huggingface/text-embeddings-inference
108+
flavor: |
109+
latest=false
110+
tags: |
111+
type=semver,pattern=86-{{version}}+grpc
112+
type=semver,pattern=86-{{major}}.{{minor}}+grpc
113+
type=raw,value=86-latest+grpc
114+
type=raw,value=86-sha-${{ env.GITHUB_SHA_SHORT }}+grpc
115+
- name: Build and push Docker image
116+
id: build-and-push-86-grpc
117+
uses: docker/build-push-action@v4
118+
with:
119+
context: .
120+
target: grpc
121+
file: Dockerfile-cuda
122+
push: ${{ github.event_name != 'pull_request' }}
123+
platforms: 'linux/amd64'
124+
build-args: |
125+
SCCACHE_GHA_ENABLED=on
126+
ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
127+
ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
128+
CUDA_COMPUTE_CAP=86
129+
GIT_SHA=${{ env.GITHUB_SHA }}
130+
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
131+
tags: ${{ steps.meta-86-grpc.outputs.tags }}
132+
labels: ${{ steps.meta-86-grpc.outputs.labels }}
133+
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-86,mode=max
134+

.github/workflows/build_89.yaml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,37 @@
9898
labels: ${{ steps.meta-89.outputs.labels }}
9999
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-89,mode=max
100100
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-89,mode=max
101+
- name: Extract metadata (tags, labels) for Docker
102+
id: meta-89-grpc
103+
uses: docker/metadata-action@v4.3.0
104+
with:
105+
images: |
106+
registry.internal.huggingface.tech/api-inference/text-embeddings-inference
107+
ghcr.io/huggingface/text-embeddings-inference
108+
flavor: |
109+
latest=false
110+
tags: |
111+
type=semver,pattern=89-{{version}}+grpc
112+
type=semver,pattern=89-{{major}}.{{minor}}+grpc
113+
type=raw,value=89-latest+grpc
114+
type=raw,value=89-sha-${{ env.GITHUB_SHA_SHORT }}+grpc
115+
- name: Build and push Docker image
116+
id: build-and-push-89-grpc
117+
uses: docker/build-push-action@v4
118+
with:
119+
context: .
120+
target: grpc
121+
file: Dockerfile-cuda
122+
push: ${{ github.event_name != 'pull_request' }}
123+
platforms: 'linux/amd64'
124+
build-args: |
125+
SCCACHE_GHA_ENABLED=on
126+
ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
127+
ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
128+
CUDA_COMPUTE_CAP=89
129+
GIT_SHA=${{ env.GITHUB_SHA }}
130+
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
131+
tags: ${{ steps.meta-89-grpc.outputs.tags }}
132+
labels: ${{ steps.meta-89-grpc.outputs.labels }}
133+
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-89,mode=max
134+

.github/workflows/build_90.yaml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,4 +98,39 @@
9898
labels: ${{ steps.meta-90.outputs.labels }}
9999
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-90,mode=max
100100
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-90,mode=max
101+
- name: Extract metadata (tags, labels) for Docker
102+
id: meta-90-grpc
103+
uses: docker/metadata-action@v4.3.0
104+
with:
105+
images: |
106+
registry.internal.huggingface.tech/api-inference/text-embeddings-inference
107+
ghcr.io/huggingface/text-embeddings-inference
108+
flavor: |
109+
latest=false
110+
tags: |
111+
type=semver,pattern=hopper-{{version}}+grpc
112+
type=semver,pattern=hopper-{{major}}.{{minor}}+grpc
113+
type=raw,value=hopper-latest+grpc
114+
type=raw,value=hopper-sha-${{ env.GITHUB_SHA_SHORT }}+grpc
115+
- name: Build and push Docker image
116+
id: build-and-push-90-grpc
117+
uses: docker/build-push-action@v4
118+
with:
119+
context: .
120+
target: grpc
121+
file: Dockerfile-cuda
122+
push: ${{ github.event_name != 'pull_request' }}
123+
platforms: 'linux/amd64'
124+
build-args: |
125+
SCCACHE_GHA_ENABLED=on
126+
ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
127+
ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
128+
CUDA_COMPUTE_CAP=90
129+
GIT_SHA=${{ env.GITHUB_SHA }}
130+
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
131+
tags: ${{ steps.meta-90-grpc.outputs.tags }}
132+
labels: ${{ steps.meta-90-grpc.outputs.labels }}
133+
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-90,mode=max
134+
135+
101136

.github/workflows/build_cpu.yaml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,36 @@
9797
labels: ${{ steps.meta-cpu.outputs.labels }}
9898
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-cpu,mode=max
9999
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-cpu,mode=max
100+
- name: Extract metadata (tags, labels) for Docker
101+
id: meta-cpu-grpc
102+
uses: docker/metadata-action@v4.3.0
103+
with:
104+
images: |
105+
registry.internal.huggingface.tech/api-inference/text-embeddings-inference
106+
ghcr.io/huggingface/text-embeddings-inference
107+
flavor: |
108+
latest=false
109+
tags: |
110+
type=semver,pattern=cpu-{{version}}+grpc
111+
type=semver,pattern=cpu-{{major}}.{{minor}}+grpc
112+
type=raw,value=cpu-latest+grpc
113+
type=raw,value=cpu-sha-${{ env.GITHUB_SHA_SHORT }}+grpc
114+
- name: Build and push Docker image
115+
id: build-and-push-cpu-grpc
116+
uses: docker/build-push-action@v4
117+
with:
118+
context: .
119+
target: grpc
120+
file: Dockerfile
121+
push: ${{ github.event_name != 'pull_request' }}
122+
platforms: 'linux/amd64'
123+
build-args: |
124+
SCCACHE_GHA_ENABLED=on
125+
ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
126+
ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
127+
GIT_SHA=${{ env.GITHUB_SHA }}
128+
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
129+
tags: ${{ steps.meta-cpu-grpc.outputs.tags }}
130+
labels: ${{ steps.meta-cpu-grpc.outputs.labels }}
131+
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-cpu,mode=max
132+

0 commit comments

Comments
 (0)