Skip to content

Commit 11998af

Browse files
committed
cutting a release to update quay prod image in llm-d
2 parents 416e299 + 774e685 commit 11998af

18 files changed

+682
-97
lines changed

.tekton/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ Each repo includes a `.version.json` file at its root. This file controls:
2020
```json
2121
{
2222
"dev-version": "0.0.5",
23-
"dev-registry": "quay.io/vllm-d/<your project name>-dev",
23+
"dev-registry": "quay.io/llm-d/<your project name>-dev",
2424
"prod-version": "0.0.4",
25-
"prod-registry": "quay.io/vllm-d/<your project name>"
25+
"prod-registry": "quay.io/llm-d/<your project name>"
2626
}
2727
```
2828

@@ -43,8 +43,8 @@ The pipeline reads this file to:
4343

4444
This pipeline maintains two container repositories for this GitHub repository, as follows.
4545

46-
- `quay.io/vllm-d/<repoName>-dev`. Hold builds from the `dev` branch as described below.
47-
- `quay.io/vllm-d/<repoName>`. Holds promotions to prod, as described below.
46+
- `quay.io/llm-d/<repoName>-dev`. Holds builds from the `dev` branch as described below.
47+
- `quay.io/llm-d/<repoName>`. Holds promotions to prod, as described below.
4848

4949
---
5050

.tekton/buildah-build.yaml

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,9 @@ spec:
1414
workspaces:
1515
- name: source
1616
- name: registry
17+
- name: container-storage
18+
mountPath: /var/lib/containers
1719
steps:
18-
# - name: setup-qemu
19-
# image: us.icr.io/ibm-hc4ai-operator/qemu-user-static:latest
20-
# imagePullPolicy: IfNotPresent
21-
# securityContext:
22-
# privileged: true
23-
# script: |
24-
# #!/bin/sh
25-
# echo "⚙️ Attempting QEMU registration..."
26-
# /register || echo "ℹ️ Skipping errors – QEMU might already be registered"
27-
2820
- name: build
2921
image: quay.io/buildah/stable:latest
3022
imagePullPolicy: IfNotPresent
@@ -33,7 +25,7 @@ spec:
3325
privileged: true
3426
env:
3527
- name: STORAGE_DRIVER
36-
value: vfs
28+
value: overlay
3729
script: |
3830
#!/bin/sh
3931
set -e

.tekton/pipelinerun.yaml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,8 @@ spec:
326326
workspace: source
327327
- name: registry
328328
workspace: registry-secret
329+
- name: container-storage
330+
workspace: container-storage
329331

330332
- name: vulnerability-scan
331333
when:
@@ -606,6 +608,9 @@ spec:
606608
name: noop-task
607609

608610
workspaces:
611+
- name: container-storage
612+
persistentVolumeClaim:
613+
claimName: buildah-cache6
609614
- name: source
610615
volumeClaimTemplate:
611616
spec:
@@ -630,4 +635,4 @@ spec:
630635
secretName: "git-auth-secret-neuralmagic"
631636
- name: registry-secret
632637
secret:
633-
secretName: quay-secret
638+
secretName: quay-secret-llm-d

.version.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"dev-version": "0.0.4",
3-
"dev-registry": "quay.io/vllm-d/vllm-sim-dev",
3+
"dev-registry": "quay.io/llm-d/vllm-sim-dev",
44
"prod-version": "0.0.3",
5-
"prod-registry": "quay.io/vllm-d/vllm-sim"
5+
"prod-registry": "quay.io/llm-d/vllm-sim"
66
}

Makefile

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ SHELL := /usr/bin/env bash
3939
PROJECT_NAME ?= vllm-sim
4040
DEV_VERSION ?= 0.0.1
4141
PROD_VERSION ?= 0.0.0
42-
IMAGE_TAG_BASE ?= quay.io/vllm-d/$(PROJECT_NAME)
42+
IMAGE_TAG_BASE ?= quay.io/llm-d/$(PROJECT_NAME)
4343
IMG = $(IMAGE_TAG_BASE):$(DEV_VERSION)
4444
NAMESPACE ?= hc4ai-operator
4545

@@ -92,16 +92,23 @@ buildah-build: check-builder load-version-json ## Build and push image (multi-ar
9292
@echo "✅ Using builder: $(BUILDER)"
9393
@if [ "$(BUILDER)" = "buildah" ]; then \
9494
echo "🔧 Buildah detected: Performing multi-arch build..."; \
95+
FINAL_TAG=$(IMG); \
9596
for arch in amd64; do \
97+
ARCH_TAG=$$FINAL_TAG-$$arch; \
9698
echo "📦 Building for architecture: $$arch"; \
97-
buildah build --arch=$$arch --os=linux -t $(IMG)-$$arch . || exit 1; \
98-
echo "🚀 Pushing image: $(IMG)-$$arch"; \
99-
buildah push $(IMG)-$$arch docker://$(IMG)-$$arch || exit 1; \
99+
buildah build --arch=$$arch --os=linux --layers -t $(IMG)-$$arch . || exit 1; \
100+
echo "🚀 Pushing image: $$ARCH_TAG"; \
101+
buildah push $$ARCH_TAG docker://$$ARCH_TAG || exit 1; \
100102
done; \
101-
echo "🧱 Creating and pushing manifest list: $(IMG)"; \
102-
buildah manifest create $(IMG); \
103-
buildah manifest add $(IMG) $(IMG)-amd64; \
104-
buildah manifest push --all $(IMG) docker://$(IMG); \
103+
echo "🧼 Removing existing manifest (if any)..."; \
104+
buildah manifest rm $$FINAL_TAG || true; \
105+
echo "🧱 Creating and pushing manifest list: $$FINAL_TAG"; \
106+
buildah manifest create $$FINAL_TAG; \
107+
for arch in amd64; do \
108+
ARCH_TAG=$$FINAL_TAG-$$arch; \
109+
buildah manifest add $$FINAL_TAG $$ARCH_TAG; \
110+
done; \
111+
buildah manifest push --all $$FINAL_TAG docker://$$FINAL_TAG; \
105112
elif [ "$(BUILDER)" = "docker" ]; then \
106113
echo "🐳 Docker detected: Building with buildx..."; \
107114
sed -e '1 s/\(^FROM\)/FROM --platform=$${BUILDPLATFORM}/' Dockerfile > Dockerfile.cross; \
@@ -259,7 +266,7 @@ load-version-json: check-jq
259266
export DEV_VERSION; \
260267
export PROD_VERSION; \
261268
fi && \
262-
CURRENT_DEFAULT="quay.io/vllm-d/$(PROJECT_NAME)"; \
269+
CURRENT_DEFAULT="quay.io/llm-d/$(PROJECT_NAME)"; \
263270
if [ "$(IMAGE_TAG_BASE)" = "$$CURRENT_DEFAULT" ]; then \
264271
IMAGE_TAG_BASE=$$(jq -r '."dev-registry"' .version.json); \
265272
echo "✔ Loaded IMAGE_TAG_BASE from .version.json: $$IMAGE_TAG_BASE"; \

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
8181
- `inter-token-latency`: the time to 'generate' each additional token (in milliseconds), optional, by default zero
8282
- `max-loras`: maximum number of LoRAs in a single batch, optional, default is one
8383
- `max-cpu-loras`: maximum number of LoRAs to store in CPU memory, optional, must be >= max_loras, default is max_loras
84+
- `max-running-requests`: maximum number of inference requests that can be processed at the same time
8485

8586

8687
## Working with docker image
@@ -113,4 +114,3 @@ To run the router in a standalone test environment, run:
113114
```
114115

115116

116-

cmd/vllm-sim/main.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,12 @@ func main() {
3131
// setup logger and context with graceful shutdown
3232
logger := klog.Background()
3333
ctx := klog.NewContext(context.Background(), logger)
34-
_ = signals.SetupSignalHandler(ctx)
34+
ctx = signals.SetupSignalHandler(ctx)
3535

3636
logger.Info("Start vllm simulator")
3737

3838
vllmSim := vllmsim.New(logger)
39-
err := vllmSim.Start()
39+
err := vllmSim.Start(ctx)
4040

4141
if err != nil {
4242
logger.Error(err, "VLLM simulator failed")

deploy/common/statefulset.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,5 @@ spec:
1616
serviceAccountName: operator-controller-manager
1717
containers:
1818
- name: cmd
19-
image: quay.io/vllm-d/placeholder:placeholder
19+
image: quay.io/llm-d/placeholder:placeholder
2020
imagePullPolicy: Always

deploy/kustomization.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ configMapGenerator:
2525

2626
# Define the image to be updated.
2727
# images:
28-
# - name: quay.io/vllm-d/placeholder
29-
# newName: quay.io/vllm-d/${IMAGE_TAG_BASE}
28+
# - name: quay.io/llm-d/placeholder
29+
# newName: quay.io/llm-d/${IMAGE_TAG_BASE}
3030
# newTag: ${VERSION}
3131
patches:
3232
- path: common/patch-service.yaml

go.mod

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ require (
1010
github.com/google/uuid v1.6.0
1111
github.com/onsi/ginkgo/v2 v2.23.4
1212
github.com/onsi/gomega v1.37.0
13+
github.com/openai/openai-go v0.1.0-beta.10
1314
github.com/prometheus/client_golang v1.21.1
1415
github.com/spf13/pflag v1.0.6
1516
github.com/valyala/fasthttp v1.59.0
@@ -30,6 +31,10 @@ require (
3031
github.com/prometheus/client_model v0.6.1 // indirect
3132
github.com/prometheus/common v0.62.0 // indirect
3233
github.com/prometheus/procfs v0.15.1 // indirect
34+
github.com/tidwall/gjson v1.18.0 // indirect
35+
github.com/tidwall/match v1.1.1 // indirect
36+
github.com/tidwall/pretty v1.2.1 // indirect
37+
github.com/tidwall/sjson v1.2.5 // indirect
3338
github.com/valyala/bytebufferpool v1.0.0 // indirect
3439
go.uber.org/automaxprocs v1.6.0 // indirect
3540
golang.org/x/net v0.38.0 // indirect

0 commit comments

Comments
 (0)