Skip to content

Commit 8c36b92

Browse files
authored
Merge pull request #457 from smart-on-fhir/mikix/vllm-fixes
Update vllm models
2 parents 09f18e0 + ad675eb commit 8c36b92

File tree

2 files changed

+6
-7
lines changed

2 files changed

+6
-7
lines changed

compose.yaml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ services:
145 145   # Needs 80GB of GPU memory. A g6.12xlarge EC2 instance should work.
146 146   # Docs: https://docs.vllm.ai/projects/recipes/en/latest/OpenAI/GPT-OSS.html
147 147   extends: common-base
148     - image: vllm/vllm-openai:gptoss # TODO: once stabilized, revert to main releases
    148 + image: vllm/vllm-openai:v0.10.2
149 149   environment:
150 150   - VLLM_ATTENTION_BACKEND=TRITON_ATTN_VLLM_V1 # needed for NVIDIA A10 and A100 chips
151 151   command:
@@ -154,7 +154,7 @@ services:
154 154   # If you update anything here that could affect NLP results, consider updating the
155 155   # task_version of any tasks that use this docker.
156 156   - --model=openai/gpt-oss-120b
157     - - --revision=bc75b44b8a2a116a0e4c6659bcd1b7969885f423
    157 + - --revision=b5c939de8f754692c1647ca79fbf85e8c1e70f8a
158 158   - --tensor-parallel-size=4
159 159   shm_size: 32G
160 160   healthcheck:
@@ -178,18 +178,17 @@ services:
178 178   # Needs 80GB of GPU memory. A g6.12xlarge EC2 instance should work.
179 179   # Docs: https://docs.vllm.ai/projects/recipes/en/latest/Llama/Llama4-Scout.html
180 180   extends: common-base
181     - image: vllm/vllm-openai:v0.10.0
    181 + image: vllm/vllm-openai:v0.11.0
182 182   environment:
183 183   - HF_TOKEN
184 184   - HUGGING_FACE_HUB_TOKEN
185     - - VLLM_ATTENTION_BACKEND=TRITON_ATTN_VLLM_V1 # needed for NVIDIA A10 and A100 chips
186 185   command:
187 186   - --download-dir=/data
188 187   - --port=8087
189 188   # If you update anything here that could affect NLP results, consider updating the
190 189   # task_version of any tasks that use this docker.
191     - - --model=nvidia/Llama-4-Scout-17B-16E-Instruct-FP8
192     - - --revision=d1cf1e9db03b67e10422f97f38c8b546dec14789
    190 + - --model=nvidia/Llama-4-Scout-17B-16E-Instruct-FP4
    191 + - --revision=5588d5387bb37753ee29cde60e76910efc7fb4a9
193 192   - --tensor-parallel-size=4
194 193   shm_size: 32G
195 194   healthcheck:

cumulus_etl/nlp/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ class Llama4ScoutModel(Model):
298 298   AZURE_ID = "Llama-4-Scout-17B-16E-Instruct"
299 299   BEDROCK_ID = "us.meta.llama4-scout-17b-instruct-v1:0"
300 300   COMPOSE_ID = "llama4-scout"
301     - VLLM_INFO = ("nvidia/Llama-4-Scout-17B-16E-Instruct-FP8", "LLAMA4_SCOUT", 8087)
    301 + VLLM_INFO = ("nvidia/Llama-4-Scout-17B-16E-Instruct-FP4", "LLAMA4_SCOUT", 8087)
302 302
303 303
304 304   class ClaudeSonnet45Model(Model):

0 commit comments

Comments
 (0)