Skip to content

Commit 30577ac

Browse files
committed
Update vllm models
1 parent 09f18e0 commit 30577ac

File tree

3 files changed

+8
-8
lines changed

3 files changed

+8
-8
lines changed

compose.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ services:
145145
# Needs 80GB of GPU memory. A g6.12xlarge EC2 instance should work.
146146
# Docs: https://docs.vllm.ai/projects/recipes/en/latest/OpenAI/GPT-OSS.html
147147
extends: common-base
148-
image: vllm/vllm-openai:gptoss # TODO: once stabilized, revert to main releases
148+
image: vllm/vllm-openai:v0.10.2
149149
environment:
150150
- VLLM_ATTENTION_BACKEND=TRITON_ATTN_VLLM_V1 # needed for NVIDIA A10 and A100 chips
151151
command:
@@ -154,7 +154,7 @@ services:
154154
# If you update anything here that could affect NLP results, consider updating the
155155
# task_version of any tasks that use this docker.
156156
- --model=openai/gpt-oss-120b
157-
- --revision=bc75b44b8a2a116a0e4c6659bcd1b7969885f423
157+
- --revision=b5c939de8f754692c1647ca79fbf85e8c1e70f8a
158158
- --tensor-parallel-size=4
159159
shm_size: 32G
160160
healthcheck:
@@ -174,11 +174,11 @@ services:
174174
devices:
175175
- capabilities: [gpu]
176176

177-
llama4-scout: # WIP, have not gotten it to run successfully yet
177+
llama4-scout:
178178
# Needs 80GB of GPU memory. A g6.12xlarge EC2 instance should work.
179179
# Docs: https://docs.vllm.ai/projects/recipes/en/latest/Llama/Llama4-Scout.html
180180
extends: common-base
181-
image: vllm/vllm-openai:v0.10.0
181+
image: vllm/vllm-openai:v0.10.2
182182
environment:
183183
- HF_TOKEN
184184
- HUGGING_FACE_HUB_TOKEN
@@ -188,8 +188,8 @@ services:
188188
- --port=8087
189189
# If you update anything here that could affect NLP results, consider updating the
190190
# task_version of any tasks that use this docker.
191-
- --model=nvidia/Llama-4-Scout-17B-16E-Instruct-FP8
192-
- --revision=d1cf1e9db03b67e10422f97f38c8b546dec14789
191+
- --model=nvidia/Llama-4-Scout-17B-16E-Instruct-FP4
192+
- --revision=5588d5387bb37753ee29cde60e76910efc7fb4a9
193193
- --tensor-parallel-size=4
194194
shm_size: 32G
195195
healthcheck:

cumulus_etl/nlp/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ class Llama4ScoutModel(Model):
298298
AZURE_ID = "Llama-4-Scout-17B-16E-Instruct"
299299
BEDROCK_ID = "us.meta.llama4-scout-17b-instruct-v1:0"
300300
COMPOSE_ID = "llama4-scout"
301-
VLLM_INFO = ("nvidia/Llama-4-Scout-17B-16E-Instruct-FP8", "LLAMA4_SCOUT", 8087)
301+
VLLM_INFO = ("nvidia/Llama-4-Scout-17B-16E-Instruct-FP4", "LLAMA4_SCOUT", 8087)
302302

303303

304304
class ClaudeSonnet45Model(Model):

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name = "cumulus-etl"
33
requires-python = ">= 3.11"
44
dependencies = [
5-
"aiobotocore < 2.22.0", # FIXME: temp hotfix for dependency version madness - remove later
5+
"s3fs[boto3]", # specify this early to resolve some botocore/aiobotocore dependency madness
66
"ctakesclient >= 5.1",
77
"cumulus-fhir-support >= 1.6",
88
"delta-spark >= 4, < 5",

0 commit comments

Comments (0)