@@ -145,7 +145,7 @@ services:
     # Needs 80GB of GPU memory. A g6.12xlarge EC2 instance should work.
     # Docs: https://docs.vllm.ai/projects/recipes/en/latest/OpenAI/GPT-OSS.html
     extends: common-base
-    image: vllm/vllm-openai:gptoss # TODO: once stabilized, revert to main releases
+    image: vllm/vllm-openai:v0.10.2
     environment:
       - VLLM_ATTENTION_BACKEND=TRITON_ATTN_VLLM_V1 # needed for NVIDIA A10 and A100 chips
     command:
@@ -154,7 +154,7 @@ services:
       # If you update anything here that could affect NLP results, consider updating the
       # task_version of any tasks that use this docker.
       - --model=openai/gpt-oss-120b
-      - --revision=bc75b44b8a2a116a0e4c6659bcd1b7969885f423
+      - --revision=b5c939de8f754692c1647ca79fbf85e8c1e70f8a
       - --tensor-parallel-size=4
     shm_size: 32G
     healthcheck:
@@ -178,18 +178,17 @@ services:
     # Needs 80GB of GPU memory. A g6.12xlarge EC2 instance should work.
     # Docs: https://docs.vllm.ai/projects/recipes/en/latest/Llama/Llama4-Scout.html
     extends: common-base
-    image: vllm/vllm-openai:v0.10.0
+    image: vllm/vllm-openai:v0.11.0
     environment:
       - HF_TOKEN
       - HUGGING_FACE_HUB_TOKEN
-      - VLLM_ATTENTION_BACKEND=TRITON_ATTN_VLLM_V1 # needed for NVIDIA A10 and A100 chips
     command:
       - --download-dir=/data
       - --port=8087
       # If you update anything here that could affect NLP results, consider updating the
       # task_version of any tasks that use this docker.
-      - --model=nvidia/Llama-4-Scout-17B-16E-Instruct-FP8
-      - --revision=d1cf1e9db03b67e10422f97f38c8b546dec14789
+      - --model=nvidia/Llama-4-Scout-17B-16E-Instruct-FP4
+      - --revision=5588d5387bb37753ee29cde60e76910efc7fb4a9
       - --tensor-parallel-size=4
     shm_size: 32G
     healthcheck:
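A quick way to sanity-check these image and revision bumps is to start one of the updated services and poll vLLM's OpenAI-compatible endpoints. A minimal sketch, assuming the Llama service in this compose file is named llama-4-scout (substitute the actual service name); the port 8087 comes from the --port flag above:

    docker compose up -d llama-4-scout
    # vLLM's OpenAI-compatible server exposes /health and /v1/models on the configured port
    curl -sf http://localhost:8087/health && echo "server is up"
    curl -s http://localhost:8087/v1/models    # should list nvidia/Llama-4-Scout-17B-16E-Instruct-FP4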