Skip to content

Commit d0dc4cf

Browse files
Authored by Seiji Eicher
Fix inadvertently silenced PP tests for mp, add DeepSeek V2/V3 model family to PP tests (#20831)
Signed-off-by: Seiji Eicher <seiji@anyscale.com>
1 parent d31a647 commit d0dc4cf

File tree

1 file changed

+17
-7
lines changed

1 file changed

+17
-7
lines changed

tests/distributed/test_pipeline_parallel.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,9 @@
1414

1515
import pytest
1616

17-
from vllm.config import TaskOption
17+
from vllm.config import _FLOAT16_NOT_SUPPORTED_MODELS, TaskOption
1818
from vllm.logger import init_logger
19+
from vllm.transformers_utils.config import get_config
1920

2021
from ..models.registry import HF_EXAMPLE_MODELS
2122
from ..utils import compare_two_settings, create_new_process_for_each_test
@@ -158,7 +159,7 @@ def iter_params(self, model_id: str):
158159
"databricks/dbrx-instruct": PPTestSettings.fast(load_format="dummy"),
159160
"Deci/DeciLM-7B-instruct": PPTestSettings.fast(),
160161
"deepseek-ai/deepseek-llm-7b-chat": PPTestSettings.fast(),
161-
"deepseek-ai/DeepSeek-V2-Lite-Chat": PPTestSettings.fast(),
162+
"deepseek-ai/DeepSeek-V2-Lite-Chat": PPTestSettings.fast(tp_base=2),
162163
"LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct": PPTestSettings.fast(),
163164
"tiiuae/falcon-7b": PPTestSettings.fast(),
164165
"google/gemma-1.1-2b-it": PPTestSettings.fast(),
@@ -210,9 +211,11 @@ def iter_params(self, model_id: str):
210211

211212
EMBEDDING_MODELS = { # type: ignore[var-annotated]
212213
# [Text-only]
213-
"intfloat/e5-mistral-7b-instruct": PPTestSettings.fast(),
214-
"BAAI/bge-multilingual-gemma2": PPTestSettings.fast(),
215-
"Qwen/Qwen2.5-Math-RM-72B": PPTestSettings.fast(load_format="dummy"),
214+
"intfloat/e5-mistral-7b-instruct": PPTestSettings.fast(task="embed"),
215+
"BAAI/bge-multilingual-gemma2": PPTestSettings.fast(task="embed"),
216+
"Qwen/Qwen2.5-Math-RM-72B": PPTestSettings.fast(
217+
load_format="dummy", task="embed"
218+
),
216219
}
217220

218221
MULTIMODAL_MODELS = {
@@ -248,6 +251,7 @@ def iter_params(self, model_id: str):
248251
"meta-llama/Llama-3.2-1B-Instruct",
249252
"ArthurZ/Ilama-3.2-1B",
250253
"ibm/PowerLM-3b",
254+
"deepseek-ai/DeepSeek-V2-Lite-Chat",
251255
# [LANGUAGE EMBEDDING]
252256
"intfloat/e5-mistral-7b-instruct",
253257
"BAAI/bge-multilingual-gemma2",
@@ -287,6 +291,11 @@ def _compare_tp(
287291
trust_remote_code = model_info.trust_remote_code
288292
tokenizer_mode = model_info.tokenizer_mode
289293
hf_overrides = model_info.hf_overrides
294+
hf_config = get_config(model_id, trust_remote_code)
295+
296+
dtype = "float16"
297+
if hf_config.model_type in _FLOAT16_NOT_SUPPORTED_MODELS:
298+
dtype = "bfloat16"
290299

291300
if load_format == "dummy":
292301
# Avoid OOM
@@ -316,7 +325,7 @@ def _compare_tp(
316325
common_args = [
317326
# use half precision for speed and memory savings in CI environment
318327
"--dtype",
319-
"float16",
328+
dtype,
320329
"--max-model-len",
321330
"2048",
322331
"--max-num-seqs",
@@ -338,6 +347,7 @@ def _compare_tp(
338347
common_args.extend(["--hf-overrides", json.dumps(hf_overrides)])
339348

340349
specific_case = tp_size == 2 and pp_size == 2 and chunked_prefill
350+
testing_ray_compiled_graph = False
341351
if distributed_backend == "ray" and (vllm_major_version == "1"
342352
or specific_case):
343353
# For V1, test Ray Compiled Graph for all the tests
@@ -351,6 +361,7 @@ def _compare_tp(
351361
# Temporary. Currently when zeromq + SPMD is used, it does not properly
352362
# terminate because of a Ray Compiled Graph issue.
353363
common_args.append("--disable-frontend-multiprocessing")
364+
testing_ray_compiled_graph = True
354365
elif distributed_backend == "mp":
355366
# Both V0/V1 of multiprocessing executor support PP
356367
pp_env = {
@@ -394,7 +405,6 @@ def _compare_tp(
394405
tp_env,
395406
method=method)
396407
except Exception:
397-
testing_ray_compiled_graph = pp_env is not None
398408
if testing_ray_compiled_graph and vllm_major_version == "0":
399409
# Ray Compiled Graph tests are flaky for V0,
400410
# so we don't want to fail the test

0 commit comments

Comments (0)