Skip to content

Commit 8f2e33e

Browse files
committed
add ut for cv parallel
Signed-off-by: David9857 <985700846@qq.com>
1 parent a7195df commit 8f2e33e

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

tests/multicard/test_offline_inference_distributed.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,29 @@ def test_models_distributed_DeepSeek():
6464
distributed_executor_backend="mp",
6565
) as vllm_model:
6666
vllm_model.generate_greedy(example_prompts, max_tokens)
67+
68+
69+
@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "1",
                    reason="deepseek v2 lite is not supported on v1")
def test_models_distributed_DeepSeek_with_cv_parallel():
    """Smoke-test greedy generation on DeepSeek-V2-Lite with cv-parallel on.

    Runs a 4-way tensor-parallel model via the multiprocessing executor with
    ``enable_graph_mode`` and ``enable_cv_parallel`` set in the additional
    config, and checks that greedy decoding completes without error.
    """
    example_prompts = [
        "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs.",
        "Briefly describe the major milestones in the development of artificial intelligence from 1950 to 2020.",
        "Compare and contrast artificial intelligence with human intelligence in terms of processing information.",
    ]
    max_tokens = 5
    # Graph mode plus communication/computation (cv) parallelism — the
    # combination this test exists to exercise.
    additional_config = {
        'enable_graph_mode': True,
        'enable_cv_parallel': True,
    }
    with VllmRunner(
            "deepseek-ai/DeepSeek-V2-Lite",
            dtype="half",
            tensor_parallel_size=4,
            distributed_executor_backend="mp",
            additional_config=additional_config,
    ) as vllm_model:
        vllm_model.generate_greedy(example_prompts, max_tokens)

0 commit comments

Comments
 (0)