Skip to content

Commit 089d85c

Browse files
committed
info-fix
1 parent 3b17f54 commit 089d85c

File tree

1 file changed

+5
-46
lines changed

1 file changed

+5
-46
lines changed

vllm-torch-nightly/vllm_tests.sh

Lines changed: 5 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ cd tests
66

77
################# ENTRYPOINT TESTS #################
88
# Comments: all passed except entrypoints/llm/test_guided_generate.py
9-
9+
# Notes: currently all entrypoint tests are automatically run with V1 VLLM engine.
1010
export VLLM_WORKER_MULTIPROC_METHOD=spawn
1111
pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_generate_multiple_loras.py --ignore=entrypoints/llm/test_guided_generate.py --ignore=entrypoints/llm/test_collective_rpc.py
1212
pytest -v -s entrypoints/llm/test_lazy_outlines.py
@@ -20,6 +20,10 @@ VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode
2020
################# ENTRYPOINT TESTS #################
2121

2222
################# v1 tests #################
23+
# Notes: the v1/engine test failed with nightly torch 0419; bisecting to 0415, the test passed. The error is:
24+
# RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
25+
# This seems to be a torch issue.
26+
2327
# lib dependency: need lm-eval[api]==0.4.8
2428
pytest -v -s v1/core
2529
pytest -v -s v1/engine
@@ -35,56 +39,11 @@ pytest -v -s v1/e2e
3539
pip install -U git+https://github.com/robertgshaw2-neuralmagic/lm-evaluation-harness.git@streaming-api
3640
pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
3741
################# v1 tests #################
38-
39-
#### v1 failed tests ####
40-
# two errors
41-
42-
# Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
43-
# v1/engine/test_engine_core.py::test_engine_core
44-
# v1/engine/test_engine_core.py::test_engine_core_advanced_sampling
45-
# v1/engine/test_engine_core.py::test_engine_core_concurrent_batches
46-
47-
48-
49-
pytest -v -s v1/engine/test_engine_core.py
50-
51-
52-
# Exception: Call to echo method failed: 'EngineCoreProc' object has no attribute 'echo'
53-
tests/v1/engine/test_engine_core_client.py::test_engine_core_client[True] \
54-
tests/v1/engine/test_engine_core_client.py::test_engine_core_client[False] \
55-
tests/v1/engine/test_engine_core_client.py::test_engine_core_client_asyncio
56-
5742
########## Chunked Prefill Test #################
5843
VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
5944
VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
6045
########## Chunked Prefill Test #################
6146

62-
63-
64-
scp -i /Users/elainewy/Documents/secrets/gpu-test-yang.pem /Users/elainewy/Documents/work/pytorch-integration-testing/vllm-torch-nightly/Dockerfile.pinntorch ec2-user@ec2-35-91-52-34.us-west-2.compute.amazonaws.com:/home/ec2-user/test-vllm/
65-
#################Basic Correctness Test # 30min #################
66-
67-
68-
export VLLM_WORKER_MULTIPROC_METHOD=spawn
69-
pytest -v -s basic_correctness/test_cumem.py
70-
pytest -v -s basic_correctness/test_basic_correctness.py
71-
pytest -v -s basic_correctness/test_cpu_offload.py
72-
73-
74-
export VLLM_WORKER_MULTIPROC_METHOD=spawn
75-
pytest -v basic_correctness/test_cumem.py
76-
pytest -v basic_correctness/test_basic_correctness.py
77-
pytest -v basic_correctness/test_cpu_offload.py
78-
79-
80-
pytest -v -s v1/engine/test_engine_core.py
81-
82-
83-
# prefill chunk tests
84-
85-
VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
86-
VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
87-
8847
# Regression Test
8948
pip install modelscope
9049
pytest -v -s test_regression.py

0 commit comments

Comments
 (0)