# ################ ENTRYPOINT TESTS #################
# Comments: all passed except entrypoints/llm/test_guided_generate.py
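# To reproduce the guided_generate failure in isolation (optional repro step, not part of the original run):
# pytest -v -s entrypoints/llm/test_guided_generate.py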
-
+ # Notes: currently all entrypoint tests automatically run with the V1 vLLM engine.
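# Suites that still need the legacy engine force V0 per invocation; this pattern is already used
# for offline_mode further down in this file, e.g.:
# VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode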
export VLLM_WORKER_MULTIPROC_METHOD=spawn
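# (spawn avoids "Cannot re-initialize CUDA in forked subprocess" when worker processes start; see the v1 notes below)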
pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_generate_multiple_loras.py --ignore=entrypoints/llm/test_guided_generate.py --ignore=entrypoints/llm/test_collective_rpc.py
pytest -v -s entrypoints/llm/test_lazy_outlines.py
@@ -20,6 +20,10 @@ VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode
# ################ ENTRYPOINT TESTS #################

# ################ v1 tests #################
+ # Notes: the v1/engine test failed with nightly torch 0419; after bisecting to 0415 the test passed. The error is:
+ # RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
+ # This seems to be a torch issue.
+
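# Possible workaround sketch (untested here; assumes the failure is only the fork/CUDA re-init issue):
# force the spawn start method for worker processes, as already done for the entrypoint tests above.
export VLLM_WORKER_MULTIPROC_METHOD=spawn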
# lib dependency: need lm-eval[api]==0.4.8
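# One way to install the pinned dependency noted above (assumption: the PyPI package; the streaming-api fork installed below may supersede it):
pip install "lm-eval[api]==0.4.8"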
pytest -v -s v1/core
pytest -v -s v1/engine
@@ -35,56 +39,11 @@ pytest -v -s v1/e2e
pip install -U git+https://github.com/robertgshaw2-neuralmagic/lm-evaluation-harness.git@streaming-api
pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
# ################ v1 tests #################
-
- # ### v1 failed tests ####
- # two errors
-
- # Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
- # v1/engine/test_engine_core.py::test_engine_core
- # v1/engine/test_engine_core.py::test_engine_core_advanced_sampling
- # v1/engine/test_engine_core.py::test_engine_core_concurrent_batches
-
- pytest -v -s v1/engine/test_engine_core.py
-
- # Exception: Call to echo method failed: 'EngineCoreProc' object has no attribute 'echo'
- tests/v1/engine/test_engine_core_client.py::test_engine_core_client[True] \
- tests/v1/engine/test_engine_core_client.py::test_engine_core_client[False] \
- tests/v1/engine/test_engine_core_client.py::test_engine_core_client_asyncio
-
# #########Chunked Prefill Test #################
VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
# #########Chunked Prefill Test #################
-
- scp -i /Users/elainewy/Documents/secrets/gpu-test-yang.pem /Users/elainewy/Documents/work/pytorch-integration-testing/vllm-torch-nightly/Dockerfile.pinntorch ec2-user@ec2-35-91-52-34.us-west-2.compute.amazonaws.com:/home/ec2-user/test-vllm/
- # ################ Basic Correctness Test # 30min #################
-
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s basic_correctness/test_cumem.py
- pytest -v -s basic_correctness/test_basic_correctness.py
- pytest -v -s basic_correctness/test_cpu_offload.py
-
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v basic_correctness/test_cumem.py
- pytest -v basic_correctness/test_basic_correctness.py
- pytest -v basic_correctness/test_cpu_offload.py
-
- pytest -v -s v1/engine/test_engine_core.py
-
- # prefill chunk tests
-
- VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
- VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
-
# Regression Test
pip install modelscope
pytest -v -s test_regression.py