# ################ ENTRYPOINT TESTS #################
# Comments: all passed except entrypoints/llm/test_guided_generate.py
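# To reproduce the guided_generate failure in isolation (optional repro step, not part of the original run):
# pytest -v -s entrypoints/llm/test_guided_generate.py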
-
+ # Notes: currently all entrypoint tests automatically run with the V1 vLLM engine.
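# Suites that still need the legacy engine force V0 per invocation; this pattern is already used
# for offline_mode further down in this file, e.g.:
# VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode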
export VLLM_WORKER_MULTIPROC_METHOD=spawn
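# (spawn avoids "Cannot re-initialize CUDA in forked subprocess" when worker processes start; see the v1 notes below)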
pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_generate_multiple_loras.py --ignore=entrypoints/llm/test_guided_generate.py --ignore=entrypoints/llm/test_collective_rpc.py
pytest -v -s entrypoints/llm/test_lazy_outlines.py
@@ -20,6 +20,10 @@ VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode
# ################ ENTRYPOINT TESTS #################

# ################ v1 tests #################
+ # Notes: the v1/engine test failed with nightly torch 0419; after bisecting to 0415 the test passed. The error is:
+ # RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
+ # This seems to be a torch issue.
+
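# Possible workaround sketch (untested here; assumes the failure is only the fork/CUDA re-init issue):
# force the spawn start method for worker processes, as already done for the entrypoint tests above.
export VLLM_WORKER_MULTIPROC_METHOD=spawn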
# lib dependency: need lm-eval[api]==0.4.8
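# One way to install the pinned dependency noted above (assumption: the PyPI package; the streaming-api fork installed below may supersede it):
pip install "lm-eval[api]==0.4.8"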
pytest -v -s v1/core
pytest -v -s v1/engine
@@ -35,56 +39,11 @@ pytest -v -s v1/e2e
pip install -U git+https://github.com/robertgshaw2-neuralmagic/lm-evaluation-harness.git@streaming-api
pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
# ################ v1 tests #################
-
- # ### v1 failed tests ####
- # two errors
-
- # Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
- # v1/engine/test_engine_core.py::test_engine_core
- # v1/engine/test_engine_core.py::test_engine_core_advanced_sampling
- # v1/engine/test_engine_core.py::test_engine_core_concurrent_batches
-
- pytest -v -s v1/engine/test_engine_core.py
-
- # Exception: Call to echo method failed: 'EngineCoreProc' object has no attribute 'echo'
- tests/v1/engine/test_engine_core_client.py::test_engine_core_client[True] \
- tests/v1/engine/test_engine_core_client.py::test_engine_core_client[False] \
- tests/v1/engine/test_engine_core_client.py::test_engine_core_client_asyncio
-
# #########Chunked Prefill Test #################
VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
# #########Chunked Prefill Test #################
-
- scp -i /Users/elainewy/Documents/secrets/gpu-test-yang.pem /Users/elainewy/Documents/work/pytorch-integration-testing/vllm-torch-nightly/Dockerfile.pinntorch ec2-user@ec2-35-91-52-34.us-west-2.compute.amazonaws.com:/home/ec2-user/test-vllm/
- # ################ Basic Correctness Test # 30min #################
-
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s basic_correctness/test_cumem.py
- pytest -v -s basic_correctness/test_basic_correctness.py
- pytest -v -s basic_correctness/test_cpu_offload.py
-
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v basic_correctness/test_cumem.py
- pytest -v basic_correctness/test_basic_correctness.py
- pytest -v basic_correctness/test_cpu_offload.py
-
- pytest -v -s v1/engine/test_engine_core.py
-
- # prefill chunk tests
-
- VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
- VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
-
# Regression Test
pip install modelscope
pytest -v -s test_regression.py