
Commit 787010a

[Test] Remove VLLM_USE_V1 in example and tests (#1733)

V1 is enabled by default, so there is no need to set it by hand anymore. This PR removes the now-useless setting from the examples and tests.

- vLLM version: v0.9.2
- vLLM main: vllm-project/vllm@9ad0a45

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>

1 parent eb921d2 commit 787010a

29 files changed (+193 −298 lines)
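
Since V1 is now the default engine, the example scripts simply drop the opt-in toggle. A minimal sketch of the pattern (model name, prompt, and sampling settings are illustrative, not taken from this commit):

import os

from vllm import LLM, SamplingParams

# Previously the examples opted in to the V1 engine explicitly:
# os.environ["VLLM_USE_V1"] = "1"
# With V1 enabled by default, only the remaining settings are still needed, e.g.:
os.environ["VLLM_USE_MODELSCOPE"] = "True"

llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")
outputs = llm.generate(["The president of the United States is"],
                       SamplingParams(max_tokens=32, temperature=0.0))
print(outputs[0].outputs[0].text)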

.github/workflows/vllm_ascend_test.yaml
Lines changed: 14 additions & 55 deletions

@@ -41,16 +41,10 @@ concurrency:
 
 jobs:
   lint:
-    # Only trigger lint on pull request
-    if: ${{ github.event_name == 'pull_request' }}
     uses: ./.github/workflows/pre-commit.yml
 
   changes:
-    # Only trigger changes on pull request
-    if: ${{ github.event_name == 'pull_request' }}
     runs-on: ubuntu-latest
-    permissions:
-      pull-requests: read
     outputs:
       e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
       ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
@@ -60,20 +54,24 @@ jobs:
         with:
           filters: |
             e2e_tracker:
+              - '.github/workflows/vllm_ascend_test.yaml'
               - 'vllm_ascend/**'
               - 'csrc/**'
               - 'cmake/**'
               - 'tests/e2e/**'
-              - 'tests/conftest.py'
-              - 'tests/model_utils.py'
-              - 'tests/utils.py'
+              - 'CMakeLists.txt'
+              - 'setup.py'
+              - 'requirements.txt'
+              - 'requirements-dev.txt'
+              - 'requirements-lint.txt'
+              - 'packages.txt'
             ut_tracker:
               - 'tests/ut/**'
   ut:
     needs: [lint, changes]
     name: unit test
-    # only trigger unit test after lint passed and the change is e2e and ut related. Or the PR is merged.
-    if: ${{ github.event_name == 'push' || (needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true')) }}
+    # only trigger unit test after lint passed and the change is e2e and ut related.
+    if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
     runs-on: ubuntu-latest
     container:
       image: quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
@@ -112,9 +110,8 @@ jobs:
          python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
          python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/
 
-      - name: Run unit test for V1 Engine
+      - name: Run unit test
        env:
-          VLLM_USE_V1: 1
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          TORCH_DEVICE_BACKEND_AUTOLOAD: 0
        run: |
@@ -133,8 +130,8 @@ jobs:
 
   e2e:
     needs: [lint, changes]
-    # only trigger e2e test after lint passed and the change is e2e related.
-    if: ${{ needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
+    # only trigger e2e test after lint passed and the change is e2e related with pull request.
+    if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
     strategy:
       max-parallel: 2
       matrix:
@@ -189,9 +186,8 @@ jobs:
          pip install -r requirements-dev.txt
          pip install -v -e .
 
-      - name: Run e2e test for V1 Engine
+      - name: Run e2e test
        env:
-          VLLM_USE_V1: 1
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        run: |
@@ -213,26 +209,6 @@ jobs:
          # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
          VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
 
-      - name: Run e2e test on V0 engine
-        if: ${{ github.event_name == 'schedule' }}
-        env:
-          VLLM_USE_V1: 0
-          VLLM_USE_MODELSCOPE: True
-        run: |
-          pytest -sv tests/e2e/singlecard/test_offline_inference.py
-          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
-          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
-          pytest -sv tests/e2e/singlecard/test_camem.py
-          pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
-          pytest -sv tests/e2e/singlecard/test_embedding.py
-          pytest -sv tests/e2e/singlecard/ \
-            --ignore=tests/e2e/singlecard/test_offline_inference.py \
-            --ignore=tests/e2e/singlecard/test_ilama_lora.py \
-            --ignore=tests/e2e/singlecard/test_guided_decoding.py \
-            --ignore=tests/e2e/singlecard/test_camem.py \
-            --ignore=tests/e2e/singlecard/test_prompt_embedding.py \
-            --ignore=tests/e2e/singlecard/test_embedding.py
-
   e2e-4-cards:
     needs: [e2e]
     if: ${{ needs.e2e.result == 'success' }}
@@ -290,9 +266,8 @@ jobs:
          pip install -r requirements-dev.txt
          pip install -v -e .
 
-      - name: Run vllm-project/vllm-ascend test for V1 Engine
+      - name: Run vllm-project/vllm-ascend test
        env:
-          VLLM_USE_V1: 1
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        run: |
@@ -308,19 +283,3 @@ jobs:
          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
            --ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
            --ignore=tests/e2e/multicard/test_data_parallel.py
-
-      - name: Run vllm-project/vllm-ascend test on V0 engine
-        if: ${{ github.event_name == 'schedule' }}
-        env:
-          VLLM_USE_V1: 0
-          VLLM_USE_MODELSCOPE: True
-        run: |
-          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
-          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
-          # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-          pytest -sv tests/e2e/multicard/test_data_parallel.py
-          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
-            --ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
-            --ignore=tests/e2e/multicard/test_data_parallel.py

examples/offline_data_parallel.py
Lines changed: 0 additions & 1 deletion

@@ -120,7 +120,6 @@ def main(
     trust_remote_code,
 ):
     # DP only support on V1 engine
-    os.environ["VLLM_USE_V1"] = "1"
     os.environ["VLLM_DP_RANK"] = str(global_dp_rank)
     os.environ["VLLM_DP_RANK_LOCAL"] = str(local_dp_rank)
     os.environ["VLLM_DP_SIZE"] = str(dp_size)

examples/offline_dualbatch_overlap_npu.py
Lines changed: 0 additions & 1 deletion

@@ -5,7 +5,6 @@
 
 # enable dual-batch overlap for vllm ascend
 os.environ["VLLM_ASCEND_ENABLE_DBO"] = "1"
-os.environ["VLLM_USE_V1"] = "1"
 
 # Sample prompts.
 prompts = ["The president of the United States is"] * 41

examples/offline_inference_sleep_mode_npu.py
Lines changed: 0 additions & 1 deletion

@@ -22,7 +22,6 @@
 from vllm import LLM, SamplingParams
 from vllm.utils import GiB_bytes
 
-os.environ["VLLM_USE_V1"] = "1"
 os.environ["VLLM_USE_MODELSCOPE"] = "True"
 os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
 
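
Not part of this diff, but for orientation: a rough sketch of what a sleep-mode example does after the setup above, assuming the LLM.sleep()/LLM.wake_up() API available in recent vLLM releases; the model name and prompt are placeholders.

from vllm import LLM, SamplingParams

# enable_sleep_mode lets the engine release most device memory on demand.
llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct", enable_sleep_mode=True)

prompt = "The capital of France is"
params = SamplingParams(temperature=0.0, max_tokens=16)
print(llm.generate([prompt], params)[0].outputs[0].text)

llm.sleep(level=1)  # free KV cache and offload weights while idle
llm.wake_up()       # restore the engine before generating again
print(llm.generate([prompt], params)[0].outputs[0].text)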

examples/run_dp_attention_etp16.sh
Lines changed: 0 additions & 1 deletion

@@ -1,4 +1,3 @@
-export VLLM_USE_V1=1
 export TASK_QUEUE_ENABLE=1
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
 source /usr/local/Ascend/nnal/atb/set_env.sh

requirements-dev.txt
Lines changed: 1 addition & 0 deletions

@@ -12,4 +12,5 @@ xgrammar
 zmq
 types-psutil
 pytest-cov
+regex
 sentence_transformers

requirements-lint.txt
Lines changed: 1 addition & 0 deletions

@@ -4,5 +4,6 @@ pre-commit==4.0.1
 # type checking
 mypy==1.11.1
 types-PyYAML
+types-regex
 types-requests
 types-setuptools

tests/conftest.py renamed to tests/e2e/conftest.py
Lines changed: 3 additions & 3 deletions

@@ -39,8 +39,8 @@
 from vllm.transformers_utils.utils import maybe_model_redirect
 from vllm.utils import is_list_of
 
-from tests.model_utils import (PROMPT_TEMPLATES, TokensTextLogprobs,
-                               TokensTextLogprobsPromptLogprobs)
+from tests.e2e.model_utils import (PROMPT_TEMPLATES, TokensTextLogprobs,
+                                   TokensTextLogprobsPromptLogprobs)
 # TODO: remove this part after the patch merged into vllm, if
 # we not explicitly patch here, some of them might be effectiveless
 # in pytest scenario
@@ -62,7 +62,7 @@
 PromptVideoInput = _PromptMultiModalInput[np.ndarray]
 
 _TEST_DIR = os.path.dirname(__file__)
-_TEST_PROMPTS = [os.path.join(_TEST_DIR, "e2e", "prompts", "example.txt")]
+_TEST_PROMPTS = [os.path.join(_TEST_DIR, "prompts", "example.txt")]
 
 
 def cleanup_dist_env_and_memory(shutdown_ray: bool = False):

File renamed without changes.
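
After the rename, test helpers live under the tests.e2e package, so imports change from tests.conftest / tests.model_utils to tests.e2e.conftest / tests.e2e.model_utils. A rough usage sketch (the model name and sampling settings are illustrative, not taken from this commit):

from vllm import SamplingParams

from tests.e2e.conftest import VllmRunner  # previously tests.conftest


def test_basic_generation():
    prompts = ["Hello, my name is"]
    sampling_params = SamplingParams(max_tokens=32, temperature=0.0)
    # VllmRunner wraps engine construction and teardown as a context manager.
    with VllmRunner("Qwen/Qwen2.5-0.5B-Instruct") as vllm_model:
        vllm_model.generate(prompts, sampling_params)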

tests/e2e/multicard/test_fused_moe_allgather_ep.py
Lines changed: 6 additions & 9 deletions

@@ -26,12 +26,11 @@
 from modelscope import snapshot_download  # type: ignore
 from vllm import SamplingParams
 
-from tests.conftest import VllmRunner
+from tests.e2e.conftest import VllmRunner
 
 
 @patch.dict(
     os.environ, {
-        "VLLM_USE_V1": "1",
         "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
         "TASK_QUEUE_ENABLE": "1",
         "VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP": "1"
@@ -56,12 +55,10 @@ def test_generate_with_allgather():
         vllm_model.generate(example_prompts, sampling_params)
 
 
-@patch.dict(
-    os.environ, {
-        "VLLM_USE_V1": "1",
-        "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
-        "TASK_QUEUE_ENABLE": "1"
-    })
+@patch.dict(os.environ, {
+    "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
+    "TASK_QUEUE_ENABLE": "1"
+})
 def test_generate_with_alltoall():
     example_prompts = ["Hello, my name is"]
     sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
@@ -79,4 +76,4 @@ def test_generate_with_alltoall():
             },
             "expert_tensor_parallel_size": 1
         }) as vllm_model:
-        vllm_model.generate(example_prompts, sampling_params)
+        vllm_model.generate(example_prompts, sampling_params)
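
The decorator used above is from the standard library; a self-contained sketch of how unittest.mock.patch.dict temporarily overrides environment variables for one test (EXAMPLE_FLAG is a made-up variable used only to show the mechanism):

import os
from unittest.mock import patch


@patch.dict(os.environ, {"EXAMPLE_FLAG": "1"})
def test_env_is_patched():
    # The override is visible only while the decorated function runs.
    assert os.environ["EXAMPLE_FLAG"] == "1"


test_env_is_patched()
# Once the call returns, patch.dict restores the original environment.
assert "EXAMPLE_FLAG" not in os.environ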
