Commit eff4b57
Recover offline_inference_npu.py to make doctest passed (#1756)
### What this PR does / why we need it?
Rename offline_inference_npu_v1.py to offline_inference_npu.py to recover doctest.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
CI passed

- vLLM version: v0.9.2
- vLLM main: vllm-project/vllm@a859323

Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
1 parent 8b3a483 commit eff4b57

File tree

3 files changed: +46 -0 lines changed


.github/workflows/vllm_ascend_doctest.yaml

Lines changed: 2 additions & 0 deletions
@@ -75,7 +75,9 @@ jobs:
           echo "Replacing /vllm-workspace/vllm-ascend/tests/e2e ..."
           rm -rf /vllm-workspace/vllm-ascend/tests/e2e
           mkdir -p /vllm-workspace/vllm-ascend/tests
+          # Overwrite e2e and examples
           cp -r tests/e2e /vllm-workspace/vllm-ascend/tests/
+          cp -r examples /vllm-workspace/vllm-ascend/

           # Simulate container to enter directory
           cd /workspace
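The two added cp lines ensure examples/offline_inference_npu.py is present in the doctest workspace alongside tests/e2e. A minimal sketch of exercising the restored example locally — assuming a working Ascend NPU environment and that running the script directly is an acceptable check (the doctest job's actual entry point is not shown in this diff):

    # Hedged local check, not the CI doctest runner itself: run the restored
    # example as a script from the repository root and fail if it exits non-zero.
    import subprocess
    import sys

    result = subprocess.run([sys.executable, "examples/offline_inference_npu.py"])
    if result.returncode != 0:
        raise SystemExit("offline_inference_npu.py failed")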

examples/offline_inference_npu.py

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# This file is a part of the vllm-ascend project.
+# Adapted from vllm-project/vllm/examples/offline_inference/basic.py
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# isort: skip_file
+import os
+
+os.environ["VLLM_USE_MODELSCOPE"] = "True"
+
+from vllm import LLM, SamplingParams
+
+prompts = [
+    "Hello, my name is",
+    "The president of the United States is",
+    "The capital of France is",
+    "The future of AI is",
+]
+
+# Create a sampling params object.
+sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
+# Create an LLM.
+llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")
+
+# Generate texts from the prompts.
+outputs = llm.generate(prompts, sampling_params)
+for output in outputs:
+    prompt = output.prompt
+    generated_text = output.outputs[0].text
+    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
