-
-
Notifications
You must be signed in to change notification settings - Fork 8.8k
Open
Labels
Description
Your current environment
>>> import vllm; vllm.__version__
INFO 05-28 19:02:30 [__init__.py:248] Automatically detected platform cuda.
'0.9.1.dev59+gb6a6e7a52'
>>>
>>> import torch; torch.__version__
'2.7.0+cu126'
>>> import transformers; transformers.__version__
'4.52.2'
🐛 Describe the bug
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] Traceback (most recent call last):
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 517, in worker_busy_loop
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] output = func(*args, **kwargs)
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] return func(*args, **kwargs)
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 185, in determine_available_memory
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] self.model_runner.profile_run()
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1854, in profile_run
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] hidden_states = self._dummy_run(self.max_num_tokens)
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] return func(*args, **kwargs)
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1695, in _dummy_run
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] outputs = model(
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] return self._call_impl(*args, **kwargs)
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] return forward_call(*args, **kwargs)
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/model_executor/models/qwen3.py", line 300, in forward
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] hidden_states = self.model(input_ids, positions, intermediate_tensors,
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/compilation/decorators.py", line 238, in __call__
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] output = self.compiled_callable(*args, **kwargs)
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 663, in _fn
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] raise e.remove_dynamo_frames() from None # see TORCHDYNAMO_VERBOSE=1
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/torch/_dynamo/output_graph.py", line 1544, in _call_user_compiler
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] raise BackendCompilerFailed(
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/torch/_dynamo/output_graph.py", line 1519, in _call_user_compiler
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] compiled_fn = compiler_fn(gm, self.example_inputs())
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/torch/_dynamo/repro/after_dynamo.py", line 150, in __call__
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] compiled_gm = compiler_fn(gm, example_inputs)
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/torch/_dynamo/repro/after_dynamo.py", line 150, in __call__
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] compiled_gm = compiler_fn(gm, example_inputs)
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/torch/__init__.py", line 2392, in __call__
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] return self.compiler_fn(model_, inputs_, **self.kwargs)
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/compilation/backends.py", line 498, in __call__
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] PiecewiseCompileInterpreter(self.split_gm, submod_names_to_compile,
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/compilation/backends.py", line 275, in run
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] return super().run(*fake_args)
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/torch/fx/interpreter.py", line 171, in run
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] self.env[node] = self.run_node(node)
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/torch/fx/interpreter.py", line 240, in run_node
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] return getattr(self, n.op)(n.target, args, kwargs)
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/compilation/backends.py", line 291, in call_module
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] compiler_manager.compile(
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/compilation/backends.py", line 147, in compile
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] compiled_graph, handle = self.compiler.compile(
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/compilation/compiler_interface.py", line 414, in compile
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] assert hash_str is not None, (
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] ^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] torch._dynamo.exc.BackendCompilerFailed: backend='<vllm.compilation.backends.VllmBackend object at 0x7fc6002fbe90>' raised:
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] AssertionError: failed to get the hash of the compiled graph
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522]
(VllmWorker rank=1 pid=191128) ERROR 05-28 18:58:32 [multiproc_executor.py:522] Set TORCHDYNAMO_VERBOSE=1 for the internal stack trace (please do this especially if you're reporting a bug to PyTorch). For even more developer context, set TORCH_LOGS="+dynamo"
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/fs/ml/validation.py", line 43, in infer
llm = vllm.LLM(
^^^^^^^^^
File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/utils.py", line 1177, in inner
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/entrypoints/llm.py", line 250, in __init__
self.llm_engine = LLMEngine.from_engine_args(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/engine/llm_engine.py", line 511, in from_engine_args
return engine_cls.from_vllm_config(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/v1/engine/llm_engine.py", line 115, in from_vllm_config
return cls(vllm_config=vllm_config,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/v1/engine/llm_engine.py", line 92, in __init__
self.engine_core = EngineCoreClient.make_client(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 75, in make_client
return SyncMPClient(vllm_config, executor_class, log_stats)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 580, in __init__
super().__init__(
File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 418, in __init__
self._wait_for_engine_startup(output_address, parallel_config)
File "/mnt/fs/venv_cu126_py312/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 484, in _wait_for_engine_startup
raise RuntimeError("Engine core initialization failed. "
RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {'EngineCore_0': 1}
""
Before submitting a new issue...
- Make sure you have already searched for relevant issues and asked the chatbot at the bottom-right corner of the documentation page, which can answer many frequently asked questions.
huydhn, zhjunqin, sitatec and jxmorris12
Metadata
Metadata
Assignees
Labels
Type
Projects
Status
To triage