From 190f6caa682e1869cc5de7514090e4cc9b5b52f2 Mon Sep 17 00:00:00 2001 From: Sebastian Schoennenbeck Date: Wed, 21 May 2025 22:41:23 +0200 Subject: [PATCH 01/26] [Bugfix] Consistent ascii handling in tool parsers (#17704) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sebastian Schönnenbeck Signed-off-by: Alexandru Badea --- .../tool_parsers/granite_20b_fc_tool_parser.py | 12 ++++++++---- .../openai/tool_parsers/granite_tool_parser.py | 12 ++++++++---- .../openai/tool_parsers/internlm2_tool_parser.py | 12 ++++++++---- .../openai/tool_parsers/jamba_tool_parser.py | 16 ++++++++++------ .../openai/tool_parsers/llama_tool_parser.py | 12 ++++++++---- .../openai/tool_parsers/phi4mini_tool_parser.py | 9 +++++---- .../openai/tool_parsers/pythonic_tool_parser.py | 9 ++++++--- 7 files changed, 53 insertions(+), 29 deletions(-) diff --git a/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py index 76da63c5800..61aa3b1092d 100644 --- a/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py @@ -80,7 +80,8 @@ def extract_tool_calls( function=FunctionCall( name=function_call["name"], # function call args are JSON but as a string - arguments=json.dumps(function_call["arguments"]), + arguments=json.dumps(function_call["arguments"], + ensure_ascii=False), ), ) for function_call in raw_function_calls ] @@ -166,7 +167,8 @@ def extract_tool_calls_streaming( if self.current_tool_id >= 0: cur_arguments = current_tool_call.get("arguments") if cur_arguments: - cur_args_json = json.dumps(cur_arguments) + cur_args_json = json.dumps(cur_arguments, + ensure_ascii=False) sent = len( self.streamed_args_for_tool[self.current_tool_id]) argument_diff = cur_args_json[sent:] @@ -218,7 +220,8 @@ def extract_tool_calls_streaming( if cur_arguments: sent = len( self.streamed_args_for_tool[self.current_tool_id]) - cur_args_json = json.dumps(cur_arguments) + cur_args_json = json.dumps(cur_arguments, + ensure_ascii=False) prev_arguments = self.prev_tool_call_arr[ self.current_tool_id].get("arguments") @@ -226,7 +229,8 @@ def extract_tool_calls_streaming( if is_complete[self.current_tool_id]: argument_diff = cur_args_json[sent:] elif prev_arguments: - prev_args_json = json.dumps(prev_arguments) + prev_args_json = json.dumps(prev_arguments, + ensure_ascii=False) if cur_args_json != prev_args_json: prefix = find_common_prefix( diff --git a/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py index 91afc88ef3d..52c78e8d9f7 100644 --- a/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py @@ -67,7 +67,8 @@ def extract_tool_calls( function=FunctionCall( name=function_call["name"], # function call args are JSON but as a string - arguments=json.dumps(function_call["arguments"]), + arguments=json.dumps(function_call["arguments"], + ensure_ascii=False), ), ) for function_call in raw_function_calls ] @@ -151,7 +152,8 @@ def extract_tool_calls_streaming( if self.current_tool_id >= 0: cur_arguments = current_tool_call.get("arguments") if cur_arguments: - cur_args_json = json.dumps(cur_arguments) + cur_args_json = json.dumps(cur_arguments, + ensure_ascii=False) sent = len( self.streamed_args_for_tool[self.current_tool_id]) argument_diff = cur_args_json[sent:] @@ -197,7 +199,8 @@ def 
extract_tool_calls_streaming( if cur_arguments: sent = len( self.streamed_args_for_tool[self.current_tool_id]) - cur_args_json = json.dumps(cur_arguments) + cur_args_json = json.dumps(cur_arguments, + ensure_ascii=False) prev_arguments = self.prev_tool_call_arr[ self.current_tool_id].get("arguments") @@ -205,7 +208,8 @@ def extract_tool_calls_streaming( if is_complete[self.current_tool_id]: argument_diff = cur_args_json[sent:] elif prev_arguments: - prev_args_json = json.dumps(prev_arguments) + prev_args_json = json.dumps(prev_arguments, + ensure_ascii=False) if cur_args_json != prev_args_json: prefix = find_common_prefix( prev_args_json, cur_args_json) diff --git a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py index 57d7c77c64f..59ac36cd23b 100644 --- a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py @@ -133,7 +133,8 @@ def extract_tool_calls_streaming( delta = None # first time to get parameters elif cur_arguments and not prev_arguments: - cur_arguments_json = json.dumps(cur_arguments) + cur_arguments_json = json.dumps(cur_arguments, + ensure_ascii=False) arguments_delta = cur_arguments_json[:cur_arguments_json. index(delta_text) + @@ -148,8 +149,10 @@ def extract_tool_calls_streaming( self.current_tool_id] += arguments_delta # both prev and cur parameters, send the increase parameters elif cur_arguments and prev_arguments: - cur_args_json = json.dumps(cur_arguments) - prev_args_json = json.dumps(prev_arguments) + cur_args_json = json.dumps(cur_arguments, + ensure_ascii=False) + prev_args_json = json.dumps(prev_arguments, + ensure_ascii=False) argument_diff = extract_intermediate_diff( cur_args_json, prev_args_json) @@ -190,7 +193,8 @@ def extract_tool_calls( action_dict = json.loads(action) name, parameters = action_dict['name'], json.dumps( action_dict.get('parameters', action_dict.get('arguments', - {}))) + {})), + ensure_ascii=False) if not tools or name not in [t.function.name for t in tools]: ExtractedToolCallInformation(tools_called=False, diff --git a/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py index 8df106bf271..50fed9baf8f 100644 --- a/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py @@ -96,8 +96,9 @@ def extract_tool_calls( function=FunctionCall( name=function_call["name"], # function call args are JSON but as a string - arguments=json.dumps(function_call["arguments"]))) - for function_call in raw_function_calls + arguments=json.dumps(function_call["arguments"], + ensure_ascii=False), + )) for function_call in raw_function_calls ] content = model_output[:model_output. 
@@ -187,7 +188,7 @@ def extract_tool_calls_streaming( diff: Union[str, None] = current_tool_call.get("arguments") if diff: - diff = json.dumps(diff).replace( + diff = json.dumps(diff, ensure_ascii=False).replace( self.streamed_args_for_tool[self.current_tool_id], "") delta = DeltaMessage(tool_calls=[ @@ -248,7 +249,8 @@ def extract_tool_calls_streaming( "mid-arguments") delta = None elif cur_arguments and not prev_arguments: - cur_arguments_json = json.dumps(cur_arguments) + cur_arguments_json = json.dumps(cur_arguments, + ensure_ascii=False) logger.debug("finding %s in %s", new_text, cur_arguments_json) @@ -267,8 +269,10 @@ def extract_tool_calls_streaming( self.current_tool_id] += arguments_delta elif cur_arguments and prev_arguments: - cur_args_json = json.dumps(cur_arguments) - prev_args_json = json.dumps(prev_arguments) + cur_args_json = json.dumps(cur_arguments, + ensure_ascii=False) + prev_args_json = json.dumps(prev_arguments, + ensure_ascii=False) logger.debug("Searching for diff between \n%s\n%s", cur_args_json, prev_args_json) diff --git a/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py index 5c181616aa0..9dbd7efdc44 100644 --- a/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py @@ -88,7 +88,8 @@ def extract_tool_calls( # function call args are JSON but as a string arguments=json.dumps(raw_function_call["arguments"] \ if "arguments" in raw_function_call \ - else raw_function_call["parameters"]))) + else raw_function_call["parameters"], + ensure_ascii=False))) for raw_function_call in function_call_arr ] @@ -174,7 +175,8 @@ def extract_tool_calls_streaming( if self.current_tool_id >= 0: cur_arguments = current_tool_call.get("arguments") if cur_arguments: - cur_args_json = json.dumps(cur_arguments) + cur_args_json = json.dumps(cur_arguments, + ensure_ascii=False) sent = len( self.streamed_args_for_tool[self.current_tool_id]) argument_diff = cur_args_json[sent:] @@ -226,7 +228,8 @@ def extract_tool_calls_streaming( if cur_arguments: sent = len( self.streamed_args_for_tool[self.current_tool_id]) - cur_args_json = json.dumps(cur_arguments) + cur_args_json = json.dumps(cur_arguments, + ensure_ascii=False) prev_arguments = self.prev_tool_call_arr[ self.current_tool_id].get("arguments") @@ -234,7 +237,8 @@ def extract_tool_calls_streaming( if is_complete[self.current_tool_id]: argument_diff = cur_args_json[sent:] elif prev_arguments: - prev_args_json = json.dumps(prev_arguments) + prev_args_json = json.dumps(prev_arguments, + ensure_ascii=False) if cur_args_json != prev_args_json: prefix = find_common_prefix( diff --git a/vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py index 668776a832e..084f7acb5d8 100644 --- a/vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py @@ -79,10 +79,11 @@ def extract_tool_calls( name=raw_function_call["name"], # function call args are JSON but as a string arguments=json.dumps( - raw_function_call["arguments"] if "arguments" in - raw_function_call else - raw_function_call["parameters"]))) - for raw_function_call in function_call_arr + raw_function_call["arguments"] + if "arguments" in raw_function_call else + raw_function_call["parameters"], + ensure_ascii=False), + )) for raw_function_call in function_call_arr ] # get any content before the tool call diff --git 
a/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py index 9f141d6b334..e795eb3fa8c 100644 --- a/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py @@ -200,9 +200,12 @@ def _handle_single_tool(call: ast.Call) -> ToolCall: arguments = {} for keyword in call.keywords: arguments[keyword.arg] = _get_parameter_value(keyword.value) - return ToolCall(type="function", - function=FunctionCall(name=function_name, - arguments=json.dumps(arguments))) + return ToolCall( + type="function", + function=FunctionCall(name=function_name, + arguments=json.dumps(arguments, + ensure_ascii=False)), + ) def _make_valid_python(text: str) -> Union[tuple[str, str], None]: From 328af25234ae30068305669cbb968e3b8a936194 Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Wed, 9 Jul 2025 16:30:22 +0300 Subject: [PATCH 02/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" Signed-off-by: Alexandru Badea --- vllm/executor/ray_distributed_executor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/executor/ray_distributed_executor.py b/vllm/executor/ray_distributed_executor.py index 9b0b98731e0..04b48d53021 100644 --- a/vllm/executor/ray_distributed_executor.py +++ b/vllm/executor/ray_distributed_executor.py @@ -528,12 +528,12 @@ def _wait_for_tasks_completion(self, parallel_worker_tasks: Any) -> None: ray.get(parallel_worker_tasks) def _check_ray_cgraph_installation(self): - import pkg_resources + import importlib.metadata from packaging import version required_version = version.parse("2.43.0") current_version = version.parse( - pkg_resources.get_distribution("ray").version) + importlib.metadata.version("ray")) if current_version < required_version: raise ValueError(f"Ray version {required_version} is " f"required, but found {current_version}") From d820b7abf38528b43cd7031647e6d44579c42360 Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 08:28:52 +0300 Subject: [PATCH 03/26] revert changes for pre-commit check Signed-off-by: Alexandru Badea --- vllm/executor/ray_distributed_executor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/executor/ray_distributed_executor.py b/vllm/executor/ray_distributed_executor.py index 04b48d53021..9b0b98731e0 100644 --- a/vllm/executor/ray_distributed_executor.py +++ b/vllm/executor/ray_distributed_executor.py @@ -528,12 +528,12 @@ def _wait_for_tasks_completion(self, parallel_worker_tasks: Any) -> None: ray.get(parallel_worker_tasks) def _check_ray_cgraph_installation(self): - import importlib.metadata + import pkg_resources from packaging import version required_version = version.parse("2.43.0") current_version = version.parse( - importlib.metadata.version("ray")) + pkg_resources.get_distribution("ray").version) if current_version < required_version: raise ValueError(f"Ray version {required_version} is " f"required, but found {current_version}") From dfb943ad5270411285de210b38afcaafa3bb270d Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 08:48:12 +0300 Subject: [PATCH 04/26] add types-setuptools to the lint requirements Signed-off-by: Alexandru Badea --- requirements/lint.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements/lint.txt b/requirements/lint.txt index 62446f94048..81f0cfc76a6 100644 --- a/requirements/lint.txt +++ 
b/requirements/lint.txt @@ -1,2 +1,4 @@ # formatting pre-commit==4.0.1 + +types-setuptools \ No newline at end of file From 4b86ed6d801e54286b6fde633397d29ff2f70a48 Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 09:03:44 +0300 Subject: [PATCH 05/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (1) Signed-off-by: Alexandru Badea --- pyproject.toml | 7 +++++++ requirements/lint.txt | 2 -- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b5f1039b44d..e91f8c0fac2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,13 @@ Slack="http://slack.vllm.ai/" [project.scripts] vllm = "vllm.entrypoints.cli.main:main" +[project.optional-dependencies] +dev = [ + "types-setuptools", # Required by MyPy for pkg_resources module + # Other development-specific tools might go here too, + # e.g., "mypy", "ruff", "pre-commit" if not otherwise managed. +] + [tool.setuptools_scm] # no extra settings needed, presence enables setuptools-scm diff --git a/requirements/lint.txt b/requirements/lint.txt index 81f0cfc76a6..62446f94048 100644 --- a/requirements/lint.txt +++ b/requirements/lint.txt @@ -1,4 +1,2 @@ # formatting pre-commit==4.0.1 - -types-setuptools \ No newline at end of file From 84bc0dba78616c82aebfe7aee72dfee3dcf1e480 Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 09:16:29 +0300 Subject: [PATCH 06/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (2) Signed-off-by: Alexandru Badea --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e91f8c0fac2..22728ad669d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ classifiers = [ "Topic :: Scientific/Engineering :: Information Analysis", ] requires-python = ">=3.9,<3.13" -dynamic = [ "version", "dependencies", "optional-dependencies"] +dynamic = [ "version", "dependencies"] [project.urls] Homepage="https://github.com/vllm-project/vllm" From db0c2d678ef822d63e7d8b90f3e933071ac65307 Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 09:32:59 +0300 Subject: [PATCH 07/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (3) Signed-off-by: Alexandru Badea --- .github/workflows/pre-commit.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 6ab63a40277..60583608ba3 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -9,12 +9,16 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + - uses: actions/checkout@v4 # Use the latest stable version tag + - uses: actions/setup-python@v5 # Use the latest stable version tag with: python-version: "3.12" - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json" - run: echo "::add-matcher::.github/workflows/matchers/mypy.json" - - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 + - name: Install vLLM Build Dependencies + run: pip install --requirement requirements/build.txt + - name: Install vLLM Project and Dev 
Dependencies + run: pip install -e ".[dev]" + - uses: pre-commit/action@v3.0.1 # Use the latest stable version tag with: - extra_args: --all-files --hook-stage manual + extra_args: --all-files --hook-stage manual \ No newline at end of file From 03b99f57d9e170ae2120a2833330e591d30da385 Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 10:28:41 +0300 Subject: [PATCH 08/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (4) Signed-off-by: Alexandru Badea --- .github/workflows/pre-commit.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 60583608ba3..8bf7d0027d5 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -15,8 +15,10 @@ jobs: python-version: "3.12" - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json" - run: echo "::add-matcher::.github/workflows/matchers/mypy.json" + - name: Set VLLM_TARGET_DEVICE to CPU for pre-commit checks + run: echo "VLLM_TARGET_DEVICE=cpu" >> $GITHUB_ENV - name: Install vLLM Build Dependencies - run: pip install --requirement requirements/build.txt + run: pip install -r requirements/build.txt - name: Install vLLM Project and Dev Dependencies run: pip install -e ".[dev]" - uses: pre-commit/action@v3.0.1 # Use the latest stable version tag From 3b528fe13eda52e01be63e5e20ec44d74d14487e Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 10:40:05 +0300 Subject: [PATCH 09/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (5) Signed-off-by: Alexandru Badea --- setup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index a1867960e59..26b1437317a 100755 --- a/setup.py +++ b/setup.py @@ -585,8 +585,9 @@ def get_vllm_version() -> str: elif _is_tpu(): version += f"{sep}tpu" elif _is_cpu(): - if envs.VLLM_TARGET_DEVICE == "cpu": - version += f"{sep}cpu" + # For CPU builds, we don't append a suffix to the version. + # The standard PyPI `torch` package is CPU-only by default. 
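Stepping back from the version-suffix hunk above: the mypy error quoted in every subject line of this series is triggered by importing pkg_resources, which ships without type stubs. PATCH 02 already demonstrated the stub-free fix before it was reverted; as a minimal standalone sketch (assuming only that the ray distribution is installed), the two lookup styles compare like this:

    # Legacy style: needs the types-setuptools stub package for mypy to pass.
    import pkg_resources
    from packaging import version

    ray_version = version.parse(pkg_resources.get_distribution("ray").version)

    # Stdlib replacement (Python 3.8+): typed out of the box, no stubs needed.
    import importlib.metadata

    ray_version = version.parse(importlib.metadata.version("ray"))

    if ray_version < version.parse("2.43.0"):
        raise ValueError(f"Ray 2.43.0 is required, but found {ray_version}")
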
+ pass # Do not append +cpu to the version string elif _is_xpu(): version += f"{sep}xpu" else: From 31602110d06d19c87f788d3c94abc2073dda2d9c Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 10:48:45 +0300 Subject: [PATCH 10/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (5) Signed-off-by: Alexandru Badea --- .github/workflows/pre-commit.yml | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 8bf7d0027d5..8114e5c3de4 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -9,18 +9,28 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 # Use the latest stable version tag - - uses: actions/setup-python@v5 # Use the latest stable version tag + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: "3.12" - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json" - run: echo "::add-matcher::.github/workflows/matchers/mypy.json" - name: Set VLLM_TARGET_DEVICE to CPU for pre-commit checks run: echo "VLLM_TARGET_DEVICE=cpu" >> $GITHUB_ENV - - name: Install vLLM Build Dependencies - run: pip install -r requirements/build.txt + - name: Install PyTorch for CPU + # Explicitly install the CPU version of PyTorch from their stable index. + # This ensures pip finds the correct wheel without the problematic '+cpu' tag. + # Check PyTorch's official website for the exact command for torch==2.6.0 CPU + # As of my last update, it would typically be: + run: pip install torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu + - name: Install vLLM Build Dependencies (excluding torch, as it's already installed) + # We need to filter out torch from build.txt for this step + # A simple way is to use grep or sed, or if build.txt is small, just list others. + # Given build.txt has `torch==2.6.0`, we'll exclude it here. + run: pip install $(grep -v 'torch==' requirements/build.txt | tr '\n' ' ') - name: Install vLLM Project and Dev Dependencies run: pip install -e ".[dev]" - - uses: pre-commit/action@v3.0.1 # Use the latest stable version tag + + - uses: pre-commit/action@v3.0.1 with: extra_args: --all-files --hook-stage manual \ No newline at end of file From b1632888dacd53c0dbfd7277c8394cc3acdb1bae Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 10:53:52 +0300 Subject: [PATCH 11/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (6) Signed-off-by: Alexandru Badea --- .github/workflows/pre-commit.yml | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 8114e5c3de4..14f0722df12 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -18,16 +18,10 @@ jobs: - name: Set VLLM_TARGET_DEVICE to CPU for pre-commit checks run: echo "VLLM_TARGET_DEVICE=cpu" >> $GITHUB_ENV - name: Install PyTorch for CPU - # Explicitly install the CPU version of PyTorch from their stable index. - # This ensures pip finds the correct wheel without the problematic '+cpu' tag. 
- # Check PyTorch's official website for the exact command for torch==2.6.0 CPU - # As of my last update, it would typically be: run: pip install torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu - - name: Install vLLM Build Dependencies (excluding torch, as it's already installed) - # We need to filter out torch from build.txt for this step - # A simple way is to use grep or sed, or if build.txt is small, just list others. - # Given build.txt has `torch==2.6.0`, we'll exclude it here. - run: pip install $(grep -v 'torch==' requirements/build.txt | tr '\n' ' ') + - name: Install vLLM Build Dependencies (excluding torch and comments) + # This command filters out lines with 'torch==' AND lines starting with '#' + run: pip install $(grep -v 'torch==' requirements/build.txt | grep -v '^#' | tr '\n' ' ') - name: Install vLLM Project and Dev Dependencies run: pip install -e ".[dev]" From 459cb6dbe9f20a4af2d642a0bd94df98f0daf6cd Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 11:06:39 +0300 Subject: [PATCH 12/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (7) Signed-off-by: Alexandru Badea a Signed-off-by: Alexandru Badea --- .github/workflows/pre-commit.yml | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 14f0722df12..052c44e4f15 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -9,22 +9,25 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + - uses: actions/checkout@v4 # Keep this as v4 + - uses: actions/setup-python@v5 # Keep this as v5 with: python-version: "3.12" - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json" - run: echo "::add-matcher::.github/workflows/matchers/mypy.json" - - name: Set VLLM_TARGET_DEVICE to CPU for pre-commit checks - run: echo "VLLM_TARGET_DEVICE=cpu" >> $GITHUB_ENV + + - name: Set VLLM_TARGET_DEVICE and VLLM_CMAKE_ARGS for CPU build + run: | + echo "VLLM_TARGET_DEVICE=cpu" >> $GITHUB_ENV + # Crucial: Tell CMake to explicitly disable CUDA usage. + # This should override PyTorch/Caffe2's internal checks for CUDA libraries. 
+ echo "VLLM_CMAKE_ARGS=-DUSE_CUDA=OFF" >> $GITHUB_ENV - name: Install PyTorch for CPU run: pip install torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu - name: Install vLLM Build Dependencies (excluding torch and comments) - # This command filters out lines with 'torch==' AND lines starting with '#' run: pip install $(grep -v 'torch==' requirements/build.txt | grep -v '^#' | tr '\n' ' ') - name: Install vLLM Project and Dev Dependencies run: pip install -e ".[dev]" - - - uses: pre-commit/action@v3.0.1 + - uses: pre-commit/action@v3.0.1 # Keep this as v3.0.1 with: extra_args: --all-files --hook-stage manual \ No newline at end of file From b2f6d015dad7b4733dde90006e52c79fc61a32b0 Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 11:23:13 +0300 Subject: [PATCH 13/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (8) Signed-off-by: Alexandru Badea a Signed-off-by: Alexandru Badea --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index 26b1437317a..4cf28610627 100755 --- a/setup.py +++ b/setup.py @@ -238,6 +238,9 @@ def target_name(s: str) -> str: *[f"--target={name}" for name in targets], ] + if VLLM_TARGET_DEVICE == "cpu": + build_args.append("-DUSE_CUDA=OFF") + subprocess.check_call(["cmake", *build_args], cwd=self.build_temp) # Install the libraries From b6ae15563c2a2488e664550521cc785eaedb3830 Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 11:34:20 +0300 Subject: [PATCH 14/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (9) Signed-off-by: Alexandru Badea --- .github/workflows/pre-commit.yml | 1 - setup.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 052c44e4f15..4889ed3a874 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -21,7 +21,6 @@ jobs: echo "VLLM_TARGET_DEVICE=cpu" >> $GITHUB_ENV # Crucial: Tell CMake to explicitly disable CUDA usage. # This should override PyTorch/Caffe2's internal checks for CUDA libraries. 
- echo "VLLM_CMAKE_ARGS=-DUSE_CUDA=OFF" >> $GITHUB_ENV - name: Install PyTorch for CPU run: pip install torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu - name: Install vLLM Build Dependencies (excluding torch and comments) diff --git a/setup.py b/setup.py index 4cf28610627..a2411d349fd 100755 --- a/setup.py +++ b/setup.py @@ -149,6 +149,9 @@ def configure(self, ext: CMakeExtension) -> None: '-DVLLM_TARGET_DEVICE={}'.format(VLLM_TARGET_DEVICE), ] + if VLLM_TARGET_DEVICE == "cpu": + cmake_args.append("-DUSE_CUDA=OFF") + verbose = envs.VERBOSE if verbose: cmake_args += ['-DCMAKE_VERBOSE_MAKEFILE=ON'] @@ -238,9 +241,6 @@ def target_name(s: str) -> str: *[f"--target={name}" for name in targets], ] - if VLLM_TARGET_DEVICE == "cpu": - build_args.append("-DUSE_CUDA=OFF") - subprocess.check_call(["cmake", *build_args], cwd=self.build_temp) # Install the libraries From 665ab7f0a24df8bf41096829237114e577153408 Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 11:44:44 +0300 Subject: [PATCH 15/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (10) Signed-off-by: Alexandru Badea --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index a2411d349fd..770d1f8d5e4 100755 --- a/setup.py +++ b/setup.py @@ -151,6 +151,9 @@ def configure(self, ext: CMakeExtension) -> None: if VLLM_TARGET_DEVICE == "cpu": cmake_args.append("-DUSE_CUDA=OFF") + cmake_args.append("-DBUILD_CUDA_LIBS=OFF") + cmake_args.append("-DUSE_CUDNN=OFF") + cmake_args.append("-DTORCH_CUDA_ARCH_LIST=NoCUDA") verbose = envs.VERBOSE if verbose: From 5d272749921d7b7db35685d19e417253459842df Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 11:57:55 +0300 Subject: [PATCH 16/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (11) Signed-off-by: Alexandru Badea --- .github/workflows/pre-commit.yml | 21 +-- CMakeLists.txt | 243 +++++++++++++++++++++---------- 2 files changed, 178 insertions(+), 86 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 4889ed3a874..6e1c15f0464 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -9,24 +9,25 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 # Keep this as v4 - - uses: actions/setup-python@v5 # Keep this as v5 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: "3.12" - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json" - run: echo "::add-matcher::.github/workflows/matchers/mypy.json" - - - name: Set VLLM_TARGET_DEVICE and VLLM_CMAKE_ARGS for CPU build + + - name: Install PyTorch for CPU and NumPy run: | - echo "VLLM_TARGET_DEVICE=cpu" >> $GITHUB_ENV - # Crucial: Tell CMake to explicitly disable CUDA usage. - # This should override PyTorch/Caffe2's internal checks for CUDA libraries. 
- - name: Install PyTorch for CPU - run: pip install torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu + pip install numpy + pip install torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu + - name: Install vLLM Build Dependencies (excluding torch and comments) run: pip install $(grep -v 'torch==' requirements/build.txt | grep -v '^#' | tr '\n' ' ') + - name: Install vLLM Project and Dev Dependencies + run: pip install -e ".[dev]" - - uses: pre-commit/action@v3.0.1 # Keep this as v3.0.1 + + - uses: pre-commit/action@v3.0.1 with: extra_args: --all-files --hook-stage manual \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 3314f05fd2a..77f75258ad1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,8 +25,8 @@ include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake) set(ignoreMe "${VLLM_PYTHON_PATH}") # -# Supported python versions. These versions will be searched in order, the -# first match will be selected. These should be kept in sync with setup.py. +# Supported python versions. These versions will be searched in order, the +# first match will be selected. These should be kept in sync with setup.py. # set(PYTHON_SUPPORTED_VERSIONS "3.9" "3.10" "3.11" "3.12") @@ -43,7 +43,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1 # rather than an error. # # Note: the CUDA torch version is derived from pyproject.toml and various -# requirements.txt files and should be kept consistent. The ROCm torch +# requirements.txt files and should be kept consistent. The ROCm torch # versions are derived from docker/Dockerfile.rocm # set(TORCH_SUPPORTED_VERSION_CUDA "2.6.0") @@ -66,10 +66,51 @@ endif() # append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path") + + +# Move the core logic for CPU-only builds much earlier. +# If building for CPU, we will handle Torch finding differently and then exit. +if (VLLM_TARGET_DEVICE STREQUAL "cpu") + message(STATUS "VLLM: Building for CPU. Skipping all CUDA/HIP related configuration.") + + # Explicitly set CUDA_FOUND and HIP_FOUND to FALSE for CPU builds. + # This prevents 'find_package(Torch)' from potentially setting them based on + # system-wide detections or the Torch config's internal checks for CUDA components. + set(CUDA_FOUND FALSE CACHE INTERNAL "Force CUDA to be not found for CPU build" FORCE) + set(HIP_FOUND FALSE CACHE INTERNAL "Force HIP to be not found for CPU build" FORCE) + + # Set VLLM_GPU_LANG to an empty string or a placeholder + set(VLLM_GPU_LANG "" CACHE INTERNAL "No GPU language for CPU build" FORCE) + + # Now, find Torch, but with explicit instructions to not look for CUDA/HIP + # This might still trigger Caffe2 warnings, but it's the correct way to try. + # If the Caffe2 error persists here, it means the PyTorch CPU wheel's CMake + # config is truly stubborn. + find_package(Torch REQUIRED COMPONENTS Python) # Only require Python components + + # Ensure the 'nvcc' command is NOT searched for or checked for CPU builds. + # This whole 'find_program(NVCC_EXECUTABLE nvcc)' block is now unnecessary for CPU. + # We will simply not define NVCC_EXECUTABLE or let it be 'NOTFOUND'. + + # Include the cpu_extension.cmake directly and then return/exit this CMakeList. + # This ensures no further GPU-specific code is processed. + include(${CMAKE_CURRENT_LIST_DIR}/cmake/cpu_extension.cmake) + + # After configuring for CPU, we don't need to process any GPU-specific logic. + # This `return()` effectively stops CMake processing this file for GPU-related sections. 
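The CMake block above is a guard clause: detect the CPU target first, do the minimal CPU-only setup, and return() before any GPU probing runs. The same control flow, sketched in Python with hypothetical helper names (configure_cpu_extension and configure_gpu_extensions are illustrations, not vLLM functions):

    import os

    def configure_cpu_extension() -> None:
        print("include the cmake/cpu_extension.cmake equivalent")

    def configure_gpu_extensions(target: str) -> None:
        print(f"probe the {target} toolchain (nvcc/hipcc), arch lists, kernels")

    def configure_build() -> None:
        # Decide the target up front; exit before any GPU detection can fire.
        target = os.environ.get("VLLM_TARGET_DEVICE", "cuda")
        if target == "cpu":
            configure_cpu_extension()
            return  # mirrors the CMake return() early-exit
        configure_gpu_extensions(target)

    configure_build()
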
+ return() +endif() + + + +# The following blocks will ONLY be processed if VLLM_TARGET_DEVICE is NOT "cpu" +# (i.e., it's "cuda" or "rocm" or potentially some other future GPU target) + # Ensure the 'nvcc' command is in the PATH +# This block is now outside the "if cpu" condition, so it only runs for GPU builds. find_program(NVCC_EXECUTABLE nvcc) if (CUDA_FOUND AND NOT NVCC_EXECUTABLE) - message(FATAL_ERROR "nvcc not found") + message(FATAL_ERROR "nvcc not found") endif() # @@ -78,13 +119,18 @@ endif() # so there is no need to do this explicitly with check_language/enable_language, # etc. # +# This find_package(Torch REQUIRED) call only happens for GPU builds now. find_package(Torch REQUIRED) # # Forward the non-CUDA device extensions to external CMake scripts. -# +# This block should now logically only hit for HIP if VLLM_TARGET_DEVICE is not "cpu" +# and also not "cuda". But your structure handles it well. if (NOT VLLM_TARGET_DEVICE STREQUAL "cuda" AND NOT VLLM_TARGET_DEVICE STREQUAL "rocm") + # This 'if cpu' block within here is now redundant because the main 'if (VLLM_TARGET_DEVICE STREQUAL "cpu")' + # at the top handles it. However, keeping it doesn't harm, just means this outer if + # will never be true in practice for a "cpu" target given the early return. if (VLLM_TARGET_DEVICE STREQUAL "cpu") include(${CMAKE_CURRENT_LIST_DIR}/cmake/cpu_extension.cmake) else() @@ -119,6 +165,9 @@ elseif(HIP_FOUND) "expected for ROCm build, saw ${Torch_VERSION} instead.") endif() else() + # This FATAL_ERROR will now only trigger if VLLM_TARGET_DEVICE is something + # that requires a GPU (like "cuda" or "rocm") but neither CUDA nor HIP are found. + # It will NOT trigger for "cpu" anymore. message(FATAL_ERROR "Can't find CUDA or HIP installation.") endif() @@ -204,11 +253,12 @@ endif() set(VLLM_CUMEM_EXT_SRC "csrc/cumem_allocator.cpp") -set_gencode_flags_for_srcs( - SRCS "${VLLM_CUMEM_EXT_SRC}" - CUDA_ARCHS "${CUDA_ARCHS}") - +# --- MODIFICATION: Make cumem_allocator conditional --- if(VLLM_GPU_LANG STREQUAL "CUDA") + set_gencode_flags_for_srcs( + SRCS "${VLLM_CUMEM_EXT_SRC}" + CUDA_ARCHS "${CUDA_ARCHS}") + message(STATUS "Enabling cumem allocator extension.") # link against cuda driver library list(APPEND CUMEM_LIBS CUDA::cuda_driver) @@ -222,6 +272,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") WITH_SOABI) endif() + # # _C extension # @@ -246,6 +297,7 @@ set(VLLM_EXT_SRC "csrc/custom_all_reduce.cu" "csrc/torch_bindings.cpp") +# --- MODIFICATION: Wrap all CUDA-specific source additions and FetchContent --- if(VLLM_GPU_LANG STREQUAL "CUDA") SET(CUTLASS_ENABLE_HEADERS_ONLY ON CACHE BOOL "Enable only the header library") @@ -303,13 +355,13 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") cuda_archs_loose_intersection(MARLIN_ARCHS "8.0;8.6;8.7;8.9;9.0;10.0;10.1;12.0" "${CUDA_ARCHS}") if (MARLIN_ARCHS) set(MARLIN_SRCS - "csrc/quantization/fp8/fp8_marlin.cu" - "csrc/quantization/marlin/dense/marlin_cuda_kernel.cu" - "csrc/quantization/marlin/sparse/marlin_24_cuda_kernel.cu" - "csrc/quantization/marlin/qqq/marlin_qqq_gemm_kernel.cu" - "csrc/quantization/gptq_marlin/gptq_marlin.cu" - "csrc/quantization/gptq_marlin/gptq_marlin_repack.cu" - "csrc/quantization/gptq_marlin/awq_marlin_repack.cu") + "csrc/quantization/fp8/fp8_marlin.cu" + "csrc/quantization/marlin/dense/marlin_cuda_kernel.cu" + "csrc/quantization/marlin/sparse/marlin_24_cuda_kernel.cu" + "csrc/quantization/marlin/qqq/marlin_qqq_gemm_kernel.cu" + "csrc/quantization/gptq_marlin/gptq_marlin.cu" + "csrc/quantization/gptq_marlin/gptq_marlin_repack.cu" + 
"csrc/quantization/gptq_marlin/awq_marlin_repack.cu") set_gencode_flags_for_srcs( SRCS "${MARLIN_SRCS}" CUDA_ARCHS "${MARLIN_ARCHS}") @@ -317,15 +369,15 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") message(STATUS "Building Marlin kernels for archs: ${MARLIN_ARCHS}") else() message(STATUS "Not building Marlin kernels as no compatible archs found" - " in CUDA target architectures") + " in CUDA target architectures") endif() # Only build AllSpark kernels if we are building for at least some compatible archs. cuda_archs_loose_intersection(ALLSPARK_ARCHS "8.0;8.6;8.7;8.9" "${CUDA_ARCHS}") if (ALLSPARK_ARCHS) set(ALLSPARK_SRCS - "csrc/quantization/gptq_allspark/allspark_repack.cu" - "csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu") + "csrc/quantization/gptq_allspark/allspark_repack.cu" + "csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu") set_gencode_flags_for_srcs( SRCS "${ALLSPARK_SRCS}" CUDA_ARCHS "${ALLSPARK_ARCHS}") @@ -333,7 +385,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") message(STATUS "Building AllSpark kernels for archs: ${ALLSPARK_ARCHS}") else() message(STATUS "Not building AllSpark kernels as no compatible archs found" - " in CUDA target architectures") + " in CUDA target architectures") endif() @@ -343,11 +395,11 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") cuda_archs_loose_intersection(SCALED_MM_ARCHS "9.0a;" "${CUDA_ARCHS}") if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0 AND SCALED_MM_ARCHS) set(SRCS - "csrc/quantization/cutlass_w8a8/scaled_mm_c3x_sm90.cu" - "csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm90_fp8.cu" - "csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm90_int8.cu" - "csrc/quantization/cutlass_w8a8/c3x/scaled_mm_azp_sm90_int8.cu" - "csrc/quantization/cutlass_w8a8/c3x/scaled_mm_blockwise_sm90_fp8.cu") + "csrc/quantization/cutlass_w8a8/scaled_mm_c3x_sm90.cu" + "csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm90_fp8.cu" + "csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm90_int8.cu" + "csrc/quantization/cutlass_w8a8/c3x/scaled_mm_azp_sm90_int8.cu" + "csrc/quantization/cutlass_w8a8/c3x/scaled_mm_blockwise_sm90_fp8.cu") set_gencode_flags_for_srcs( SRCS "${SRCS}" CUDA_ARCHS "${SCALED_MM_ARCHS}") @@ -359,12 +411,12 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") else() if (NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0 AND SCALED_MM_ARCHS) message(STATUS "Not building scaled_mm_c3x_sm90 as CUDA Compiler version is " - "not >= 12.0, we recommend upgrading to CUDA 12.0 or " - "later if you intend on running FP8 quantized models on " - "Hopper.") + "not >= 12.0, we recommend upgrading to CUDA 12.0 or " + "later if you intend on running FP8 quantized models on " + "Hopper.") else() message(STATUS "Not building scaled_mm_c3x_sm90 as no compatible archs found " - "in CUDA target architectures") + "in CUDA target architectures") endif() endif() @@ -387,12 +439,12 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") else() if (NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.8 AND SCALED_MM_ARCHS) message(STATUS "Not building scaled_mm_c3x_sm100 as CUDA Compiler version is " - "not >= 12.8, we recommend upgrading to CUDA 12.8 or " - "later if you intend on running FP8 quantized models on " - "Blackwell.") + "not >= 12.8, we recommend upgrading to CUDA 12.8 or " + "later if you intend on running FP8 quantized models on " + "Blackwell.") else() message(STATUS "Not building scaled_mm_c3x_100 as no compatible archs found " - "in CUDA target architectures") + "in CUDA target architectures") endif() endif() @@ -414,10 +466,10 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") else() if (SCALED_MM_3X_ARCHS) 
message(STATUS "Not building scaled_mm_c2x as all archs are already built" - " for and covered by scaled_mm_c3x") + " for and covered by scaled_mm_c3x") else() message(STATUS "Not building scaled_mm_c2x as no compatible archs found " - "in CUDA target architectures") + "in CUDA target architectures") endif() endif() @@ -438,11 +490,11 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") else() if (NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.2 AND SCALED_MM_ARCHS) message(STATUS "Not building sparse_scaled_mm_c3x kernels as CUDA Compiler version is " - "not >= 12.2, we recommend upgrading to CUDA 12.2 or later " - "if you intend on running FP8 sparse quantized models on Hopper.") + "not >= 12.2, we recommend upgrading to CUDA 12.2 or later " + "if you intend on running FP8 sparse quantized models on Hopper.") else() message(STATUS "Not building sparse_scaled_mm_c3x as no compatible archs found " - "in CUDA target architectures") + "in CUDA target architectures") endif() endif() @@ -492,7 +544,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") cuda_archs_loose_intersection(SCALED_MM_ARCHS "9.0a;" "${CUDA_ARCHS}") if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.3 AND SCALED_MM_ARCHS) set(SRCS "csrc/quantization/cutlass_w8a8/moe/grouped_mm_c3x.cu" - "csrc/quantization/cutlass_w8a8/moe/moe_data.cu") + "csrc/quantization/cutlass_w8a8/moe/moe_data.cu") set_gencode_flags_for_srcs( SRCS "${SRCS}" CUDA_ARCHS "${SCALED_MM_ARCHS}") @@ -502,11 +554,11 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") else() if (NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.3 AND SCALED_MM_ARCHS) message(STATUS "Not building grouped_mm_c3x kernels as CUDA Compiler version is " - "not >= 12.3, we recommend upgrading to CUDA 12.3 or later " - "if you intend on running FP8 quantized MoE models on Hopper.") + "not >= 12.3, we recommend upgrading to CUDA 12.3 or later " + "if you intend on running FP8 quantized MoE models on Hopper.") else() message(STATUS "Not building grouped_mm_c3x as no compatible archs found " - "in CUDA target architectures") + "in CUDA target architectures") endif() endif() @@ -571,35 +623,57 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") if (NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0 AND MACHETE_ARCHS) message(STATUS "Not building Machete kernels as CUDA Compiler version is " - "not >= 12.0, we recommend upgrading to CUDA 12.0 or " - "later if you intend on running w4a16 quantized models on " - "Hopper.") + "not >= 12.0, we recommend upgrading to CUDA 12.0 or " + "later if you intend on running w4a16 quantized models on " + "Hopper.") else() message(STATUS "Not building Machete kernels as no compatible archs " - "found in CUDA target architectures") + "found in CUDA target architectures") endif() endif() -# if CUDA endif -endif() +endif() # End of if(VLLM_GPU_LANG STREQUAL "CUDA") for _C extension sources + message(STATUS "Enabling C extension.") -define_gpu_extension_target( - _C - DESTINATION vllm - LANGUAGE ${VLLM_GPU_LANG} - SOURCES ${VLLM_EXT_SRC} - COMPILE_FLAGS ${VLLM_GPU_FLAGS} - ARCHITECTURES ${VLLM_GPU_ARCHES} - INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR} - INCLUDE_DIRECTORIES ${CUTLASS_TOOLS_UTIL_INCLUDE_DIR} - USE_SABI 3 - WITH_SOABI) +# --- MODIFICATION: Make _C extension target conditional --- +if (VLLM_TARGET_DEVICE STREQUAL "cpu") + # For CPU, define a C++ extension with no GPU-specific sources or flags + # You might need to adjust VLLM_EXT_SRC for CPU-only files here if any exist + # For now, we'll assume torch_bindings.cpp is the main one + set(VLLM_EXT_SRC "csrc/torch_bindings.cpp") # 
Only C++ sources + define_gpu_extension_target( + _C + DESTINATION vllm + LANGUAGE CXX # Use CXX language for CPU + SOURCES ${VLLM_EXT_SRC} + # No GPU specific flags or architectures for CPU + USE_SABI 3 + WITH_SOABI) +else() + # Original logic for GPU targets + define_gpu_extension_target( + _C + DESTINATION vllm + LANGUAGE ${VLLM_GPU_LANG} + SOURCES ${VLLM_EXT_SRC} + COMPILE_FLAGS ${VLLM_GPU_FLAGS} + ARCHITECTURES ${VLLM_GPU_ARCHES} + INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR} + INCLUDE_DIRECTORIES ${CUTLASS_TOOLS_UTIL_INCLUDE_DIR} + USE_SABI 3 + WITH_SOABI) +endif() + # If CUTLASS is compiled on NVCC >= 12.5, it by default uses # cudaGetDriverEntryPointByVersion as a wrapper to avoid directly calling the # driver API. This causes problems when linking with earlier versions of CUDA. # Setting this variable sidesteps the issue by calling the driver directly. -target_compile_definitions(_C PRIVATE CUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1) +# --- MODIFICATION: Make this conditional for CUDA --- +if(VLLM_GPU_LANG STREQUAL "CUDA") + target_compile_definitions(_C PRIVATE CUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1) +endif() + # # _moe_C extension @@ -610,15 +684,14 @@ set(VLLM_MOE_EXT_SRC "csrc/moe/moe_align_sum_kernels.cu" "csrc/moe/topk_softmax_kernels.cu") +# --- MODIFICATION: Wrap all MoE specific CUDA source additions and build --- if(VLLM_GPU_LANG STREQUAL "CUDA") list(APPEND VLLM_MOE_EXT_SRC "csrc/moe/moe_wna16.cu") -endif() -set_gencode_flags_for_srcs( - SRCS "${VLLM_MOE_EXT_SRC}" - CUDA_ARCHS "${CUDA_ARCHS}") + set_gencode_flags_for_srcs( + SRCS "${VLLM_MOE_EXT_SRC}" + CUDA_ARCHS "${CUDA_ARCHS}") -if(VLLM_GPU_LANG STREQUAL "CUDA") set(VLLM_MOE_WNA16_SRC "csrc/moe/moe_wna16.cu") @@ -677,20 +750,37 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") message(STATUS "Building Marlin MOE kernels for archs: ${MARLIN_MOE_ARCHS}") else() message(STATUS "Not building Marlin MOE kernels as no compatible archs found" - " in CUDA target architectures") + " in CUDA target architectures") endif() -endif() +endif() # End of if(VLLM_GPU_LANG STREQUAL "CUDA") for moe + message(STATUS "Enabling moe extension.") -define_gpu_extension_target( - _moe_C - DESTINATION vllm - LANGUAGE ${VLLM_GPU_LANG} - SOURCES ${VLLM_MOE_EXT_SRC} - COMPILE_FLAGS ${VLLM_GPU_FLAGS} - ARCHITECTURES ${VLLM_GPU_ARCHES} - USE_SABI 3 - WITH_SOABI) +# --- MODIFICATION: Make _moe_C extension target conditional --- +if (VLLM_TARGET_DEVICE STREQUAL "cpu") + # For CPU, define a C++ extension for MoE with only C++ sources + set(VLLM_MOE_EXT_SRC "csrc/moe/torch_bindings.cpp") # Only C++ sources + define_gpu_extension_target( + _moe_C + DESTINATION vllm + LANGUAGE CXX # Use CXX language for CPU + SOURCES ${VLLM_MOE_EXT_SRC} + USE_SABI 3 + WITH_SOABI) +else() + # Original logic for GPU targets + define_gpu_extension_target( + _moe_C + DESTINATION vllm + LANGUAGE ${VLLM_GPU_LANG} + SOURCES ${VLLM_MOE_EXT_SRC} + COMPILE_FLAGS ${VLLM_GPU_FLAGS} + ARCHITECTURES ${VLLM_GPU_ARCHES} + USE_SABI 3 + WITH_SOABI) +endif() + + if(VLLM_GPU_LANG STREQUAL "HIP") # @@ -713,6 +803,7 @@ if(VLLM_GPU_LANG STREQUAL "HIP") endif() # For CUDA we also build and ship some external projects. 
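Both conditional targets above follow one recipe: a C++-only source list compiled as CXX for CPU, widened with .cu kernels and compiled as CUDA otherwise. The selection logic, condensed into a Python sketch (source file names are taken from the diff; the tuple shape is illustrative):

    def moe_extension_config(target_device: str) -> tuple[str, list[str]]:
        # CPU builds keep only the C++ bindings; GPU builds add the kernels.
        if target_device == "cpu":
            return "CXX", ["csrc/moe/torch_bindings.cpp"]
        return "CUDA", [
            "csrc/moe/torch_bindings.cpp",
            "csrc/moe/moe_align_sum_kernels.cu",
            "csrc/moe/topk_softmax_kernels.cu",
            "csrc/moe/moe_wna16.cu",
        ]

    print(moe_extension_config("cpu"))
    # ('CXX', ['csrc/moe/torch_bindings.cpp'])
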
+# --- MODIFICATION: Make external projects conditional --- if (VLLM_GPU_LANG STREQUAL "CUDA") include(cmake/external_projects/flashmla.cmake) include(cmake/external_projects/vllm_flash_attn.cmake) From 6d51d900879521c2c0cd815dc214032e2a9d2c2f Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 12:11:58 +0300 Subject: [PATCH 17/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (12) Signed-off-by: Alexandru Badea --- .github/workflows/pre-commit.yml | 5 +++-- setup.py | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 6e1c15f0464..a111491be11 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -15,7 +15,7 @@ jobs: python-version: "3.12" - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json" - run: echo "::add-matcher::.github/workflows/matchers/mypy.json" - + - name: Install PyTorch for CPU and NumPy run: | pip install numpy @@ -25,7 +25,8 @@ jobs: run: pip install $(grep -v 'torch==' requirements/build.txt | grep -v '^#' | tr '\n' ' ') - name: Install vLLM Project and Dev Dependencies - + env: + VLLM_BUILD_TARGET: cpu run: pip install -e ".[dev]" - uses: pre-commit/action@v3.0.1 diff --git a/setup.py b/setup.py index 770d1f8d5e4..12d78cbdd6c 100755 --- a/setup.py +++ b/setup.py @@ -529,7 +529,10 @@ def get_nvcc_cuda_version() -> Version: Adapted from https://github.com/NVIDIA/apex/blob/8b7a1ff183741dd8f9b87e7bafd04cfde99cea28/setup.py """ - assert CUDA_HOME is not None, "CUDA_HOME is not set" + if VLLM_TARGET_DEVICE == "cpu": + return Version("0.0") + + assert CUDA_HOME is not None, "CUDA_HOME is not set for a CUDA/HIP build target." 
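For orientation, the setup.py hunk here makes get_nvcc_cuda_version short-circuit before probing nvcc at all on CPU targets, with Version("0.0") as the sentinel. A self-contained sketch of the resulting control flow (function and argument names simplified from the original):

    import subprocess
    from typing import Optional

    from packaging.version import Version, parse

    def nvcc_cuda_version(cuda_home: Optional[str], target: str) -> Version:
        if target == "cpu":
            return Version("0.0")  # sentinel: no CUDA toolchain expected
        assert cuda_home is not None, "CUDA_HOME is not set"
        out = subprocess.check_output([cuda_home + "/bin/nvcc", "-V"],
                                      universal_newlines=True)
        # nvcc -V ends with e.g. "Cuda compilation tools, release 12.4, V12.4.131"
        words = out.split()
        return parse(words[words.index("release") + 1].rstrip(","))
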
nvcc_output = subprocess.check_output([CUDA_HOME + "/bin/nvcc", "-V"], universal_newlines=True) output = nvcc_output.split() From 8b226e5d569dd433f7603ba32836c253d4cb1a9d Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 15:16:34 +0300 Subject: [PATCH 18/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (13) Signed-off-by: Alexandru Badea --- .github/workflows/pre-commit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index a111491be11..11e7da9926e 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -26,7 +26,7 @@ jobs: - name: Install vLLM Project and Dev Dependencies env: - VLLM_BUILD_TARGET: cpu + VLLM_TARGET_DEVICE: cpu run: pip install -e ".[dev]" - uses: pre-commit/action@v3.0.1 From b1682bb3e02c4d569e0583da6fd2dc97d5bcfec6 Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 15:27:09 +0300 Subject: [PATCH 19/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (14) Signed-off-by: Alexandru Badea --- .github/workflows/pre-commit.yml | 5 ++++- CMakeLists.txt | 38 +++++++++++++++++++++++--------- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 11e7da9926e..b8b33dd915f 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -19,7 +19,7 @@ jobs: - name: Install PyTorch for CPU and NumPy run: | pip install numpy - pip install torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu + pip install torch==2.6.0+cpu torchvision==0.21.0+cpu torchaudio==2.6.0+cpu --index-url https://download.pytorch.org/whl/cpu - name: Install vLLM Build Dependencies (excluding torch and comments) run: pip install $(grep -v 'torch==' requirements/build.txt | grep -v '^#' | tr '\n' ' ') @@ -27,6 +27,9 @@ jobs: - name: Install vLLM Project and Dev Dependencies env: VLLM_TARGET_DEVICE: cpu + USE_CUDA: "OFF" + CUDA_VISIBLE_DEVICES: "" + FORCE_CUDA: "0" run: pip install -e ".[dev]" - uses: pre-commit/action@v3.0.1 diff --git a/CMakeLists.txt b/CMakeLists.txt index 77f75258ad1..29aea51ea8b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,16 +126,34 @@ find_package(Torch REQUIRED) # Forward the non-CUDA device extensions to external CMake scripts. # This block should now logically only hit for HIP if VLLM_TARGET_DEVICE is not "cpu" # and also not "cuda". But your structure handles it well. -if (NOT VLLM_TARGET_DEVICE STREQUAL "cuda" AND - NOT VLLM_TARGET_DEVICE STREQUAL "rocm") - # This 'if cpu' block within here is now redundant because the main 'if (VLLM_TARGET_DEVICE STREQUAL "cpu")' - # at the top handles it. However, keeping it doesn't harm, just means this outer if - # will never be true in practice for a "cpu" target given the early return. - if (VLLM_TARGET_DEVICE STREQUAL "cpu") - include(${CMAKE_CURRENT_LIST_DIR}/cmake/cpu_extension.cmake) - else() - return() - endif() +if (VLLM_TARGET_DEVICE STREQUAL "cpu") + message(STATUS "VLLM: Building for CPU. Skipping all CUDA/HIP related configuration.") + + # Explicitly set CUDA_FOUND and HIP_FOUND to FALSE for CPU builds. + # This prevents 'find_package(Torch)' from potentially setting them based on + # system-wide detections or the Torch config's internal checks for CUDA components. 
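One cheap sanity check for the environment pins this diff introduces (USE_CUDA, CUDA_VISIBLE_DEVICES, FORCE_CUDA) is to ask the installed torch wheel what it sees before the build starts; a sketch assuming the CPU-only torch 2.6.0 wheel installed by the workflow step above:

    import os

    # Mirror the workflow's environment before torch is imported.
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
    os.environ["FORCE_CUDA"] = "0"

    import torch

    # A +cpu wheel reports no CUDA runtime at all.
    assert torch.version.cuda is None, "expected a CPU-only torch wheel"
    assert not torch.cuda.is_available(), "CUDA devices unexpectedly visible"
    print(f"torch {torch.__version__}: CPU-only environment confirmed")
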
+ set(CUDA_FOUND FALSE CACHE INTERNAL "Force CUDA to be not found for CPU build" FORCE) + set(HIP_FOUND FALSE CACHE INTERNAL "Force HIP to be not found for CPU build" FORCE) + + # Set VLLM_GPU_LANG to an empty string or a placeholder + set(VLLM_GPU_LANG "" CACHE INTERNAL "No GPU language for CPU build" FORCE) + + # Set environment variables to disable CUDA detection in PyTorch + set(ENV{USE_CUDA} OFF) + set(ENV{CUDA_VISIBLE_DEVICES} "") + + # Try to find Torch with CUDA explicitly disabled + set(CAFFE2_USE_CUDA OFF CACHE BOOL "Disable CUDA for CPU build" FORCE) + + # Now, find Torch, but with explicit instructions to not look for CUDA/HIP + find_package(Torch REQUIRED) + + # Include the cpu_extension.cmake directly and then return/exit this CMakeList. + # This ensures no further GPU-specific code is processed. + include(${CMAKE_CURRENT_LIST_DIR}/cmake/cpu_extension.cmake) + + # After configuring for CPU, we don't need to process any GPU-specific logic. + # This `return()` effectively stops CMake processing this file for GPU-related sections. return() endif() From a45affc15fbbe1fc9c8c81cbf8cd1209798d50ea Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 16:12:44 +0300 Subject: [PATCH 20/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (15) Signed-off-by: Alexandru Badea --- CMakeLists.txt | 81 ++++++++------------------------------------------ 1 file changed, 13 insertions(+), 68 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 29aea51ea8b..ff1acd30119 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,47 +61,27 @@ else() " before running cmake configure.") endif() -# -# Update cmake's `CMAKE_PREFIX_PATH` with torch location. -# -append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path") - - - -# Move the core logic for CPU-only builds much earlier. -# If building for CPU, we will handle Torch finding differently and then exit. if (VLLM_TARGET_DEVICE STREQUAL "cpu") message(STATUS "VLLM: Building for CPU. Skipping all CUDA/HIP related configuration.") - - # Explicitly set CUDA_FOUND and HIP_FOUND to FALSE for CPU builds. - # This prevents 'find_package(Torch)' from potentially setting them based on - # system-wide detections or the Torch config's internal checks for CUDA components. + + # For CPU builds, we need to find Torch but without triggering CUDA detection + # Set these variables to prevent CUDA detection set(CUDA_FOUND FALSE CACHE INTERNAL "Force CUDA to be not found for CPU build" FORCE) set(HIP_FOUND FALSE CACHE INTERNAL "Force HIP to be not found for CPU build" FORCE) - - # Set VLLM_GPU_LANG to an empty string or a placeholder - set(VLLM_GPU_LANG "" CACHE INTERNAL "No GPU language for CPU build" FORCE) - - # Now, find Torch, but with explicit instructions to not look for CUDA/HIP - # This might still trigger Caffe2 warnings, but it's the correct way to try. - # If the Caffe2 error persists here, it means the PyTorch CPU wheel's CMake - # config is truly stubborn. - find_package(Torch REQUIRED COMPONENTS Python) # Only require Python components - - # Ensure the 'nvcc' command is NOT searched for or checked for CPU builds. - # This whole 'find_program(NVCC_EXECUTABLE nvcc)' block is now unnecessary for CPU. - # We will simply not define NVCC_EXECUTABLE or let it be 'NOTFOUND'. - - # Include the cpu_extension.cmake directly and then return/exit this CMakeList. - # This ensures no further GPU-specific code is processed. 
+ set(CAFFE2_USE_CUDA OFF CACHE BOOL "Disable CUDA for CPU build" FORCE) + + # Find Torch with minimal requirements for CPU + find_package(Torch REQUIRED) + + # Include the CPU extension cmake and return early include(${CMAKE_CURRENT_LIST_DIR}/cmake/cpu_extension.cmake) - - # After configuring for CPU, we don't need to process any GPU-specific logic. - # This `return()` effectively stops CMake processing this file for GPU-related sections. return() endif() - +# +# Update cmake's `CMAKE_PREFIX_PATH` with torch location. +# +append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path") # The following blocks will ONLY be processed if VLLM_TARGET_DEVICE is NOT "cpu" # (i.e., it's "cuda" or "rocm" or potentially some other future GPU target) @@ -122,41 +102,6 @@ endif() # This find_package(Torch REQUIRED) call only happens for GPU builds now. find_package(Torch REQUIRED) -# -# Forward the non-CUDA device extensions to external CMake scripts. -# This block should now logically only hit for HIP if VLLM_TARGET_DEVICE is not "cpu" -# and also not "cuda". But your structure handles it well. -if (VLLM_TARGET_DEVICE STREQUAL "cpu") - message(STATUS "VLLM: Building for CPU. Skipping all CUDA/HIP related configuration.") - - # Explicitly set CUDA_FOUND and HIP_FOUND to FALSE for CPU builds. - # This prevents 'find_package(Torch)' from potentially setting them based on - # system-wide detections or the Torch config's internal checks for CUDA components. - set(CUDA_FOUND FALSE CACHE INTERNAL "Force CUDA to be not found for CPU build" FORCE) - set(HIP_FOUND FALSE CACHE INTERNAL "Force HIP to be not found for CPU build" FORCE) - - # Set VLLM_GPU_LANG to an empty string or a placeholder - set(VLLM_GPU_LANG "" CACHE INTERNAL "No GPU language for CPU build" FORCE) - - # Set environment variables to disable CUDA detection in PyTorch - set(ENV{USE_CUDA} OFF) - set(ENV{CUDA_VISIBLE_DEVICES} "") - - # Try to find Torch with CUDA explicitly disabled - set(CAFFE2_USE_CUDA OFF CACHE BOOL "Disable CUDA for CPU build" FORCE) - - # Now, find Torch, but with explicit instructions to not look for CUDA/HIP - find_package(Torch REQUIRED) - - # Include the cpu_extension.cmake directly and then return/exit this CMakeList. - # This ensures no further GPU-specific code is processed. - include(${CMAKE_CURRENT_LIST_DIR}/cmake/cpu_extension.cmake) - - # After configuring for CPU, we don't need to process any GPU-specific logic. - # This `return()` effectively stops CMake processing this file for GPU-related sections. - return() -endif() - # # Set up GPU language and check the torch version and warn if it isn't # what is expected. From 427b93fd44807c0724035f7c1049d9222dfc1e43 Mon Sep 17 00:00:00 2001 From: Alexandru Badea Date: Thu, 10 Jul 2025 16:18:17 +0300 Subject: [PATCH 21/26] fix for pre-commit failed job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (16) Signed-off-by: Alexandru Badea --- CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ff1acd30119..bc69a61d7f3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,6 +61,11 @@ else() " before running cmake configure.") endif() +# +# Update cmake's `CMAKE_PREFIX_PATH` with torch location. +# +append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path") + if (VLLM_TARGET_DEVICE STREQUAL "cpu") message(STATUS "VLLM: Building for CPU. 
From e36236c79c410a9a86ff4649845aa5fff67912a1 Mon Sep 17 00:00:00 2001
From: Alexandru Badea
Date: Thu, 10 Jul 2025 16:22:58 +0300
Subject: [PATCH 22/26] Fix for failed pre-commit job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (17)

Signed-off-by: Alexandru Badea
---
 CMakeLists.txt | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index bc69a61d7f3..647c0d915dd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,14 +69,8 @@ append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")
 if (VLLM_TARGET_DEVICE STREQUAL "cpu")
     message(STATUS "VLLM: Building for CPU. Skipping all CUDA/HIP related configuration.")
 
-  # For CPU builds, we need to find Torch but without triggering CUDA detection
-  # Set these variables to prevent CUDA detection
-  set(CUDA_FOUND FALSE CACHE INTERNAL "Force CUDA to be not found for CPU build" FORCE)
-  set(HIP_FOUND FALSE CACHE INTERNAL "Force HIP to be not found for CPU build" FORCE)
-  set(CAFFE2_USE_CUDA OFF CACHE BOOL "Disable CUDA for CPU build" FORCE)
-
-  # Find Torch with minimal requirements for CPU
-  find_package(Torch REQUIRED)
+  # For CPU builds, we don't need to find Torch through CMake
+  # The Python extension will link against torch through the Python environment
 
   # Include the CPU extension cmake and return early
   include(${CMAKE_CURRENT_LIST_DIR}/cmake/cpu_extension.cmake)

From 076966dda615a92809bf3c20ba9d41235039251c Mon Sep 17 00:00:00 2001
From: Alexandru Badea
Date: Thu, 10 Jul 2025 16:30:41 +0300
Subject: [PATCH 23/26] Fix for failed pre-commit job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (18)

Signed-off-by: Alexandru Badea
---
 .github/workflows/pre-commit.yml | 5 +++++
 CMakeLists.txt                   | 5 +++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index b8b33dd915f..bfe549d3899 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -16,6 +16,11 @@ jobs:
     - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
     - run: echo "::add-matcher::.github/workflows/matchers/mypy.json"
 
+    - name: Install system dependencies
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y libnuma-dev
+
     - name: Install PyTorch for CPU and NumPy
       run: |
         pip install numpy

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 647c0d915dd..900a3d7f089 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,8 +69,9 @@ append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")
 if (VLLM_TARGET_DEVICE STREQUAL "cpu")
     message(STATUS "VLLM: Building for CPU. Skipping all CUDA/HIP related configuration.")
 
-  # For CPU builds, we don't need to find Torch through CMake
-  # The Python extension will link against torch through the Python environment
+  # For CPU builds, we still need to find Torch to get headers, but skip CUDA detection
+  set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH})
+  find_package(Torch REQUIRED)
 
   # Include the CPU extension cmake and return early
   include(${CMAKE_CURRENT_LIST_DIR}/cmake/cpu_extension.cmake)
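Note: the libnuma-dev step added to the workflow in PATCH 23/26 suggests the CPU extension build depends on NUMA headers being present on the runner. If that assumption holds, the dependency could also be surfaced at configure time rather than at link time, with a guard along these lines (a sketch, not part of the series):

    # Fail configuration early, with an actionable message, if libnuma is missing.
    find_library(NUMA_LIBRARY numa)
    find_path(NUMA_INCLUDE_DIR numa.h)
    if(NOT NUMA_LIBRARY OR NOT NUMA_INCLUDE_DIR)
      message(FATAL_ERROR "libnuma not found; on Debian/Ubuntu install it with "
                          "'sudo apt-get install -y libnuma-dev'.")
    endif()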
From 9bf532063c4ac04733554fec3d8baea785fefde4 Mon Sep 17 00:00:00 2001
From: Alexandru Badea
Date: Thu, 10 Jul 2025 16:35:42 +0300
Subject: [PATCH 24/26] Fix for failed pre-commit job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (19)

Signed-off-by: Alexandru Badea
---
 CMakeLists.txt | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 900a3d7f089..2360d19d4f7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,9 +69,8 @@ append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")
 if (VLLM_TARGET_DEVICE STREQUAL "cpu")
     message(STATUS "VLLM: Building for CPU. Skipping all CUDA/HIP related configuration.")
 
-  # For CPU builds, we still need to find Torch to get headers, but skip CUDA detection
-  set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH})
-  find_package(Torch REQUIRED)
+  # For CPU builds, we don't need to find Torch through CMake
+  # The CPU extension will handle torch headers through the Python environment
 
   # Include the CPU extension cmake and return early
   include(${CMAKE_CURRENT_LIST_DIR}/cmake/cpu_extension.cmake)

From 62dd0e29b8e534e8b15f2c0aed8a3a7b9bcd640d Mon Sep 17 00:00:00 2001
From: Alexandru Badea
Date: Thu, 10 Jul 2025 16:40:18 +0300
Subject: [PATCH 25/26] Fix for failed pre-commit job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (20)

Signed-off-by: Alexandru Badea
---
 CMakeLists.txt | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2360d19d4f7..391e1ab2b4c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,8 +69,19 @@ append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")
 if (VLLM_TARGET_DEVICE STREQUAL "cpu")
     message(STATUS "VLLM: Building for CPU. Skipping all CUDA/HIP related configuration.")
 
-  # For CPU builds, we don't need to find Torch through CMake
-  # The CPU extension will handle torch headers through the Python environment
+  # For CPU builds, we need to get torch include directories without full CMake config
+  # Use Python to get torch include paths
+  execute_process(
+    COMMAND ${Python_EXECUTABLE} -c "import torch; print(';'.join(torch.utils.cpp_extension.include_paths()), end='')"
+    OUTPUT_VARIABLE TORCH_INCLUDE_DIRS
+    RESULT_VARIABLE TORCH_INCLUDE_RESULT
+  )
+
+  if(NOT TORCH_INCLUDE_RESULT EQUAL 0)
+    message(FATAL_ERROR "Failed to get torch include directories")
+  endif()
+
+  message(STATUS "Torch include directories: ${TORCH_INCLUDE_DIRS}")
 
   # Include the CPU extension cmake and return early
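Note: PATCH 25/26 fills TORCH_INCLUDE_DIRS from torch.utils.cpp_extension.include_paths(), i.e. the directories a Torch C++ extension must compile against. Downstream, a target would consume the semicolon-separated list roughly like this (a sketch; the target name and source file are hypothetical placeholders, and cpu_extension.cmake presumably does something equivalent):

    add_library(_C MODULE csrc/cpu_ops.cpp)          # hypothetical source file
    target_include_directories(_C PRIVATE ${TORCH_INCLUDE_DIRS})
    target_compile_features(_C PRIVATE cxx_std_17)   # Torch headers need C++17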
From ce22b7dad3328f249fa8c23c02d3e4d31e81df2a Mon Sep 17 00:00:00 2001
From: Alexandru Badea
Date: Thu, 10 Jul 2025 16:44:24 +0300
Subject: [PATCH 26/26] Fix for failed pre-commit job - Error: vllm/executor/ray_distributed_executor.py:531: error: Library stubs not installed for "pkg_resources" (21)

Signed-off-by: Alexandru Badea
---
 CMakeLists.txt | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 391e1ab2b4c..85af9eef87b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,18 +69,22 @@ append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")
 if (VLLM_TARGET_DEVICE STREQUAL "cpu")
     message(STATUS "VLLM: Building for CPU. Skipping all CUDA/HIP related configuration.")
 
-  # For CPU builds, we need to get torch include directories without full CMake config
-  # Use Python to get torch include paths
+  # For CPU builds, we need to get torch include directories without cpp_extension
+  # Get torch installation path and construct include paths manually
   execute_process(
-    COMMAND ${Python_EXECUTABLE} -c "import torch; print(';'.join(torch.utils.cpp_extension.include_paths()), end='')"
-    OUTPUT_VARIABLE TORCH_INCLUDE_DIRS
-    RESULT_VARIABLE TORCH_INCLUDE_RESULT
+    COMMAND ${Python_EXECUTABLE} -c "import torch; import os; print(os.path.dirname(torch.__file__), end='')"
+    OUTPUT_VARIABLE TORCH_INSTALL_PATH
+    RESULT_VARIABLE TORCH_PATH_RESULT
   )
 
-  if(NOT TORCH_INCLUDE_RESULT EQUAL 0)
-    message(FATAL_ERROR "Failed to get torch include directories")
+  if(NOT TORCH_PATH_RESULT EQUAL 0)
+    message(FATAL_ERROR "Failed to get torch installation path")
   endif()
 
+  # Construct torch include directories manually
+  set(TORCH_INCLUDE_DIRS "${TORCH_INSTALL_PATH}/include;${TORCH_INSTALL_PATH}/include/torch/csrc/api/include")
+
+  message(STATUS "Torch installation path: ${TORCH_INSTALL_PATH}")
   message(STATUS "Torch include directories: ${TORCH_INCLUDE_DIRS}")
 
   # Include the CPU extension cmake and return early
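Note: the diff of PATCH 26/26 is truncated above. Its visible direction is to stop importing torch.utils.cpp_extension (whose import chain can pull in setuptools/pkg_resources, the very stubs mypy complained about in the subject lines) and instead derive the header layout from torch.__file__. Assuming the cut-off hunk ends the way every earlier revision did, with include() and return(), the consolidated CPU branch would look roughly like this sketch:

    execute_process(
      COMMAND "${Python_EXECUTABLE}" -c
              "import os, torch; print(os.path.dirname(torch.__file__), end='')"
      OUTPUT_VARIABLE TORCH_INSTALL_PATH
      RESULT_VARIABLE TORCH_PATH_RESULT
    )
    if(NOT TORCH_PATH_RESULT EQUAL 0)
      message(FATAL_ERROR "Failed to get torch installation path")
    endif()

    # Standard layout of a torch wheel; checked here as a defensive assumption
    # that is not part of the patch itself.
    if(NOT EXISTS "${TORCH_INSTALL_PATH}/include/torch")
      message(FATAL_ERROR "Unexpected torch layout at ${TORCH_INSTALL_PATH}")
    endif()
    set(TORCH_INCLUDE_DIRS
        "${TORCH_INSTALL_PATH}/include;${TORCH_INSTALL_PATH}/include/torch/csrc/api/include")

    include(${CMAKE_CURRENT_LIST_DIR}/cmake/cpu_extension.cmake)
    return()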