Skip to content

Commit 762be26

Browse files
[Bugfix] Upgrade depyf to 0.19 and streamline custom pass logging (#20777)
Signed-off-by: Luka Govedic <lgovedic@redhat.com>
Signed-off-by: luka <lgovedic@redhat.com>
1 parent 6a9e6b2 commit 762be26

File tree

4 files changed: 13 additions and 36 deletions.

requirements/common.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that need
4040
setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
4141
einops # Required for Qwen2-VL.
4242
compressed-tensors == 0.10.2 # required for compressed-tensors
43-
depyf==0.18.0 # required for profiling and debugging with compilation config
43+
depyf==0.19.0 # required for profiling and debugging with compilation config
4444
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
4545
watchfiles # required for http server to monitor the updates of TLS files
4646
python-json-logger # Used by logging as per examples/others/logging_configuration.md

tests/compile/test_full_graph.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from __future__ import annotations
55

6+
import tempfile
67
from typing import Any, Optional, Union
78

89
import pytest
@@ -111,6 +112,11 @@ def test_full_graph(
111112
pass_config=PassConfig(enable_fusion=True,
112113
enable_noop=True)), model)
113114
for model in models_list(keywords=["FP8-dynamic", "quantized.w8a8"])
115+
] + [
116+
# Test depyf integration works
117+
(CompilationConfig(level=CompilationLevel.PIECEWISE,
118+
debug_dump_path=tempfile.gettempdir()),
119+
("facebook/opt-125m", {})),
114120
])
115121
# only test some of the models
116122
@create_new_process_for_each_test()

vllm/compilation/vllm_inductor_pass.py

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,7 @@
66
import torch
77
from torch._dynamo.utils import lazy_format_graph_code
88

9-
from vllm.config import PassConfig, VllmConfig
10-
# yapf: disable
11-
from vllm.distributed import get_tensor_model_parallel_rank as get_tp_rank
12-
from vllm.distributed import (
13-
get_tensor_model_parallel_world_size as get_tp_world_size)
14-
from vllm.distributed import model_parallel_is_initialized as p_is_init
15-
# yapf: enable
9+
from vllm.config import VllmConfig
1610
from vllm.logger import init_logger
1711

1812
from .inductor_pass import InductorPass
@@ -34,22 +28,9 @@ def __init__(self, config: VllmConfig):
3428
else None
3529
self.pass_name = self.__class__.__name__
3630

37-
def dump_graph(self, graph: torch.fx.Graph, stage: str, always=False):
31+
def dump_graph(self, graph: torch.fx.Graph, stage: str):
3832
lazy_format_graph_code(stage, graph.owning_module)
3933

40-
if stage in self.pass_config.dump_graph_stages or always:
41-
# Make sure filename includes rank in the distributed setting
42-
parallel = p_is_init() and get_tp_world_size() > 1
43-
rank = f"-{get_tp_rank()}" if parallel else ""
44-
filepath = self.pass_config.dump_graph_dir / f"{stage}{rank}.py"
45-
46-
logger.info("%s printing graph to %s", self.pass_name, filepath)
47-
with open(filepath, "w") as f:
48-
src = graph.python_code(root_module="self", verbose=True).src
49-
# Add imports so it's not full of errors
50-
print("import torch; from torch import device", file=f)
51-
print(src, file=f)
52-
5334
def begin(self):
5435
self._start_time = time.perf_counter_ns()
5536

@@ -61,10 +42,9 @@ def end_and_log(self):
6142

6243
class PrinterInductorPass(VllmInductorPass):
6344

64-
def __init__(self, name: str, config: PassConfig, always=False):
45+
def __init__(self, name: str, config: VllmConfig):
6546
super().__init__(config)
6647
self.name = name
67-
self.always = always
6848

6949
def __call__(self, graph: torch.fx.Graph):
70-
self.dump_graph(graph, self.name, always=self.always)
50+
self.dump_graph(graph, self.name)

vllm/config.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
replace)
1717
from functools import cached_property
1818
from importlib.util import find_spec
19-
from pathlib import Path
2019
from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Literal, Optional,
2120
Protocol, TypeVar, Union, cast, get_args)
2221

@@ -3953,11 +3952,6 @@ class PassConfig:
39533952
don't all have access to full configuration - that would create a cycle as
39543953
the `PassManager` is set as a property of config."""
39553954

3956-
dump_graph_stages: list[str] = field(default_factory=list)
3957-
"""List of stages for which we want to dump the graph. Each pass defines
3958-
its own stages (before, after, maybe in-between)."""
3959-
dump_graph_dir: Path = Path(".")
3960-
"""Directory to dump the graphs."""
39613955
enable_fusion: bool = field(default_factory=lambda: not envs.VLLM_USE_V1)
39623956
"""Whether to enable the custom fusion (RMSNorm/SiluMul+quant) pass."""
39633957
enable_attn_fusion: bool = False
@@ -3975,12 +3969,9 @@ def uuid(self):
39753969
"""
39763970
Produces a hash unique to the pass configuration.
39773971
Any new fields that affect compilation should be added to the hash.
3978-
Do not include dump_graph_* in the hash - they don't affect
3979-
compilation.
3972+
Any future fields that don't affect compilation should be excluded.
39803973
"""
3981-
exclude = {"dump_graph_stages", "dump_graph_dir"}
3982-
dict_ = {k: v for k, v in asdict(self).items() if k not in exclude}
3983-
return InductorPass.hash_dict(dict_)
3974+
return InductorPass.hash_dict(asdict(self))
39843975

39853976
def __post_init__(self) -> None:
39863977
if not self.enable_noop:

0 commit comments

Comments
 (0)