Skip to content

Commit 91b3d19

Browse files
authored
[cold start] replace VLLM_COMPILE_DEPYF with debug_dump_dir (#20940)
Signed-off-by: Boyuan Feng <boyuan@meta.com>
1 parent fc01791 commit 91b3d19

File tree

2 files changed

+7
-21
lines changed

2 files changed

+7
-21
lines changed

vllm/compilation/wrapper.py

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -93,27 +93,19 @@ def bytecode_hook(self, old_code: CodeType, new_code: CodeType):
9393
return
9494

9595
self.compiled_codes.append(new_code)
96-
local_cache_dir = self.vllm_config.compilation_config.local_cache_dir
97-
if isinstance(local_cache_dir, str):
98-
decompiled_file_name = ("transformed_code.py"
99-
if envs.VLLM_COMPILE_DEPYF else
100-
"transformed_code_README.txt")
101-
102-
decompiled_file = os.path.join(local_cache_dir,
103-
decompiled_file_name)
96+
debug_dump_dir = self.vllm_config.compilation_config.debug_dump_path
97+
if isinstance(debug_dump_dir, str) and debug_dump_dir != "":
98+
rank = self.vllm_config.parallel_config.rank
99+
decompiled_file = os.path.join(debug_dump_dir, f"rank_{rank}",
100+
"transformed_code.py")
104101
if not os.path.exists(decompiled_file):
105102
try:
106103
# usually the decompilation will succeed for most models,
107104
# as we guarantee a full-graph compilation in Dynamo.
108105
# but there's no 100% guarantee, since decompilation is
109106
# not a reversible process.
110-
if envs.VLLM_COMPILE_DEPYF:
111-
import depyf
112-
src = depyf.decompile(new_code)
113-
else:
114-
src = (
115-
"To get a transformed_code.py file, re-run with "
116-
"VLLM_COMPILE_DEPYF=1")
107+
import depyf
108+
src = depyf.decompile(new_code)
117109

118110
with open(decompiled_file, "w") as f:
119111
f.write(src)

vllm/envs.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@
9797
VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
9898
VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
9999
VLLM_DISABLE_COMPILE_CACHE: bool = False
100-
VLLM_COMPILE_DEPYF: bool = False
101100
Q_SCALE_CONSTANT: int = 200
102101
K_SCALE_CONSTANT: int = 200
103102
V_SCALE_CONSTANT: int = 100
@@ -742,11 +741,6 @@ def get_vllm_port() -> Optional[int]:
742741
"VLLM_DISABLE_COMPILE_CACHE":
743742
lambda: bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0"))),
744743

745-
# If set, vllm will decompile the torch compiled code and dump to
746-
# transformed_code.py. This is useful for debugging.
747-
"VLLM_COMPILE_DEPYF":
748-
lambda: bool(int(os.getenv("VLLM_COMPILE_DEPYF", "0"))),
749-
750744
# If set, vllm will run in development mode, which will enable
751745
# some additional endpoints for developing and debugging,
752746
# e.g. `/reset_prefix_cache`

0 commit comments

Comments
 (0)