Skip to content

Commit c1c8ca5

Browse files
authored
[cold start time] add envs.VLLM_COMPILE_DEPYF to guard decompile (#20790)
Signed-off-by: Boyuan Feng <boyuan@meta.com>
1 parent a3a5a47 commit c1c8ca5

File tree

2 files changed

+19
-3
lines changed

2 files changed

+19
-3
lines changed

vllm/compilation/wrapper.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,16 +95,26 @@ def bytecode_hook(self, old_code: CodeType, new_code: CodeType):
9595
self.compiled_codes.append(new_code)
9696
local_cache_dir = self.vllm_config.compilation_config.local_cache_dir
9797
if isinstance(local_cache_dir, str):
98+
decompiled_file_name = ("transformed_code.py"
99+
if envs.VLLM_COMPILE_DEPYF else
100+
"transformed_code_README.txt")
101+
98102
decompiled_file = os.path.join(local_cache_dir,
99-
"transformed_code.py")
103+
decompiled_file_name)
100104
if not os.path.exists(decompiled_file):
101105
try:
102106
# usually the decompilation will succeed for most models,
103107
# as we guarantee a full-graph compilation in Dynamo.
104108
# but there's no 100% guarantee, since decompilation is
105109
# not a reversible process.
106-
import depyf
107-
src = depyf.decompile(new_code)
110+
if envs.VLLM_COMPILE_DEPYF:
111+
import depyf
112+
src = depyf.decompile(new_code)
113+
else:
114+
src = (
115+
"To get a transformed_code.py file, re-run with "
116+
"VLLM_COMPILE_DEPYF=1")
117+
108118
with open(decompiled_file, "w") as f:
109119
f.write(src)
110120

vllm/envs.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@
9797
VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
9898
VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
9999
VLLM_DISABLE_COMPILE_CACHE: bool = False
100+
VLLM_COMPILE_DEPYF: bool = False
100101
Q_SCALE_CONSTANT: int = 200
101102
K_SCALE_CONSTANT: int = 200
102103
V_SCALE_CONSTANT: int = 100
@@ -741,6 +742,11 @@ def get_vllm_port() -> Optional[int]:
741742
"VLLM_DISABLE_COMPILE_CACHE":
742743
lambda: bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0"))),
743744

745+
# If set, vllm will decompile the torch compiled code and dump to
746+
# transformed_code.py. This is useful for debugging.
747+
"VLLM_COMPILE_DEPYF":
748+
lambda: bool(int(os.getenv("VLLM_COMPILE_DEPYF", "0"))),
749+
744750
# If set, vllm will run in development mode, which will enable
745751
# some additional endpoints for developing and debugging,
746752
# e.g. `/reset_prefix_cache`

0 commit comments

Comments
 (0)