Skip to content

Commit 91b3d19

Browse files
authored
[cold start] replace VLLM_COMPILE_DEPYF with debug_dump_dir (#20940)
Signed-off-by: Boyuan Feng <boyuan@meta.com>
1 parent fc01791 commit 91b3d19

File tree

2 files changed

+7
-21
lines changed

2 files changed

+7
-21
lines changed

vllm/compilation/wrapper.py

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -93,27 +93,19 @@ def bytecode_hook(self, old_code: CodeType, new_code: CodeType):
9393
return
9494

9595
self.compiled_codes.append(new_code)
96-
local_cache_dir = self.vllm_config.compilation_config.local_cache_dir
97-
if isinstance(local_cache_dir, str):
98-
decompiled_file_name = ("transformed_code.py"
99-
if envs.VLLM_COMPILE_DEPYF else
100-
"transformed_code_README.txt")
101-
102-
decompiled_file = os.path.join(local_cache_dir,
103-
decompiled_file_name)
96+
debug_dump_dir = self.vllm_config.compilation_config.debug_dump_path
97+
if isinstance(debug_dump_dir, str) and debug_dump_dir != "":
98+
rank = self.vllm_config.parallel_config.rank
99+
decompiled_file = os.path.join(debug_dump_dir, f"rank_{rank}",
100+
"transformed_code.py")
104101
if not os.path.exists(decompiled_file):
105102
try:
106103
# usually the decompilation will succeed for most models,
107104
# as we guarantee a full-graph compilation in Dynamo.
108105
# but there's no 100% guarantee, since decompilation is
109106
# not a reversible process.
110-
if envs.VLLM_COMPILE_DEPYF:
111-
import depyf
112-
src = depyf.decompile(new_code)
113-
else:
114-
src = (
115-
"To get a transformed_code.py file, re-run with "
116-
"VLLM_COMPILE_DEPYF=1")
107+
import depyf
108+
src = depyf.decompile(new_code)
117109

118110
with open(decompiled_file, "w") as f:
119111
f.write(src)

vllm/envs.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@
9797
VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
9898
VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
9999
VLLM_DISABLE_COMPILE_CACHE: bool = False
100-
VLLM_COMPILE_DEPYF: bool = False
101100
Q_SCALE_CONSTANT: int = 200
102101
K_SCALE_CONSTANT: int = 200
103102
V_SCALE_CONSTANT: int = 100
@@ -742,11 +741,6 @@ def get_vllm_port() -> Optional[int]:
742741
"VLLM_DISABLE_COMPILE_CACHE":
743742
lambda: bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0"))),
744743

745-
# If set, vllm will decompile the torch compiled code and dump to
746-
# transformed_code.py. This is useful for debugging.
747-
"VLLM_COMPILE_DEPYF":
748-
lambda: bool(int(os.getenv("VLLM_COMPILE_DEPYF", "0"))),
749-
750744
# If set, vllm will run in development mode, which will enable
751745
# some additional endpoints for developing and debugging,
752746
# e.g. `/reset_prefix_cache`

0 commit comments

Comments
 (0)