@@ -557,8 +557,17 @@ def _check_ray_cgraph_installation(self):
557
557
def _compiled_ray_dag (self , enable_asyncio : bool ):
558
558
assert self .parallel_config .use_ray
559
559
self ._check_ray_cgraph_installation ()
560
+ # Enlarge the default value of "RAY_CGRAPH_get_timeout" to 300 seconds
561
+ # (it is 10 seconds by default). This is a Ray environment variable to
562
+ # control the timeout of getting result from a compiled graph execution,
563
+ # i.e., the distributed execution that includes model forward runs and
564
+ # intermediate tensor communications, in the case of vllm.
565
+ # Note: we should set this env var before importing
566
+ # ray.dag, otherwise it will not take effect.
567
+ os .environ .setdefault ("RAY_CGRAPH_get_timeout" , "300" ) # noqa: SIM112
560
568
from ray .dag import InputNode , MultiOutputNode
561
-
569
+ logger .info ("RAY_CGRAPH_get_timeout is set to %s" ,
570
+ os .environ ["RAY_CGRAPH_get_timeout" ]) # noqa: SIM112
562
571
logger .info ("VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE = %s" ,
563
572
envs .VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE )
564
573
logger .info ("VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM = %s" ,
@@ -570,15 +579,6 @@ def _compiled_ray_dag(self, enable_asyncio: bool):
570
579
"Invalid value for VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE: "
571
580
f"{ channel_type } . Valid values are: 'auto', 'nccl', or 'shm'." )
572
581
573
- # Enlarge the default value of "RAY_CGRAPH_get_timeout" to 300 seconds
574
- # (it is 10 seconds by default). This is a Ray environment variable to
575
- # control the timeout of getting result from a compiled graph execution,
576
- # i.e., the distributed execution that includes model forward runs and
577
- # intermediate tensor communications, in the case of vllm.
578
- os .environ .setdefault ("RAY_CGRAPH_get_timeout" , "300" ) # noqa: SIM112
579
- logger .info ("RAY_CGRAPH_get_timeout is set to %s" ,
580
- os .environ ["RAY_CGRAPH_get_timeout" ]) # noqa: SIM112
581
-
582
582
with InputNode () as input_data :
583
583
# Example DAG: PP=2, TP=4
584
584
#
0 commit comments