[Quality] Longer warmup for cudagraph within sota implementations

Vincent Moens · Vincent Moens · commit ccc31b53abcf · 2025-05-09T13:51:24.000+01:00
ghstack-source-id: 140ba6e
Pull-Request-resolved: #2945
diff --git a/sota-implementations/a2c/a2c_atari.py b/sota-implementations/a2c/a2c_atari.py
@@ -183,7 +183,7 @@ def update(batch, max_grad_norm=cfg.optim.max_grad_norm):
         storing_device=device,
         policy_device=device,
         compile_policy={"mode": compile_mode} if cfg.compile.compile else False,
-        cudagraph_policy=cfg.compile.cudagraphs,
+        cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,
     )
 
     # Main loop
diff --git a/sota-implementations/a2c/a2c_mujoco.py b/sota-implementations/a2c/a2c_mujoco.py
@@ -167,7 +167,7 @@ def update(batch):
         max_frames_per_traj=-1,
         trust_policy=True,
         compile_policy={"mode": compile_mode} if compile_mode is not None else False,
-        cudagraph_policy=cfg.compile.cudagraphs,
+        cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,
     )
 
     test_env.eval()
diff --git a/sota-implementations/discrete_sac/utils.py b/sota-implementations/discrete_sac/utils.py
@@ -139,7 +139,7 @@ def make_collector(
         device=device,
         storing_device="cpu",
         compile_policy=False if not compile else {"mode": compile_mode},
-        cudagraph_policy=cudagraphs,
+        cudagraph_policy={"warmup": 10} if cudagraphs else False,
     )
     collector.set_seed(cfg.env.seed)
     return collector
diff --git a/sota-implementations/dqn/dqn_atari.py b/sota-implementations/dqn/dqn_atari.py
@@ -178,7 +178,7 @@ def update(sampled_tensordict):
         compile_policy={"mode": compile_mode, "fullgraph": True}
         if compile_mode is not None
         else False,
-        cudagraph_policy=cfg.compile.cudagraphs,
+        cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,
     )
 
     # Main loop
diff --git a/sota-implementations/dqn/dqn_cartpole.py b/sota-implementations/dqn/dqn_cartpole.py
@@ -136,7 +136,7 @@ def update(sampled_tensordict):
         compile_policy={"mode": compile_mode, "fullgraph": True}
         if compile_mode is not None
         else False,
-        cudagraph_policy=cfg.compile.cudagraphs,
+        cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,
     )
 
     # Main loop
diff --git a/sota-implementations/gail/gail.py b/sota-implementations/gail/gail.py
@@ -138,7 +138,7 @@ def main(cfg: DictConfig):  # noqa: F821
         device=device,
         max_frames_per_traj=-1,
         compile_policy={"mode": compile_mode} if compile_mode is not None else False,
-        cudagraph_policy=cfg.compile.cudagraphs,
+        cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,
     )
 
     # Create replay buffer
diff --git a/sota-implementations/iql/utils.py b/sota-implementations/iql/utils.py
@@ -138,7 +138,7 @@ def make_collector(cfg, train_env, actor_model_explore, compile_mode):
         total_frames=cfg.collector.total_frames,
         device=device,
         compile_policy={"mode": compile_mode} if compile_mode else False,
-        cudagraph_policy=cfg.compile.cudagraphs,
+        cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,
     )
     collector.set_seed(cfg.env.seed)
     return collector
diff --git a/sota-implementations/ppo/ppo_atari.py b/sota-implementations/ppo/ppo_atari.py
@@ -80,7 +80,7 @@ def main(cfg: DictConfig):  # noqa: F821
         device=device,
         max_frames_per_traj=-1,
         compile_policy={"mode": compile_mode, "warmup": 1} if compile_mode else False,
-        cudagraph_policy=cfg.compile.cudagraphs,
+        cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,
     )
 
     # Create data buffer
diff --git a/sota-implementations/ppo/ppo_mujoco.py b/sota-implementations/ppo/ppo_mujoco.py
@@ -73,7 +73,7 @@ def main(cfg: DictConfig):  # noqa: F821
         device=device,
         max_frames_per_traj=-1,
         compile_policy={"mode": compile_mode, "warmup": 1} if compile_mode else False,
-        cudagraph_policy=cfg.compile.cudagraphs,
+        cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,
     )
 
     # Create data buffer
diff --git a/sota-implementations/sac/utils.py b/sota-implementations/sac/utils.py
@@ -125,7 +125,7 @@ def make_collector(cfg, train_env, actor_model_explore, compile_mode):
         total_frames=cfg.collector.total_frames,
         device=device,
         compile_policy={"mode": compile_mode} if compile_mode else False,
-        cudagraph_policy=cfg.compile.cudagraphs,
+        cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,
     )
     collector.set_seed(cfg.env.seed)
     return collector
diff --git a/sota-implementations/td3/utils.py b/sota-implementations/td3/utils.py
@@ -125,7 +125,7 @@ def make_collector(cfg, train_env, actor_model_explore, compile_mode, device):
         reset_at_each_iter=cfg.collector.reset_at_each_iter,
         device=collector_device,
         compile_policy={"mode": compile_mode} if compile_mode else False,
-        cudagraph_policy=cfg.compile.cudagraphs,
+        cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,
     )
     collector.set_seed(cfg.env.seed)
     return collector

Original file line number	Diff line number	Diff line change
`@@ -183,7 +183,7 @@ def update(batch, max_grad_norm=cfg.optim.max_grad_norm):`
`183`	`183`	`storing_device=device,`
`184`	`184`	`policy_device=device,`
`185`	`185`	`compile_policy={"mode": compile_mode} if cfg.compile.compile else False,`
`186`		`- cudagraph_policy=cfg.compile.cudagraphs,`
	`186`	`+ cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,`
`187`	`187`	`)`
`188`	`188`
`189`	`189`	`# Main loop`
Original file line number	Diff line number	Diff line change
`@@ -167,7 +167,7 @@ def update(batch):`
`167`	`167`	`max_frames_per_traj=-1,`
`168`	`168`	`trust_policy=True,`
`169`	`169`	`compile_policy={"mode": compile_mode} if compile_mode is not None else False,`
`170`		`- cudagraph_policy=cfg.compile.cudagraphs,`
	`170`	`+ cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,`
`171`	`171`	`)`
`172`	`172`
`173`	`173`	`test_env.eval()`
Original file line number	Diff line number	Diff line change
`@@ -139,7 +139,7 @@ def make_collector(`
`139`	`139`	`device=device,`
`140`	`140`	`storing_device="cpu",`
`141`	`141`	`compile_policy=False if not compile else {"mode": compile_mode},`
`142`		`- cudagraph_policy=cudagraphs,`
	`142`	`+ cudagraph_policy={"warmup": 10} if cudagraphs else False,`
`143`	`143`	`)`
`144`	`144`	`collector.set_seed(cfg.env.seed)`
`145`	`145`	`return collector`
Original file line number	Diff line number	Diff line change
`@@ -178,7 +178,7 @@ def update(sampled_tensordict):`
`178`	`178`	`compile_policy={"mode": compile_mode, "fullgraph": True}`
`179`	`179`	`if compile_mode is not None`
`180`	`180`	`else False,`
`181`		`- cudagraph_policy=cfg.compile.cudagraphs,`
	`181`	`+ cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,`
`182`	`182`	`)`
`183`	`183`
`184`	`184`	`# Main loop`
Original file line number	Diff line number	Diff line change
`@@ -136,7 +136,7 @@ def update(sampled_tensordict):`
`136`	`136`	`compile_policy={"mode": compile_mode, "fullgraph": True}`
`137`	`137`	`if compile_mode is not None`
`138`	`138`	`else False,`
`139`		`- cudagraph_policy=cfg.compile.cudagraphs,`
	`139`	`+ cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,`
`140`	`140`	`)`
`141`	`141`
`142`	`142`	`# Main loop`
Original file line number	Diff line number	Diff line change
`@@ -138,7 +138,7 @@ def main(cfg: DictConfig): # noqa: F821`
`138`	`138`	`device=device,`
`139`	`139`	`max_frames_per_traj=-1,`
`140`	`140`	`compile_policy={"mode": compile_mode} if compile_mode is not None else False,`
`141`		`- cudagraph_policy=cfg.compile.cudagraphs,`
	`141`	`+ cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,`
`142`	`142`	`)`
`143`	`143`
`144`	`144`	`# Create replay buffer`
Original file line number	Diff line number	Diff line change
`@@ -138,7 +138,7 @@ def make_collector(cfg, train_env, actor_model_explore, compile_mode):`
`138`	`138`	`total_frames=cfg.collector.total_frames,`
`139`	`139`	`device=device,`
`140`	`140`	`compile_policy={"mode": compile_mode} if compile_mode else False,`
`141`		`- cudagraph_policy=cfg.compile.cudagraphs,`
	`141`	`+ cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,`
`142`	`142`	`)`
`143`	`143`	`collector.set_seed(cfg.env.seed)`
`144`	`144`	`return collector`
Original file line number	Diff line number	Diff line change
`@@ -80,7 +80,7 @@ def main(cfg: DictConfig): # noqa: F821`
`80`	`80`	`device=device,`
`81`	`81`	`max_frames_per_traj=-1,`
`82`	`82`	`compile_policy={"mode": compile_mode, "warmup": 1} if compile_mode else False,`
`83`		`- cudagraph_policy=cfg.compile.cudagraphs,`
	`83`	`+ cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,`
`84`	`84`	`)`
`85`	`85`
`86`	`86`	`# Create data buffer`
Original file line number	Diff line number	Diff line change
`@@ -73,7 +73,7 @@ def main(cfg: DictConfig): # noqa: F821`
`73`	`73`	`device=device,`
`74`	`74`	`max_frames_per_traj=-1,`
`75`	`75`	`compile_policy={"mode": compile_mode, "warmup": 1} if compile_mode else False,`
`76`		`- cudagraph_policy=cfg.compile.cudagraphs,`
	`76`	`+ cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,`
`77`	`77`	`)`
`78`	`78`
`79`	`79`	`# Create data buffer`
Original file line number	Diff line number	Diff line change
`@@ -125,7 +125,7 @@ def make_collector(cfg, train_env, actor_model_explore, compile_mode):`
`125`	`125`	`total_frames=cfg.collector.total_frames,`
`126`	`126`	`device=device,`
`127`	`127`	`compile_policy={"mode": compile_mode} if compile_mode else False,`
`128`		`- cudagraph_policy=cfg.compile.cudagraphs,`
	`128`	`+ cudagraph_policy={"warmup": 10} if cfg.compile.cudagraphs else False,`
`129`	`129`	`)`
`130`	`130`	`collector.set_seed(cfg.env.seed)`
`131`	`131`	`return collector`