@@ -372,9 +372,9 @@ def _check_kwargs(self, kwargs: Dict):
    def _init_env(self) -> Optional[int]:
        # Add info
        if self.parallel:
-            _, info_dict = self._reset_parallel()
+            _, info_dict = self._reset_parallel(seed=self.seed)
        else:
-            _, info_dict = self._reset_aec()
+            _, info_dict = self._reset_aec(seed=self.seed)

        for group, agents in self.group_map.items():
            info_specs = []
@@ -440,19 +440,20 @@ def _init_env(self) -> Optional[int]:
        self.cached_step_output_zero.update(self.output_spec["full_reward_spec"].zero())
        self.cached_step_output_zero.update(self.output_spec["full_done_spec"].zero())

-    def _set_seed(self, seed: Optional[int]):
+    def _set_seed(self, seed: int):
        self.seed = seed
+        self.reset(seed=self.seed)

    def _reset(
        self, tensordict: Optional[TensorDictBase] = None, **kwargs
    ) -> TensorDictBase:

        if self.parallel:
            # This resets when any is done
-            observation_dict, info_dict = self._reset_parallel()
+            observation_dict, info_dict = self._reset_parallel(**kwargs)
        else:
            # This resets when all are done
-            observation_dict, info_dict = self._reset_aec(tensordict)
+            observation_dict, info_dict = self._reset_aec(tensordict, **kwargs)

        # We start with zeroed data and fill in the data for alive agents
        tensordict_out = self.cached_reset_output_zero.clone()
@@ -481,7 +482,7 @@ def _reset(

        return tensordict_out

-    def _reset_aec(self, tensordict=None) -> Tuple[Dict, Dict]:
+    def _reset_aec(self, tensordict=None, **kwargs) -> Tuple[Dict, Dict]:
        all_done = True
        if tensordict is not None:
            _resets = []
@@ -500,18 +501,16 @@ def _reset_aec(self, tensordict=None) -> Tuple[Dict, Dict]:
                    break

        if all_done:
-            self._env.reset(seed=self.seed)
+            self._env.reset(**kwargs)

        observation_dict = {
            agent: self._env.observe(agent) for agent in self.possible_agents
        }
        info_dict = self._env.infos
        return observation_dict, info_dict

-    def _reset_parallel(
-        self,
-    ) -> Tuple[Dict, Dict]:
-        return self._env.reset(seed=self.seed)
+    def _reset_parallel(self, **kwargs) -> Tuple[Dict, Dict]:
+        return self._env.reset(**kwargs)

    def _step(
        self,
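Taken together, the hunks change the seeding flow: `_set_seed` now stores the seed and immediately resets the wrapped PettingZoo env with it, while later resets forward whatever `seed` the caller passes through `**kwargs` instead of always reusing `self.seed`. Below is a minimal, self-contained sketch of that pattern; the `ToyWrapper` and `_ToyBackend` classes are illustrative stand-ins, not the TorchRL or PettingZoo implementation.

```python
from typing import Dict, Optional, Tuple


class _ToyBackend:
    """Stands in for the wrapped PettingZoo env; only reset(seed=...) matters here."""

    def __init__(self) -> None:
        self.last_seed: Optional[int] = None

    def reset(self, seed: Optional[int] = None) -> Tuple[Dict, Dict]:
        self.last_seed = seed
        # Parallel-style reset: return (observations, infos).
        return {"agent_0": 0.0}, {"agent_0": {}}


class ToyWrapper:
    """Hypothetical wrapper illustrating the patched seeding flow."""

    def __init__(self) -> None:
        self._env = _ToyBackend()
        self.seed: Optional[int] = None

    def _set_seed(self, seed: int) -> None:
        # Mirrors the new _set_seed: remember the seed and reset right away,
        # so the backend is actually re-seeded rather than only bookkept.
        self.seed = seed
        self.reset(seed=self.seed)

    def reset(self, **kwargs) -> Tuple[Dict, Dict]:
        # Mirrors the new _reset/_reset_parallel: forward the caller's kwargs
        # (which may or may not contain a seed) to the backend.
        return self._env.reset(**kwargs)


wrapper = ToyWrapper()
wrapper._set_seed(42)
assert wrapper._env.last_seed == 42  # seeding now reaches the backend immediately
wrapper.reset()                      # a plain reset no longer re-applies the stored seed
assert wrapper._env.last_seed is None
```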