[BugFix] Update to strict select (#675)

vmoens · web-flow · commit ac4b987ebc82 · 2022-11-15T15:33:23.000Z
* init

* strict=False

* amend

* amend
diff --git a/test/test_rb.py b/test/test_rb.py
@@ -125,16 +125,18 @@ def test_extend(self, rb_type, sampler, writer, storage, size):
             found_similar = False
             for b in rb._storage:
                 if isinstance(b, TensorDictBase):
-                    b = b.exclude("index").select(*set(d.keys()).intersection(b.keys()))
-                    d = d.select(*set(d.keys()).intersection(b.keys()))
+                    keys = set(d.keys()).intersection(b.keys())
+                    b = b.exclude("index").select(*keys, strict=False)
+                    keys = set(d.keys()).intersection(b.keys())
+                    d = d.select(*keys, strict=False)
 
                 value = b == d
                 if isinstance(value, (torch.Tensor, TensorDictBase)):
                     value = value.all()
                 if value:
-                    found_similar = True
                     break
-            assert found_similar
+            else:
+                raise RuntimeError("did not find match")
 
     def test_sample(self, rb_type, sampler, writer, storage, size):
         torch.manual_seed(0)
@@ -152,18 +154,18 @@ def test_sample(self, rb_type, sampler, writer, storage, size):
             for b in data:
                 print(b, d)
                 if isinstance(b, TensorDictBase):
-                    b = b.exclude("index").select(*set(d.keys()).intersection(b.keys()))
-                    d = d.select(*set(d.keys()).intersection(b.keys()))
+                    keys = set(d.keys()).intersection(b.keys())
+                    b = b.exclude("index").select(*keys, strict=False)
+                    keys = set(d.keys()).intersection(b.keys())
+                    d = d.select(*keys, strict=False)
 
                 value = b == d
                 if isinstance(value, (torch.Tensor, TensorDictBase)):
                     value = value.all()
                 if value:
-                    found_similar = True
                     break
-            if not found_similar:
-                d
-            assert found_similar, (d, data)
+            else:
+                raise RuntimeError("did not find match")
 
     def test_index(self, rb_type, sampler, writer, storage, size):
         torch.manual_seed(0)
@@ -394,16 +396,18 @@ def test_extend(self, rbtype, storage, size, prefetch):
             found_similar = False
             for b in rb._storage:
                 if isinstance(b, TensorDictBase):
-                    b = b.exclude("index").select(*set(d.keys()).intersection(b.keys()))
-                    d = d.select(*set(d.keys()).intersection(b.keys()))
+                    keys = set(d.keys()).intersection(b.keys())
+                    b = b.exclude("index").select(*keys, strict=False)
+                    keys = set(d.keys()).intersection(b.keys())
+                    d = d.select(*keys, strict=False)
 
                 value = b == d
                 if isinstance(value, (torch.Tensor, TensorDictBase)):
                     value = value.all()
                 if value:
-                    found_similar = True
                     break
-            assert found_similar
+            else:
+                raise RuntimeError("did not find match")
 
     def test_sample(self, rbtype, storage, size, prefetch):
         torch.manual_seed(0)
@@ -418,18 +422,18 @@ def test_sample(self, rbtype, storage, size, prefetch):
             found_similar = False
             for b in data:
                 if isinstance(b, TensorDictBase):
-                    b = b.exclude("index").select(*set(d.keys()).intersection(b.keys()))
-                    d = d.select(*set(d.keys()).intersection(b.keys()))
+                    keys = set(d.keys()).intersection(b.keys())
+                    b = b.exclude("index").select(*keys, strict=False)
+                    keys = set(d.keys()).intersection(b.keys())
+                    d = d.select(*keys, strict=False)
 
                 value = b == d
                 if isinstance(value, (torch.Tensor, TensorDictBase)):
                     value = value.all()
                 if value:
-                    found_similar = True
                     break
-            if not found_similar:
-                d
-            assert found_similar, (d, data)
+            else:
+                raise RuntimeError("did not find matching value")
 
     def test_index(self, rbtype, storage, size, prefetch):
         torch.manual_seed(0)
diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py
@@ -536,7 +536,8 @@ def iterator(self) -> Iterator[TensorDictBase]:
     def _cast_to_policy(self, td: TensorDictBase) -> TensorDictBase:
         policy_device = self.device
         if hasattr(self.policy, "in_keys"):
-            td = td.select(*self.policy.in_keys)
+            # some keys may be absent -- TensorDictModule is resilient to missing keys
+            td = td.select(*self.policy.in_keys, strict=False)
         if self._td_policy is None:
             self._td_policy = td.to(policy_device)
         else:
diff --git a/torchrl/envs/vec_env.py b/torchrl/envs/vec_env.py
@@ -421,13 +421,14 @@ def _create_td(self) -> None:
                 )
         if self._single_task:
             shared_tensordict_parent = shared_tensordict_parent.select(
-                *self.selected_keys
+                *self.selected_keys,
+                strict=False,
             )
             self.shared_tensordict_parent = shared_tensordict_parent.to(self.device)
         else:
             shared_tensordict_parent = torch.stack(
                 [
-                    tensordict.select(*selected_keys).to(self.device)
+                    tensordict.select(*selected_keys, strict=False).to(self.device)
                     for tensordict, selected_keys in zip(
                         shared_tensordict_parent, self.selected_keys
                     )
@@ -573,7 +574,10 @@ def _step(
     ) -> TensorDict:
         self._assert_tensordict_shape(tensordict)
 
-        tensordict_in = tensordict.select(*self.env_input_keys)
+        tensordict_in = tensordict.select(
+            *self.env_input_keys,
+            strict=False,
+        )
         tensordict_out = []
         for i in range(self.num_workers):
             _tensordict_out = self._envs[i].step(tensordict_in[i])
@@ -611,7 +615,10 @@ def _reset(self, tensordict: TensorDictBase, **kwargs) -> TensorDictBase:
             keys = keys.union(_td.keys())
             self.shared_tensordicts[i].update_(_td)
 
-        return self.shared_tensordict_parent.select(*keys).clone()
+        return self.shared_tensordict_parent.select(
+            *keys,
+            strict=False,
+        ).clone()
 
     def __getattr__(self, attr: str) -> Any:
         if attr in self.__dir__():
@@ -740,7 +747,12 @@ def load_state_dict(self, state_dict: OrderedDict) -> None:
     def _step(self, tensordict: TensorDictBase) -> TensorDictBase:
         self._assert_tensordict_shape(tensordict)
 
-        self.shared_tensordict_parent.update_(tensordict.select(*self.env_input_keys))
+        self.shared_tensordict_parent.update_(
+            tensordict.select(
+                *self.env_input_keys,
+                strict=False,
+            )
+        )
         for i in range(self.num_workers):
             self.parent_channels[i].send(("step", None))
 
@@ -756,7 +768,10 @@ def _step(self, tensordict: TensorDictBase) -> TensorDictBase:
             keys = keys.union(data)
         # We must pass a clone of the tensordict, as the values of this tensordict
         # will be modified in-place at further steps
-        return self.shared_tensordict_parent.select(*keys).clone()
+        return self.shared_tensordict_parent.select(
+            *keys,
+            strict=False,
+        ).clone()
 
     @_check_start
     def _shutdown_workers(self) -> None:
@@ -829,7 +844,10 @@ def _reset(self, tensordict: TensorDictBase, **kwargs) -> TensorDictBase:
                 # there might be some delay between writing the shared tensordict
                 # and reading the updated value on the main process
                 sleep(0.01)
-        return self.shared_tensordict_parent.select(*keys).clone()
+        return self.shared_tensordict_parent.select(
+            *keys,
+            strict=False,
+        ).clone()
 
     def __reduce__(self):
         if not self.is_closed:
@@ -979,7 +997,10 @@ def _run_worker_pipe_shared_mem(
             if not initialized:
                 raise RuntimeError("called 'init' before step")
             i += 1
-            _td = tensordict.select(*env_input_keys)
+            _td = tensordict.select(
+                *env_input_keys,
+                strict=False,
+            )
             if env.is_done and not allow_step_when_done:
                 raise RuntimeError(
                     f"calling step when env is done, just reset = {just_reset}"
@@ -989,7 +1010,7 @@ def _run_worker_pipe_shared_mem(
                 step_keys = set(_td.keys()) - set(env_input_keys)
             if pin_memory:
                 _td.pin_memory()
-            tensordict.update_(_td.select(*step_keys))
+            tensordict.update_(_td.select(*step_keys, strict=False))
             if _td.get("done"):
                 msg = "done"
             else:
diff --git a/torchrl/modules/models/model_based.py b/torchrl/modules/models/model_based.py
@@ -200,7 +200,7 @@ def forward(self, tensordict):
             tensordict_out.append(_tensordict)
             if t < time_steps - 1:
                 _tensordict = step_mdp(
-                    _tensordict.select(*self.out_keys), keep_other=False
+                    _tensordict.select(*self.out_keys, strict=False), keep_other=False
                 )
                 _tensordict = update_values[..., t + 1].update(_tensordict)
 

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -200,7 +200,7 @@ def forward(self, tensordict):` |
| `200` | `200` | `tensordict_out.append(_tensordict)` |
| `201` | `201` | `if t < time_steps - 1:` |
| `202` | `202` | `_tensordict = step_mdp(` |
| `203` |  | `- _tensordict.select(*self.out_keys), keep_other=False` |
|  | `203` | `+ _tensordict.select(*self.out_keys, strict=False), keep_other=False` |
| `204` | `204` | `)` |
| `205` | `205` | `_tensordict = update_values[..., t + 1].update(_tensordict)` |
| `206` | `206` |  |
`206`	`206`