[Feature] Make DQN compatible with nn.Module (#632)

svarolgunes · Serhat Varolgünes · web-flow · commit 49039d1d3ce4 · 2022-11-03T11:54:04.000Z
* Modification to allow an nn.Module as the value_network arg in DQNLoss

* Unit tests added to cover the feature

* utility functions added

* tests are updated, utility function rectified

* initializer docstrings are updated for loss classes

* util functions added to rst file

* lint fix

Co-authored-by: Serhat Varolgünes <svarolgunes@fb.com>
diff --git a/docs/source/reference/modules.rst b/docs/source/reference/modules.rst
@@ -23,6 +23,8 @@ TensorDict modules
     ActorValueOperator
     ActorCriticOperator
     ActorCriticWrapper
+    is_tensordict_compatible
+    ensure_tensordict_compatible
 
 Hooks
 -----
diff --git a/test/test_cost.py b/test/test_cost.py
@@ -126,7 +126,13 @@ class TestDQN:
     seed = 0
 
     def _create_mock_actor(
-        self, action_spec_type, batch=2, obs_dim=3, action_dim=4, device="cpu"
+        self,
+        action_spec_type,
+        batch=2,
+        obs_dim=3,
+        action_dim=4,
+        device="cpu",
+        is_nn_module=False,
     ):
         # Actor
         if action_spec_type == "one_hot":
@@ -141,6 +147,8 @@ def _create_mock_actor(
             raise ValueError(f"Wrong {action_spec_type}")
 
         module = nn.Linear(obs_dim, action_dim)
+        if is_nn_module:
+            return module.to(device)
         actor = QValueActor(
             spec=CompositeSpec(
                 action=action_spec, action_value=None, chosen_action_value=None
@@ -158,6 +166,7 @@ def _create_mock_distributional_actor(
         atoms=5,
         vmin=1,
         vmax=5,
+        is_nn_module=False,
     ):
         # Actor
         if action_spec_type == "mult_one_hot":
@@ -170,6 +179,11 @@ def _create_mock_distributional_actor(
             raise ValueError(f"Wrong {action_spec_type}")
         support = torch.linspace(vmin, vmax, atoms, dtype=torch.float)
         module = MLP(obs_dim, (atoms, action_dim))
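+        # TODO: `is_nn_module` is accepted but currently ignored by this helper.
+        # Returning the bare module (see the commented-out code below) fails tests with
+        # TypeError: __init__() missing 1 required keyword-only argument: 'support'
+        # because the DistributionalQValueActor initializer expects additional inputs.
+        # if is_nn_module:
+        #     return module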
         actor = DistributionalQValueActor(
             spec=CompositeSpec(action=action_spec, action_value=None),
             module=module,
@@ -272,10 +286,11 @@ def _create_seq_mock_data_dqn(
     @pytest.mark.parametrize(
         "action_spec_type", ("nd_bounded", "one_hot", "categorical")
     )
-    def test_dqn(self, delay_value, device, action_spec_type):
+    @pytest.mark.parametrize("is_nn_module", (False, True))
+    def test_dqn(self, delay_value, device, action_spec_type, is_nn_module):
         torch.manual_seed(self.seed)
         actor = self._create_mock_actor(
-            action_spec_type=action_spec_type, device=device
+            action_spec_type=action_spec_type, device=device, is_nn_module=is_nn_module
         )
         td = self._create_mock_data_dqn(
             action_spec_type=action_spec_type, device=device
@@ -471,12 +486,13 @@ def test_dqn_batcher_nofunctorch(
     @pytest.mark.parametrize(
         "action_spec_type", ("mult_one_hot", "one_hot", "categorical")
     )
+    @pytest.mark.parametrize("is_nn_module", (False, True))
     def test_distributional_dqn(
-        self, atoms, delay_value, device, action_spec_type, gamma=0.9
+        self, atoms, delay_value, device, action_spec_type, is_nn_module, gamma=0.9
     ):
         torch.manual_seed(self.seed)
         actor = self._create_mock_distributional_actor(
-            action_spec_type=action_spec_type, atoms=atoms
+            action_spec_type=action_spec_type, atoms=atoms, is_nn_module=is_nn_module
         ).to(device)
 
         td = self._create_mock_data_dqn(
diff --git a/test/test_tensordictmodules.py b/test/test_tensordictmodules.py
@@ -31,6 +31,10 @@
 )
 from torchrl.envs.utils import set_exploration_mode
 from torchrl.modules import NormalParamWrapper, TanhNormal, TensorDictModule
+from torchrl.modules.tensordict_module.common import (
+    is_tensordict_compatible,
+    ensure_tensordict_compatible,
+)
 from torchrl.modules.tensordict_module.probabilistic import (
     ProbabilisticTensorDictModule,
 )
@@ -1842,3 +1846,110 @@ def test_subsequence_weight_update(self):
     if __name__ == "__main__":
         args, unknown = argparse.ArgumentParser().parse_known_args()
         pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
+
+
+def test_is_tensordict_compatible():
+    """Exercise the three outcomes of ``is_tensordict_compatible``.
+
+    Covers a ``TensorDictModule`` (True), a single-argument module defining both
+    ``in_keys`` and ``out_keys`` (True), a module defining neither (False) and a
+    multi-argument module that defines the keys (TypeError).
+    """
+
+    class MultiHeadLinear(nn.Module):
+        def __init__(self, in_1, out_1, out_2, out_3):
+            super().__init__()
+            self.linear_1 = nn.Linear(in_1, out_1)
+            self.linear_2 = nn.Linear(in_1, out_2)
+            self.linear_3 = nn.Linear(in_1, out_3)
+
+        def forward(self, x):
+            return self.linear_1(x), self.linear_2(x), self.linear_3(x)
+
+    # A TensorDictModule is compatible by construction.
+    td_module = TensorDictModule(
+        MultiHeadLinear(5, 4, 3, 2),
+        in_keys=["in_1", "in_2"],
+        out_keys=["out_1", "out_2"],
+    )
+    assert is_tensordict_compatible(td_module)
+
+    class MockCompatibleModule(nn.Module):
+        def __init__(self, in_keys, out_keys):
+            # nn.Module must be initialised before attributes are assigned on it
+            super().__init__()
+            self.in_keys = in_keys
+            self.out_keys = out_keys
+
+        def forward(self, tensordict):
+            pass
+
+    # Single-argument forward + both key attributes: assumed TensorDict-ready.
+    compatible_nn_module = MockCompatibleModule(
+        in_keys=["in_1", "in_2"],
+        out_keys=["out_1", "out_2"],
+    )
+    assert is_tensordict_compatible(compatible_nn_module)
+
+    class MockIncompatibleModuleNoKeys(nn.Module):
+        def forward(self, input):
+            pass
+
+    # Neither in_keys nor out_keys: reported as not compatible (candidate for wrapping).
+    incompatible_nn_module_no_keys = MockIncompatibleModuleNoKeys()
+    assert not is_tensordict_compatible(incompatible_nn_module_no_keys)
+
+    class MockIncompatibleModuleMultipleArgs(nn.Module):
+        def __init__(self, in_keys, out_keys):
+            # nn.Module must be initialised before attributes are assigned on it
+            super().__init__()
+            self.in_keys = in_keys
+            self.out_keys = out_keys
+
+        def forward(self, input_1, input_2):
+            pass
+
+    # Keys are defined but forward takes several arguments: ambiguous, must raise.
+    incompatible_nn_module_multi_args = MockIncompatibleModuleMultipleArgs(
+        in_keys=["in_1", "in_2"],
+        out_keys=["out_1", "out_2"],
+    )
+    with pytest.raises(TypeError):
+        is_tensordict_compatible(incompatible_nn_module_multi_args)
+
+
+def test_ensure_tensordict_compatible():
+    """Exercise ``ensure_tensordict_compatible`` on compatible, invalid and raw modules.
+
+    Checks that an already compatible module is returned unchanged, that key
+    mismatches and unsupported objects raise ``TypeError``, and that a plain
+    ``nn.Module`` is wrapped in a ``TensorDictModule`` carrying the requested keys.
+    """
+
+    class MultiHeadLinear(nn.Module):
+        def __init__(self, in_1, out_1, out_2, out_3):
+            super().__init__()
+            self.linear_1 = nn.Linear(in_1, out_1)
+            self.linear_2 = nn.Linear(in_1, out_2)
+            self.linear_3 = nn.Linear(in_1, out_3)
+
+        def forward(self, x):
+            return self.linear_1(x), self.linear_2(x), self.linear_3(x)
+
+    td_module = TensorDictModule(
+        MultiHeadLinear(5, 4, 3, 2),
+        in_keys=["in_1", "in_2"],
+        out_keys=["out_1", "out_2"],
+    )
+    # An already compatible module must be returned as-is, not re-wrapped.
+    ensured_module = ensure_tensordict_compatible(td_module)
+    assert ensured_module is td_module
+    # Requested keys that disagree with the module's own keys are rejected.
+    with pytest.raises(TypeError):
+        ensure_tensordict_compatible(td_module, in_keys=["input"])
+    with pytest.raises(TypeError):
+        ensure_tensordict_compatible(td_module, out_keys=["output"])
+
+    class NonNNModule:
+        def __init__(self):
+            pass
+
+        def forward(self, x):
+            pass
+
+    # Objects that are neither TensorDict compatible nor nn.Module cannot be wrapped.
+    non_nn_module = NonNNModule()
+    with pytest.raises(TypeError):
+        ensure_tensordict_compatible(non_nn_module)
+
+    class ErrorNNModule(nn.Module):
+        def forward(self, in_1, in_2):
+            pass
+
+    # forward argument names must correspond one-to-one with the requested in_keys.
+    error_nn_module = ErrorNNModule()
+    with pytest.raises(TypeError):
+        ensure_tensordict_compatible(error_nn_module, in_keys=["input"])
+
+    nn_module = MultiHeadLinear(5, 4, 3, 2)
+    ensured_module = ensure_tensordict_compatible(
+        nn_module,
+        in_keys=["x"],
+        out_keys=["out_1", "out_2", "out_3"],
+    )
+    assert set(ensured_module.in_keys) == {"x"}
+    # the requested out_keys must be forwarded to the wrapper as well
+    assert set(ensured_module.out_keys) == {"out_1", "out_2", "out_3"}
+    assert isinstance(ensured_module, TensorDictModule)
diff --git a/torchrl/modules/tensordict_module/common.py b/torchrl/modules/tensordict_module/common.py
@@ -5,6 +5,7 @@
 
 from __future__ import annotations
 
+import inspect
 import warnings
 from copy import deepcopy
 from textwrap import indent
@@ -14,6 +15,7 @@
     List,
     Optional,
     Sequence,
+    Type,
     Union,
 )
 
@@ -625,3 +627,83 @@ def __getattr__(self, name: str) -> Any:
 
     def forward(self, *args, **kwargs):
         return self.td_module.forward(*args, **kwargs)
+
+
+def is_tensordict_compatible(module: Union[TensorDictModule, nn.Module]) -> bool:
+    """Tells whether a module can be called directly with a TensorDict.
+
+    A module is considered compatible if it is a :class:`TensorDictModule`, or if
+    it defines both ``in_keys`` and ``out_keys`` and its ``forward`` method takes
+    a single argument (assumed to be the TensorDict).
+
+    Args:
+        module (TensorDictModule or nn.Module): the module to inspect.
+
+    Returns:
+        ``True`` if the module is assumed to handle TensorDict inputs already,
+        ``False`` if it defines neither ``in_keys`` nor ``out_keys`` (in which case
+        it is a candidate for being wrapped in a :class:`TensorDictModule`).
+
+    Raises:
+        TypeError: if the module defines ``in_keys`` and/or ``out_keys`` but is
+            neither a :class:`TensorDictModule` nor a single-argument module that
+            defines both attributes.
+    """
+    if isinstance(module, TensorDictModule):
+        # a TensorDictModule can deal with TensorDict inputs by construction
+        return True
+
+    has_in_keys = hasattr(module, "in_keys")
+    has_out_keys = hasattr(module, "out_keys")
+    if not has_in_keys and not has_out_keys:
+        # if it's not a TensorDictModule, and in_keys and out_keys are not defined then
+        # we assume no TensorDict compatibility and will try to wrap it.
+        return False
+
+    # The signature is inspected only when it is needed to decide, so that objects
+    # handled by the branches above are never required to expose a ``forward``.
+    if (
+        has_in_keys
+        and has_out_keys
+        and len(inspect.signature(module.forward).parameters) == 1
+    ):
+        # the module takes a single argument and defines in_keys and out_keys: we
+        # assume it can already deal with TensorDict input to forward
+        return True
+
+    # if in_keys or out_keys were defined but module is not a TensorDictModule or
+    # accepts multiple arguments then it's likely the user is trying to do something
+    # that will have undetermined behaviour, we raise an error
+    raise TypeError(
+        "Received a module that defines in_keys or out_keys and also expects multiple "
+        "arguments to module.forward. If the module is compatible with TensorDict, it "
+        "should take a single argument of type TensorDict to module.forward and define "
+        "both in_keys and out_keys. Alternatively, module.forward can accept "
+        "arbitrarily many tensor inputs and leave in_keys and out_keys undefined and "
+        "TorchRL will attempt to automatically wrap the module with a TensorDictModule."
+    )
+
+
+def ensure_tensordict_compatible(
+    module: Union[
+        FunctionalModule, FunctionalModuleWithBuffers, TensorDictModule, nn.Module
+    ],
+    in_keys: Optional[Iterable[str]] = None,
+    out_keys: Optional[Iterable[str]] = None,
+    safe: bool = False,
+    wrapper_type: Optional[Type] = TensorDictModule,
+):
+    """Checks and ensures an object with forward method is TensorDict compatible.
+
+    If ``module`` is already TensorDict compatible (see
+    :func:`is_tensordict_compatible`) it is returned unchanged, after checking
+    that its keys agree with ``in_keys`` / ``out_keys`` when those are given.
+    Otherwise, an ``nn.Module`` is wrapped with ``wrapper_type``.
+
+    Args:
+        module: the object to check and, if needed, wrap.
+        in_keys (iterable of str, optional): expected input keys. For a module
+            that gets wrapped, they must match the argument names of
+            ``module.forward`` one-to-one.
+        out_keys (iterable of str, optional): expected output keys.
+        safe (bool, optional): accepted for interface compatibility.
+            NOTE(review): this value is currently not forwarded to the wrapper.
+        wrapper_type (type, optional): class used to wrap ``module``; it is called
+            as ``wrapper_type(module, **kwargs)`` with the provided keys.
+            Defaults to :class:`TensorDictModule`, also used when ``None`` is given.
+
+    Returns:
+        ``module`` itself if it is already TensorDict compatible, otherwise a
+        ``wrapper_type`` instance wrapping it.
+
+    Raises:
+        TypeError: if the provided keys do not match those of a compatible module,
+            if ``module`` is neither TensorDict compatible nor an ``nn.Module``, or
+            if ``in_keys`` do not match the arguments of ``module.forward``.
+    """
+    # Materialise the keys once: they are compared as sets below and may then be
+    # forwarded to the wrapper, which would otherwise receive an exhausted
+    # iterator whenever a one-shot iterable (e.g. a generator) is passed.
+    if in_keys is not None:
+        in_keys = list(in_keys)
+    if out_keys is not None:
+        out_keys = list(out_keys)
+
+    if is_tensordict_compatible(module):
+        if in_keys is not None and set(in_keys) != set(module.in_keys):
+            raise TypeError(
+                f"Arguments to module.forward ({set(module.in_keys)}) doesn't match "
+                f"with the expected TensorDict in_keys ({set(in_keys)})."
+            )
+        if out_keys is not None and set(module.out_keys) != set(out_keys):
+            raise TypeError(
+                f"Outputs of module.forward ({set(module.out_keys)}) doesn't match "
+                f"with the expected TensorDict out_keys ({set(out_keys)})."
+            )
+        # return module itself if it's already tensordict compatible
+        return module
+
+    if not isinstance(module, nn.Module):
+        raise TypeError(
+            "Argument to ensure_tensordict_compatible should be either "
+            "a TensorDictModule or an nn.Module"
+        )
+
+    sig = inspect.signature(module.forward)
+    if in_keys is not None and set(sig.parameters) != set(in_keys):
+        # report what was actually compared: forward's argument names vs in_keys
+        raise TypeError(
+            f"Arguments to module.forward ({set(sig.parameters)}) are incompatible "
+            f"with the provided in_keys ({set(in_keys)}). If you want TorchRL to "
+            "automatically wrap your module with a TensorDictModule then the "
+            "arguments to module must correspond one-to-one with entries in "
+            "in_keys. For more complex behaviour and more control you can "
+            "consider writing your own TensorDictModule."
+        )
+
+    # the annotation allows None: fall back to the default wrapper in that case
+    if wrapper_type is None:
+        wrapper_type = TensorDictModule
+
+    # TODO: Check whether out_keys match (at least in number) if they are provided.
+    # Only forward the keys that were given so the wrapper's own defaults apply.
+    kwargs = {}
+    if in_keys is not None:
+        kwargs["in_keys"] = in_keys
+    if out_keys is not None:
+        kwargs["out_keys"] = out_keys
+    return wrapper_type(module, **kwargs)
diff --git a/torchrl/objectives/dqn.py b/torchrl/objectives/dqn.py