Commit 31d79d4

Commit message: lint
1 parent 5ab6578 · commit 31d79d4

23 files changed: +179 additions, -75 deletions

src/imitation/algorithms/adversarial.py

Lines changed: 8 additions & 2 deletions
@@ -180,7 +180,11 @@ def gen_policy(self) -> base_class.BaseRLModel:
         return self._gen_policy
 
     def _gen_log_action_prob_from_unnormalized(
-        self, observation: np.ndarray, *, actions: np.ndarray, logp=True,
+        self,
+        observation: np.ndarray,
+        *,
+        actions: np.ndarray,
+        logp=True,
     ) -> np.ndarray:
         """Calculate generator log action probabilility.
 
@@ -306,7 +310,9 @@ def train_gen(
         self._gen_replay_buffer.store(gen_samples)
 
     def train(
-        self, total_timesteps: int, callback: Optional[Callable[[int], None]] = None,
+        self,
+        total_timesteps: int,
+        callback: Optional[Callable[[int], None]] = None,
     ) -> None:
         """Alternates between training the generator and discriminator.

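The `train` method reformatted above alternates discriminator and generator updates. As context, here is a minimal sketch of that alternating loop; it assumes the trainer exposes `train_disc()` and `train_gen()` as suggested by the hunks above, and the step bookkeeping is illustrative rather than the library's actual implementation.

def alternating_train(trainer, total_timesteps, gen_batch_size, callback=None):
    """Sketch of the generator/discriminator alternation performed by train()."""
    n_rounds = total_timesteps // gen_batch_size
    for round_idx in range(n_rounds):
        trainer.train_disc()               # fit discriminator on expert vs. generated samples
        trainer.train_gen(gen_batch_size)  # update generator policy against the learned reward
        if callback is not None:
            callback(round_idx)            # mirrors the Optional[Callable[[int], None]] hook above
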
src/imitation/algorithms/bc.py

Lines changed: 7 additions & 3 deletions
@@ -67,7 +67,9 @@ def __init__(
         policy_class: Type[ActorCriticPolicy] = FeedForward32Policy,
         policy_kwargs: Optional[Mapping[str, Any]] = None,
         expert_data: Union[
-            types.TransitionsMinimal, datasets.Dataset[types.TransitionsMinimal], None,
+            types.TransitionsMinimal,
+            datasets.Dataset[types.TransitionsMinimal],
+            None,
         ] = None,
         batch_size: int = 32,
         optimizer_cls: Type[tf.train.Optimizer] = tf.train.AdamOptimizer,
@@ -122,7 +124,8 @@ def __init__(
     def set_expert_dataset(
         self,
         expert_data: Union[
-            types.TransitionsMinimal, datasets.Dataset[types.TransitionsMinimal],
+            types.TransitionsMinimal,
+            datasets.Dataset[types.TransitionsMinimal],
         ],
     ):
         """Replace the current expert dataset with a new one.
@@ -253,7 +256,8 @@ def save_policy(self, policy_path: str):
 
     @staticmethod
     def reconstruct_policy(
-        policy_path: str, sess: Optional[tf.Session] = None,
+        policy_path: str,
+        sess: Optional[tf.Session] = None,
     ) -> BasePolicy:
         """Reconstruct a saved policy.

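For context, `save_policy` and `reconstruct_policy` above round-trip a BC policy to disk. A hypothetical usage sketch (the path and the trained `bc_trainer` instance are assumptions, not taken from this diff):

# Assumes a trained BC instance `bc_trainer`; the path is illustrative.
bc_trainer.save_policy("output/bc_policy.pkl")

# Later, rebuild the policy; sess=None presumably lets the static method
# create or reuse a TensorFlow session, per the signature above.
policy = BC.reconstruct_policy("output/bc_policy.pkl", sess=None)
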
src/imitation/algorithms/dagger.py

Lines changed: 4 additions & 1 deletion
@@ -41,7 +41,10 @@ def schedule(i: int) -> float:
     return schedule
 
 
-def _save_trajectory(npz_path: str, trajectory: types.Trajectory,) -> None:
+def _save_trajectory(
+    npz_path: str,
+    trajectory: types.Trajectory,
+) -> None:
     """Save a trajectory as a compressed Numpy file."""
     save_dir = os.path.dirname(npz_path)
     if save_dir:

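`_save_trajectory` above writes a trajectory as a compressed NumPy archive. A minimal sketch of that idea (field names follow the `obs`/`acts`/`infos` attributes used elsewhere in this diff; the exact archive layout is an assumption):

import os
import numpy as np

def save_trajectory_sketch(npz_path, trajectory):
    """Illustrative only: dump trajectory arrays with np.savez_compressed."""
    save_dir = os.path.dirname(npz_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    np.savez_compressed(
        npz_path,
        obs=trajectory.obs,
        acts=trajectory.acts,
        infos=trajectory.infos,
    )
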
src/imitation/algorithms/density_baselines.py

Lines changed: 13 additions & 13 deletions
@@ -224,20 +224,20 @@ def __init__(
         is_stationary: bool = False,
     ):
         r"""Family of simple imitation learning baseline algorithms that apply RL to
-    maximise a rough density estimate of the demonstration trajectories.
-    Specifically, it constructs a non-parametric estimate of `p(s)`, `p(s,s')`,
-    `p_t(s,a)`, etc. (depending on options), then rewards the imitation learner
-    with `r_t(s,a,s')=\log p_t(s,a,s')` (or `\log p(s,s')`, or whatever the
-    user wants the model to condition on).
+        maximise a rough density estimate of the demonstration trajectories.
+        Specifically, it constructs a non-parametric estimate of `p(s)`, `p(s,s')`,
+        `p_t(s,a)`, etc. (depending on options), then rewards the imitation learner
+        with `r_t(s,a,s')=\log p_t(s,a,s')` (or `\log p(s,s')`, or whatever the
+        user wants the model to condition on).
 
-    Args:
-        venv: environment to train on.
-        rollouts: list of expert trajectories to imitate.
-        imitation_trainer: RL algorithm & initial policy that will
-            be used to train the imitation learner.
-        kernel, kernel_bandwidth, density_type, is_stationary,
-        n_expert_trajectories: these are passed directly to `DensityReward`;
-            refer to documentation for that class."""
+        Args:
+            venv: environment to train on.
+            rollouts: list of expert trajectories to imitate.
+            imitation_trainer: RL algorithm & initial policy that will
+                be used to train the imitation learner.
+            kernel, kernel_bandwidth, density_type, is_stationary,
+            n_expert_trajectories: these are passed directly to `DensityReward`;
+                refer to documentation for that class."""
         self.venv = venv
         self.imitation_trainer = imitation_trainer
         self.reward_fn = DensityReward(

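The docstring above describes rewarding the imitation learner with the log density of demonstration data. As a rough sketch of that idea only (not the `DensityReward` class itself), the snippet below fits a kernel density model on expert state-action pairs and returns log p(s, a) as a reward; the use of scikit-learn's `KernelDensity` is an assumption about tooling, not something shown in this diff.

import numpy as np
from sklearn.neighbors import KernelDensity  # assumes scikit-learn is available

def make_log_density_reward(expert_obs, expert_acts, bandwidth=0.5):
    """Fit a Gaussian KDE on expert (s, a) pairs; reward is the log-density."""
    # expects 2-D arrays of shape (n_samples, dim)
    expert_sa = np.concatenate([expert_obs, expert_acts], axis=1)
    kde = KernelDensity(kernel="gaussian", bandwidth=bandwidth).fit(expert_sa)

    def reward_fn(obs, acts):
        sa = np.concatenate([obs, acts], axis=1)
        return kde.score_samples(sa)  # log-density, one value per (s, a) pair

    return reward_fn
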
src/imitation/data/buffer.py

Lines changed: 1 addition & 1 deletion
@@ -331,7 +331,7 @@ def from_data(
             obs_shape=obs_shape,
             act_shape=act_shape,
             obs_dtype=transitions.obs.dtype,
-            act_dtype=transitions.acts.dtype,
+            act_dtype=transitions.acts.dtype,  # pytype: disable=wrong-arg-types
         )
         instance.store(transitions, truncate_ok=truncate_ok)
         return instance

src/imitation/data/rollout.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,8 @@ def f(trajectories: Sequence[types.TrajectoryWithRew]):
187187

188188

189189
def make_sample_until(
190-
n_timesteps: Optional[int], n_episodes: Optional[int],
190+
n_timesteps: Optional[int],
191+
n_episodes: Optional[int],
191192
) -> GenTrajTerminationFn:
192193
"""Returns a termination condition sampling until n_timesteps or n_episodes.
193194

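`make_sample_until` returns a predicate that tells rollout collection when to stop. A hedged sketch of what such a predicate might look like (names and exact semantics are assumptions, not the library's implementation):

from typing import Optional, Sequence

def make_sample_until_sketch(n_timesteps: Optional[int], n_episodes: Optional[int]):
    """Illustrative stand-in for the termination condition described above."""
    def sample_until(trajectories: Sequence) -> bool:
        if n_episodes is not None and len(trajectories) >= n_episodes:
            return True
        total_steps = sum(len(traj.acts) for traj in trajectories)
        return n_timesteps is not None and total_steps >= n_timesteps
    return sample_until
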
src/imitation/envs/examples/airl_envs/dynamic_mjc/model_builder.py

Lines changed: 4 additions & 4 deletions
@@ -48,10 +48,10 @@ def __init__(self, name):
     @contextmanager
     def asfile(self):
         """Usage:
-            model = MJCModel('reacher')
-            with model.asfile() as f:
-                print f.read() # prints a dump of the model
-            """
+        model = MJCModel('reacher')
+        with model.asfile() as f:
+            print f.read() # prints a dump of the model
+        """
         with tempfile.NamedTemporaryFile(mode="w+", suffix=".xml", delete=True) as f:
             self.root.write(f)
             f.seek(0)

src/imitation/envs/examples/airl_envs/utils.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@
44
def flat_to_one_hot(val, ndim):
55
"""
66
7-
>>> flat_to_one_hot(2, ndim=4)
8-
array([ 0., 0., 1., 0.])
9-
>>> flat_to_one_hot(4, ndim=5)
10-
array([ 0., 0., 0., 0., 1.])
11-
>>> flat_to_one_hot(np.array([2, 4, 3]), ndim=5)
12-
array([[ 0., 0., 1., 0., 0.],
13-
[ 0., 0., 0., 0., 1.],
14-
[ 0., 0., 0., 1., 0.]])
15-
"""
7+
>>> flat_to_one_hot(2, ndim=4)
8+
array([ 0., 0., 1., 0.])
9+
>>> flat_to_one_hot(4, ndim=5)
10+
array([ 0., 0., 0., 0., 1.])
11+
>>> flat_to_one_hot(np.array([2, 4, 3]), ndim=5)
12+
array([[ 0., 0., 1., 0., 0.],
13+
[ 0., 0., 0., 0., 1.],
14+
[ 0., 0., 0., 1., 0.]])
15+
"""
1616
shape = np.array(val).shape
1717
v = np.zeros(shape + (ndim,))
1818
if len(shape) == 1:
@@ -24,13 +24,13 @@ def flat_to_one_hot(val, ndim):
2424

2525
def one_hot_to_flat(val):
2626
"""
27-
>>> one_hot_to_flat(np.array([0,0,0,0,1]))
28-
4
29-
>>> one_hot_to_flat(np.array([0,0,1,0]))
30-
2
31-
>>> one_hot_to_flat(np.array([[0,0,1,0], [1,0,0,0], [0,1,0,0]]))
32-
array([2, 0, 1])
33-
"""
27+
>>> one_hot_to_flat(np.array([0,0,0,0,1]))
28+
4
29+
>>> one_hot_to_flat(np.array([0,0,1,0]))
30+
2
31+
>>> one_hot_to_flat(np.array([[0,0,1,0], [1,0,0,0], [0,1,0,0]]))
32+
array([2, 0, 1])
33+
"""
3434
idxs = np.array(np.where(val == 1.0))[-1]
3535
if len(val.shape) == 1:
3636
return int(idxs)

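The doctests above pin down the one-hot conversion in both directions. A compact NumPy sketch with equivalent behaviour (illustrative only; the repository's own implementation, partially visible above, is structured differently):

import numpy as np

def flat_to_one_hot_sketch(val, ndim):
    # Indexing an identity matrix by integer labels yields one-hot rows,
    # and works for scalars or arrays of labels alike.
    return np.eye(ndim)[np.asarray(val)]

def one_hot_to_flat_sketch(val):
    # argmax along the last axis recovers the flat index (or index array).
    return np.asarray(val).argmax(axis=-1)
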
src/imitation/policies/serialize.py

Lines changed: 9 additions & 3 deletions
@@ -100,11 +100,15 @@ def f(path: str, venv: VecEnv) -> Iterator[BasePolicy]:
 
 policy_registry.register(
     "random",
-    value=registry.build_loader_fn_require_space(registry.dummy_context(RandomPolicy),),
+    value=registry.build_loader_fn_require_space(
+        registry.dummy_context(RandomPolicy),
+    ),
 )
 policy_registry.register(
     "zero",
-    value=registry.build_loader_fn_require_space(registry.dummy_context(ZeroPolicy),),
+    value=registry.build_loader_fn_require_space(
+        registry.dummy_context(ZeroPolicy),
+    ),
 )
 
 
@@ -142,7 +146,9 @@ def load_policy(
 
 
 def save_stable_model(
-    output_dir: str, model: BaseRLModel, vec_normalize: Optional[VecNormalize] = None,
+    output_dir: str,
+    model: BaseRLModel,
+    vec_normalize: Optional[VecNormalize] = None,
 ) -> None:
     """Serialize policy.

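The registration calls reformatted above show the pattern for hooking a hard-coded policy type into `policy_registry`. A hedged sketch of registering an additional policy the same way; `MyConstantPolicy` and the "my-constant" key are hypothetical and not part of this diff:

# Hypothetical registration, mirroring the "random" and "zero" entries above.
policy_registry.register(
    "my-constant",
    value=registry.build_loader_fn_require_space(
        registry.dummy_context(MyConstantPolicy),
    ),
)
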
src/imitation/rewards/discrim_net.py

Lines changed: 5 additions & 1 deletion
@@ -231,7 +231,11 @@ def reward_train(
         return rew
 
     def reward_test(
-        self, obs: np.ndarray, act: np.ndarray, next_obs: np.ndarray, dones: np.ndarray,
+        self,
+        obs: np.ndarray,
+        act: np.ndarray,
+        next_obs: np.ndarray,
+        dones: np.ndarray,
     ) -> np.ndarray:
         """Vectorized reward for training an expert during transfer learning.

src/imitation/rewards/reward_net.py

Lines changed: 7 additions & 2 deletions
@@ -217,7 +217,9 @@ def reward_output_train(self):
 
     @abstractmethod
     def build_phi_network(
-        self, obs_input: tf.Tensor, next_obs_input: tf.Tensor,
+        self,
+        obs_input: tf.Tensor,
+        next_obs_input: tf.Tensor,
     ) -> Tuple[tf.Tensor, tf.Tensor, networks.LayersDict]:
         """Build the reward shaping network (disentangles dynamics from reward).
 
@@ -373,7 +375,10 @@ def __init__(
         self.theta_kwargs = theta_kwargs or {}
         self.phi_kwargs = phi_kwargs or {}
         RewardNetShaped.__init__(
-            self, observation_space, action_space, **kwargs,
+            self,
+            observation_space,
+            action_space,
+            **kwargs,
         )
         serialize.LayersSerializable.__init__(**params, layers=self._layers)

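`build_phi_network` above supplies the potential function used for reward shaping. For context only, the textbook potential-based shaping form that such a network feeds into is sketched below; the symbols stand for the two outputs of the phi network and the discount factor, and the exact expression used in reward_net.py may differ.

def shaped_reward_sketch(base_reward, phi_obs, phi_next_obs, gamma=0.99):
    # Potential-based shaping (Ng et al., 1999): add the discounted potential of the
    # next state and subtract the potential of the current state. Shown as a sketch,
    # not as the library's implementation.
    return base_reward + gamma * phi_next_obs - phi_obs
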
src/imitation/scripts/config/expert_demos.py

Lines changed: 6 additions & 2 deletions
@@ -98,7 +98,9 @@ def hopper():
 @expert_demos_ex.named_config
 def humanoid():
     env_name = "Humanoid-v2"
-    init_rl_kwargs = dict(n_steps=2048,)  # batch size of 2048*8=16384 due to num_vec
+    init_rl_kwargs = dict(
+        n_steps=2048,
+    )  # batch size of 2048*8=16384 due to num_vec
     total_timesteps = int(10e6)  # fairly discontinuous, needs at least 5e6
 
 
@@ -160,7 +162,9 @@ def fast():
 # Shared settings
 
 ant_shared_locals = dict(
-    init_rl_kwargs=dict(n_steps=2048,),  # batch size of 2048*8=16384 due to num_vec
+    init_rl_kwargs=dict(
+        n_steps=2048,
+    ),  # batch size of 2048*8=16384 due to num_vec
     total_timesteps=int(5e6),
     max_episode_steps=500,  # To match `inverse_rl` settings.
 )

src/imitation/scripts/config/train_adversarial.py

Lines changed: 2 additions & 1 deletion
@@ -50,7 +50,8 @@ def train_defaults():
 
     # Modifies the __init__ arguments for the imitation policy
     init_rl_kwargs = dict(
-        policy_class=base.FeedForward32Policy, **DEFAULT_INIT_RL_KWARGS,
+        policy_class=base.FeedForward32Policy,
+        **DEFAULT_INIT_RL_KWARGS,
     )
     gen_batch_size = 2048  # Batch size for generator updates

src/imitation/scripts/parallel.py

Lines changed: 4 additions & 1 deletion
@@ -89,7 +89,10 @@ def parallel(
         base_config_updates["data_dir"] = data_dir
 
     trainable = _ray_tune_sacred_wrapper(
-        sacred_ex_name, run_name, base_named_configs, base_config_updates,
+        sacred_ex_name,
+        run_name,
+        base_named_configs,
+        base_config_updates,
     )
 
     # Disable all Ray Loggers.

src/imitation/scripts/train_adversarial.py

Lines changed: 2 additions & 1 deletion
@@ -162,7 +162,8 @@ def train(
     algorithm_kwargs_shared = algorithm_kwargs.get("shared", {})
     algorithm_kwargs_algo = algorithm_kwargs.get(algorithm, {})
     final_algorithm_kwargs = dict(
-        **algorithm_kwargs_shared, **algorithm_kwargs_algo,
+        **algorithm_kwargs_shared,
+        **algorithm_kwargs_algo,
     )
 
     if algorithm.lower() == "gail":

src/imitation/util/logger.py

Lines changed: 2 additions & 1 deletion
@@ -6,7 +6,8 @@
 
 
 def _build_output_formats(
-    folder: str, format_strs: Sequence[str] = None,
+    folder: str,
+    format_strs: Sequence[str] = None,
 ) -> Sequence[sb_logger.KVWriter]:
     """Build output formats for initializing a Stable Baselines Logger.

tests/test_buffer.py

Lines changed: 6 additions & 1 deletion
@@ -230,7 +230,12 @@ def _check_buf(buf):
     rews = np.array([0.5, 1.0], dtype=float)
     buf_rew = ReplayBuffer.from_data(
         types.TransitionsWithRew(
-            obs=obs, acts=acts, next_obs=next_obs, rews=rews, dones=dones, infos=infos,
+            obs=obs,
+            acts=acts,
+            next_obs=next_obs,
+            rews=rews,
+            dones=dones,
+            infos=infos,
         )
     )
     _check_buf(buf_rew)

tests/test_buffering_wrapper.py

Lines changed: 9 additions & 2 deletions
@@ -46,7 +46,9 @@ def step(self, action):
         return t, t * 10, done, {}
 
 
-def _make_buffering_venv(error_on_premature_reset: bool,) -> BufferingWrapper:
+def _make_buffering_venv(
+    error_on_premature_reset: bool,
+) -> BufferingWrapper:
     venv = DummyVecEnv([_CountingEnv] * 2)
     venv = BufferingWrapper(venv, error_on_premature_reset)
     venv.reset()
@@ -73,7 +75,12 @@ def concat(x):
     dones = concat(t.dones for t in trans_list)
     infos = concat(t.infos for t in trans_list)
     return types.TransitionsWithRew(
-        obs=obs, next_obs=next_obs, rews=rews, acts=acts, dones=dones, infos=infos,
+        obs=obs,
+        next_obs=next_obs,
+        rews=rews,
+        acts=acts,
+        dones=dones,
+        infos=infos,
     )
7986

tests/test_data.py

Lines changed: 19 additions & 4 deletions
@@ -102,7 +102,9 @@ def test_valid_trajectories(
         assert len(traj) == length
 
     def test_invalid_trajectories(
-        self, trajectory: types.Trajectory, trajectory_rew: types.TrajectoryWithRew,
+        self,
+        trajectory: types.Trajectory,
+        trajectory_rew: types.TrajectoryWithRew,
     ) -> None:
         """Checks input validation catches space and dtype related errors."""
         trajs = [trajectory, trajectory_rew]
@@ -304,7 +306,13 @@ def test_dict_dataset_parallel_rows(
     Nontrivially, shuffled datasets should maintain this order.
    """
    dataset_cls, kwargs = dict_dataset_params
-    range_data_map = {k: i + np.arange(50,) for i, k in enumerate("abcd")}
+    range_data_map = {
+        k: i
+        + np.arange(
+            50,
+        )
+        for i, k in enumerate("abcd")
+    }
    dict_dataset = dataset_cls(range_data_map, **kwargs)
    for _ in range(n_checks):
        n_samples = np.random.randint(max_batch_size) + 1
@@ -333,7 +341,11 @@ def arange_dataset(self, shuffle, dataset_size):
         return ds
 
     def test_epoch_order_dict_dataset_shuffle_order(
-        self, arange_dataset, shuffle, dataset_size, n_checks=3,
+        self,
+        arange_dataset,
+        shuffle,
+        dataset_size,
+        n_checks=3,
     ):
         """Check that epoch order is deterministic iff not shuffled.
@@ -350,7 +362,10 @@ def test_epoch_order_dict_dataset_shuffle_order(
         assert same_order != shuffle
 
     def test_epoch_order_dict_dataset_order_property(
-        self, arange_dataset, max_batch_size=31, n_epochs=4,
+        self,
+        arange_dataset,
+        max_batch_size=31,
+        n_epochs=4,
     ):
         """No sample should be returned n+1 times until others are returned n times."""
         counter = collections.Counter({i: 0 for i in range(arange_dataset.size())})
