From 53b8ca66d4f77576da1127828214f56fd2e6353c Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Thu, 19 Aug 2021 14:43:53 -0700 Subject: [PATCH 01/17] from locobot to default --- projects/objectnav_baselines/experiments/objectnav_thor_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/objectnav_baselines/experiments/objectnav_thor_base.py b/projects/objectnav_baselines/experiments/objectnav_thor_base.py index 577ea3996..e164657f8 100644 --- a/projects/objectnav_baselines/experiments/objectnav_thor_base.py +++ b/projects/objectnav_baselines/experiments/objectnav_thor_base.py @@ -89,7 +89,7 @@ def env_args(cls): visibilityDistance=cls.VISIBILITY_DISTANCE, gridSize=cls.STEP_SIZE, snapToGrid=False, - agentMode="locobot", + agentMode="default", fieldOfView=horizontal_to_vertical_fov( horizontal_fov_in_degrees=cls.HORIZONTAL_FIELD_OF_VIEW, width=cls.CAMERA_WIDTH, From 877f7a574bc87bcc22b63df969de72bd6f9b6e5f Mon Sep 17 00:00:00 2001 From: kshitijd20 <36129805+kshitijd20@users.noreply.github.com> Date: Mon, 23 Aug 2021 16:45:15 +0200 Subject: [PATCH 02/17] Objectnav iTHOR default agent baseline --- .../ithor_plugin/ithor_environment.py | 3 +- .../object_nav_ithor_ppo_baseline.py | 309 ++++++++++++++++++ 2 files changed, 311 insertions(+), 1 deletion(-) create mode 100644 projects/tutorials/object_nav_ithor_ppo_baseline.py diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index 50c5db406..6d2815fe8 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -43,6 +43,7 @@ def __init__( make_agents_visible: bool = True, object_open_speed: float = 1.0, simplify_physics: bool = False, + **kwargs ) -> None: """Initializer. @@ -96,7 +97,7 @@ def __init__( self._always_return_visible_range = False self.simplify_physics = simplify_physics - self.start(None) + self.start(**kwargs) # noinspection PyTypeHints self.controller.docker_enabled = docker_enabled # type: ignore diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py new file mode 100644 index 000000000..35685ef0f --- /dev/null +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -0,0 +1,309 @@ +from math import ceil +from typing import Dict, Any, List, Optional + +import gym +import numpy as np +import torch +import torch.nn as nn +import torch.optim as optim +from torch.optim.lr_scheduler import LambdaLR + +from allenact.utils.experiment_utils import evenly_distribute_count_into_bins +from allenact.algorithms.onpolicy_sync.losses import PPO +from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig +from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams +from allenact.base_abstractions.sensor import SensorSuite +from allenact.base_abstractions.task import TaskSampler +from allenact.utils.experiment_utils import ( + Builder, + PipelineStage, + TrainingPipeline, + LinearDecay, +) +from allenact_plugins.ithor_plugin.ithor_sensors import ( + RGBSensorThor, + GoalObjectTypeThorSensor, +) +from allenact_plugins.ithor_plugin.ithor_task_samplers import ObjectNavTaskSampler +from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask +from projects.objectnav_baselines.models.object_nav_models import ( + ObjectNavBaselineActorCritic, +) + + +class ObjectNavThorPPOExperimentConfig(ExperimentConfig): + """A simple object navigation experiment in THOR. 
+ + Training with PPO. + """ + + # A simple setting, train/valid/test are all the same single scene + # and we're looking for a single object + OBJECT_TYPES = sorted( + [ + "AlarmClock", + "Apple", + "Book", + "Bowl", + "Box", + "Candle", + "GarbageCan", + "HousePlant", + "Laptop", + "SoapBottle", + "Television", + "Toaster", + ] + ) + train_path = os.path.join(os.getcwd(), "datasets/ithor-objectnav/train/episodes", "*.json.gz") + val_path = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes", "*.json.gz") + test_path = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes", "*.json.gz") + + TRAIN_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(train_path)] + VALID_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(val_path)] + TEST_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(test_path)] + + # Setting up sensors and basic environment details + CAMERA_WIDTH = 400 + CAMERA_HEIGHT = 300 + SCREEN_SIZE = 224 + SENSORS = [ + RGBSensorThor( + height=SCREEN_SIZE, + width=SCREEN_SIZE, + use_resnet_normalization=True, + ), + GoalObjectTypeThorSensor(object_types=OBJECT_TYPES), + ] + + ENV_ARGS = { + "player_screen_height": CAMERA_WIDTH, + "player_screen_width": CAMERA_HEIGHT, + "quality": "Very Low", + "rotateStepDegrees": 30, + "visibilityDistance"=1.0, + "gridSize" =0.25, + } + + MAX_STEPS = 128 + ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None + VALID_SAMPLES_IN_SCENE = 10 + TEST_SAMPLES_IN_SCENE = 100 + + + DEFAULT_TRAIN_GPU_IDS = tuple(range(torch.cuda.device_count())) + DEFAULT_VALID_GPU_IDS = (torch.cuda.device_count() - 1,) + DEFAULT_TEST_GPU_IDS = (torch.cuda.device_count() - 1,) + + @classmethod + def tag(cls): + return "ObjectNavThorPPO" + + @classmethod + def training_pipeline(cls, **kwargs): + ppo_steps = int(300000000) + lr = 3e-4 + num_mini_batch = 1 if not torch.cuda.is_available() else 6 + update_repeats = 4 + num_steps = 128 + metric_accumulate_interval = 10000 # Log every 10 max length tasks + save_interval = 5000000 + gamma = 0.99 + use_gae = True + gae_lambda = 0.95 + max_grad_norm = 0.5 + + return TrainingPipeline( + save_interval=save_interval, + metric_accumulate_interval=metric_accumulate_interval, + optimizer_builder=Builder(optim.Adam, dict(lr=lr)), + num_mini_batch=num_mini_batch, + update_repeats=update_repeats, + max_grad_norm=max_grad_norm, + num_steps=num_steps, + named_losses={ + "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig), + }, + gamma=gamma, + use_gae=use_gae, + gae_lambda=gae_lambda, + advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD, + pipeline_stages=[ + PipelineStage( + loss_names=["ppo_loss"], + max_stage_steps=ppo_steps, + ), + ], + lr_scheduler_builder=Builder( + LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)} + ), + ) + + @classmethod + def machine_params(cls, mode="train", **kwargs): + num_gpus = torch.cuda.device_count() + has_gpu = num_gpus != 0 + + if mode == "train": + nprocesses = 40 + workers_per_device = 1 + devices = ( + [torch.device("cpu")] + if not torch.cuda.is_available() + else cls.DEFAULT_TRAIN_GPU_IDS * workers_per_device + ) + nprocesses = evenly_distribute_count_into_bins( + nprocesses, max(len(devices), 1) + ) + sampler_devices = cls.DEFAULT_TRAIN_GPU_IDS + elif mode == "valid": + nprocesses = 1 + gpu_ids = [] if not torch.cuda.is_available() else cls.DEFAULT_VALID_GPU_IDS + elif mode == "test": + nprocesses = 1 + gpu_ids = [] if not torch.cuda.is_available() else cls.DEFAULT_TEST_GPU_IDS + else: + raise 
NotImplementedError("mode must be 'train', 'valid', or 'test'.") + + return MachineParams( + nprocesses=nprocesses, + devices=gpu_ids, + ) + + @classmethod + def create_model(cls, **kwargs) -> nn.Module: + return ObjectNavBaselineActorCritic( + action_space=gym.spaces.Discrete( + len(ObjectNaviThorGridTask.class_action_names()) + ), + observation_space=SensorSuite(cls.SENSORS).observation_spaces, + rgb_uuid=cls.SENSORS[0].uuid, + depth_uuid=None, + goal_sensor_uuid="goal_object_type_ind", + hidden_size=512, + object_type_embedding_dim=8, + ) + + @classmethod + def make_sampler_fn(cls, **kwargs) -> TaskSampler: + return ObjectNavTaskSampler(**kwargs) + + @staticmethod + def _partition_inds(n: int, num_parts: int): + return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype( + np.int32 + ) + + def _get_sampler_args_for_scene_split( + self, + scenes: List[str], + process_ind: int, + total_processes: int, + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + ) -> Dict[str, Any]: + if total_processes > len(scenes): # oversample some scenes -> bias + if total_processes % len(scenes) != 0: + print( + "Warning: oversampling some of the scenes to feed all processes." + " You can avoid this by setting a number of workers divisible by the number of scenes" + ) + scenes = scenes * int(ceil(total_processes / len(scenes))) + scenes = scenes[: total_processes * (len(scenes) // total_processes)] + else: + if len(scenes) % total_processes != 0: + print( + "Warning: oversampling some of the scenes to feed all processes." + " You can avoid this by setting a number of workers divisor of the number of scenes" + ) + inds = self._partition_inds(len(scenes), total_processes) + + return { + "scenes": scenes[inds[process_ind] : inds[process_ind + 1]], + "object_types": self.OBJECT_TYPES, + "env_args": self.ENV_ARGS, + "max_steps": self.MAX_STEPS, + "sensors": self.SENSORS, + "action_space": gym.spaces.Discrete( + len(ObjectNaviThorGridTask.class_action_names()) + ), + "seed": seeds[process_ind] if seeds is not None else None, + "deterministic_cudnn": deterministic_cudnn, + } + + def train_task_sampler_args( + self, + process_ind: int, + total_processes: int, + devices: Optional[List[int]] = None, + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + ) -> Dict[str, Any]: + res = self._get_sampler_args_for_scene_split( + self.TRAIN_SCENES, + process_ind, + total_processes, + seeds=seeds, + deterministic_cudnn=deterministic_cudnn, + ) + res["scene_period"] = "manual" + res["env_args"] = {} + res["env_args"].update(self.ENV_ARGS) + res["env_args"]["x_display"] = ( + ("0.%d" % devices[process_ind % len(devices)]) + if devices is not None and len(devices) > 0 + else None + ) + return res + + def valid_task_sampler_args( + self, + process_ind: int, + total_processes: int, + devices: Optional[List[int]] = None, + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + ) -> Dict[str, Any]: + res = self._get_sampler_args_for_scene_split( + self.VALID_SCENES, + process_ind, + total_processes, + seeds=seeds, + deterministic_cudnn=deterministic_cudnn, + ) + res["scene_period"] = self.VALID_SAMPLES_IN_SCENE + res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"]) + res["env_args"] = {} + res["env_args"].update(self.ENV_ARGS) + res["env_args"]["x_display"] = ( + ("0.%d" % devices[process_ind % len(devices)]) + if devices is not None and len(devices) > 0 + else None + ) + return res + + def test_task_sampler_args( + self, + process_ind: int, + 
total_processes: int, + devices: Optional[List[int]] = None, + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + ) -> Dict[str, Any]: + res = self._get_sampler_args_for_scene_split( + self.TEST_SCENES, + process_ind, + total_processes, + seeds=seeds, + deterministic_cudnn=deterministic_cudnn, + ) + res["scene_period"] = self.TEST_SAMPLES_IN_SCENE + res["max_tasks"] = self.TEST_SAMPLES_IN_SCENE * len(res["scenes"]) + res["env_args"] = {} + res["env_args"].update(self.ENV_ARGS) + res["env_args"]["x_display"] = ( + ("0.%d" % devices[process_ind % len(devices)]) + if devices is not None and len(devices) > 0 + else None + ) + return res From 0d33b2b6049724910548c9e3c1e236c7638faf08 Mon Sep 17 00:00:00 2001 From: kshitijd20 <36129805+kshitijd20@users.noreply.github.com> Date: Mon, 23 Aug 2021 16:50:36 +0200 Subject: [PATCH 03/17] Update object_nav_ithor_ppo_baseline.py --- projects/tutorials/object_nav_ithor_ppo_baseline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 35685ef0f..8de90f84c 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -81,8 +81,8 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): "player_screen_width": CAMERA_HEIGHT, "quality": "Very Low", "rotateStepDegrees": 30, - "visibilityDistance"=1.0, - "gridSize" =0.25, + "visibilityDistance": 1.0, + "gridSize": 0.25, } MAX_STEPS = 128 From 66a5b032a6bfd37336bb48e27c7d3a70f81c1fc2 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Mon, 23 Aug 2021 08:48:16 -0700 Subject: [PATCH 04/17] added args to ithor controller --- .../ithor_plugin/ithor_environment.py | 15 ++++++++++++--- .../tutorials/object_nav_ithor_ppo_baseline.py | 6 ++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index 6d2815fe8..7a9227541 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -38,11 +38,15 @@ def __init__( fov: float = FOV, player_screen_width: int = 300, player_screen_height: int = 300, + gridSize: float = 0.15, + rotateStepDegrees: int = 30, + visibilityDistance: float = 1.0, quality: str = "Very Low", restrict_to_initially_reachable_points: bool = False, make_agents_visible: bool = True, object_open_speed: float = 1.0, simplify_physics: bool = False, + **kwargs ) -> None: """Initializer. 
@@ -86,8 +90,10 @@ def __init__( self._initially_reachable_points: Optional[List[Dict]] = None self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None self._move_mag: Optional[float] = None - self._grid_size: Optional[float] = None - self._visibility_distance = visibility_distance + self._grid_size: Optional[float] = gridSize + print("grid size is ",self._grid_size ) + self._rotate_step_degrees = rotateStepDegrees + self._visibility_distance = visibilityDistance self._fov = fov self.restrict_to_initially_reachable_points = ( restrict_to_initially_reachable_points @@ -97,7 +103,7 @@ def __init__( self._always_return_visible_range = False self.simplify_physics = simplify_physics - self.start(**kwargs) + self.start(None) # noinspection PyTypeHints self.controller.docker_enabled = docker_enabled # type: ignore @@ -192,6 +198,9 @@ def start( local_executable_path=self._local_thor_build, quality=self._quality, server_class=ai2thor.fifo_server.FifoServer, + gridSize = self._grid_size, + rotateStepDegrees = self._rotate_step_degrees, + visibilityDistance = self._visibility_distance , ) if ( diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 8de90f84c..b50f0e059 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -1,6 +1,8 @@ from math import ceil from typing import Dict, Any, List, Optional +import glob +import os import gym import numpy as np import torch @@ -147,13 +149,13 @@ def machine_params(cls, mode="train", **kwargs): if mode == "train": nprocesses = 40 workers_per_device = 1 - devices = ( + gpu_ids = ( [torch.device("cpu")] if not torch.cuda.is_available() else cls.DEFAULT_TRAIN_GPU_IDS * workers_per_device ) nprocesses = evenly_distribute_count_into_bins( - nprocesses, max(len(devices), 1) + nprocesses, max(len(gpu_ids), 1) ) sampler_devices = cls.DEFAULT_TRAIN_GPU_IDS elif mode == "valid": From 668b3cadd1880e5aad42c88d617d25422a73d3aa Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Tue, 24 Aug 2021 06:38:08 -0700 Subject: [PATCH 05/17] dataset task sampler for ithor objectnav default --- .../ithor_plugin/ithor_environment.py | 41 +++- .../ithor_plugin/ithor_task_samplers.py | 216 +++++++++++++++++- .../experiments/objectnav_thor_base.py | 2 +- .../object_nav_ithor_ppo_baseline.py | 7 +- 4 files changed, 261 insertions(+), 5 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index 7a9227541..09e5ba3fe 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -46,7 +46,7 @@ def __init__( make_agents_visible: bool = True, object_open_speed: float = 1.0, simplify_physics: bool = False, - + snap_to_grid: bool = False, **kwargs ) -> None: """Initializer. 
@@ -86,6 +86,7 @@ def __init__( self.controller: Optional[Controller] = None self._started = False self._quality = quality + self._snap_to_grid = snap_to_grid self._initially_reachable_points: Optional[List[Dict]] = None self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None @@ -106,6 +107,9 @@ def __init__( self.start(None) # noinspection PyTypeHints self.controller.docker_enabled = docker_enabled # type: ignore + self._extra_teleport_kwargs: Dict[ + str, Any + ] = {} # Used for backwards compatability with the teleport action @property def scene_name(self) -> str: @@ -196,6 +200,7 @@ def start( width=self._start_player_screen_width, height=self._start_player_screen_height, local_executable_path=self._local_thor_build, + snapToGrid=self._snap_to_grid, quality=self._quality, server_class=ai2thor.fifo_server.FifoServer, gridSize = self._grid_size, @@ -738,6 +743,40 @@ def step( return sr + def set_object_filter(self, object_ids: List[str]): + self.controller.step("SetObjectFilter", objectIds=object_ids, renderImage=False) + + def reset_object_filter(self): + self.controller.step("ResetObjectFilter", renderImage=False) + + + def teleport( + self, + pose: Dict[str, float], + rotation: Dict[str, float], + horizon: float = 0.0, + ): + try: + e = self.controller.step( + action="TeleportFull", + x=pose["x"], + y=pose["y"], + z=pose["z"], + rotation=rotation, + horizon=horizon, + **self._extra_teleport_kwargs, + ) + except ValueError as e: + if len(self._extra_teleport_kwargs) == 0: + self._extra_teleport_kwargs["standing"] = True + else: + raise e + return self.teleport( + pose=pose, rotation=rotation, horizon=horizon + ) + return e.metadata["lastActionSuccess"] + + @staticmethod def position_dist( p0: Mapping[str, Any], diff --git a/allenact_plugins/ithor_plugin/ithor_task_samplers.py b/allenact_plugins/ithor_plugin/ithor_task_samplers.py index e43b699af..005c23dc2 100644 --- a/allenact_plugins/ithor_plugin/ithor_task_samplers.py +++ b/allenact_plugins/ithor_plugin/ithor_task_samplers.py @@ -1,6 +1,8 @@ import copy import random -from typing import List, Dict, Optional, Any, Union, cast +import gzip +import json +from typing import List, Optional, Union, Dict, Any, cast, Tuple import gym @@ -198,3 +200,215 @@ def set_seed(self, seed: int): self.seed = seed if seed is not None: set_seed(seed) + +class ObjectNavDatasetTaskSampler(TaskSampler): + def __init__( + self, + scenes: List[str], + scene_directory: str, + sensors: List[Sensor], + max_steps: int, + env_args: Dict[str, Any], + action_space: gym.Space, + #rewards_config: Dict, + seed: Optional[int] = None, + deterministic_cudnn: bool = False, + loop_dataset: bool = True, + allow_flipping=False, + env_class=IThorEnvironment, + **kwargs, + ) -> None: + #self.rewards_config = rewards_config + self.env_args = env_args + self.scenes = scenes + self.episodes = { + scene: ObjectNavDatasetTaskSampler.load_dataset( + scene, scene_directory + ) + for scene in scenes + } + self.env_class = env_class + self.object_types = [ + ep["object_type"] for scene in self.episodes for ep in self.episodes[scene] + ] + self.env: Optional[IThorEnvironment] = None + self.sensors = sensors + self.max_steps = max_steps + self._action_space = action_space + self.allow_flipping = allow_flipping + self.scene_counter: Optional[int] = None + self.scene_order: Optional[List[str]] = None + self.scene_id: Optional[int] = None + # get the total number of tasks assigned to this process + if loop_dataset: + self.max_tasks = None + else: + self.max_tasks = 
sum(len(self.episodes[scene]) for scene in self.episodes) + self.reset_tasks = self.max_tasks + self.scene_index = 0 + self.episode_index = 0 + + self._last_sampled_task: Optional[ObjectNaviThorGridTask] = None + + self.seed: Optional[int] = None + self.set_seed(seed) + + if deterministic_cudnn: + set_deterministic_cudnn() + + self.reset() + + def _create_environment(self) -> IThorEnvironment: + env = self.env_class( make_agents_visible=False, + object_open_speed=0.05, + restrict_to_initially_reachable_points=False, + **self.env_args) + return env + + @staticmethod + def load_dataset(scene: str, base_directory: str) -> List[Dict]: + filename = ( + "/".join([base_directory, scene]) + if base_directory[-1] != "/" + else "".join([base_directory, scene]) + ) + filename += ".json.gz" + fin = gzip.GzipFile(filename, "r") + json_bytes = fin.read() + fin.close() + json_str = json_bytes.decode("utf-8") + data = json.loads(json_str) + random.shuffle(data) + return data + + @staticmethod + def load_distance_cache_from_file(scene: str, base_directory: str) -> Dict: + filename = ( + "/".join([base_directory, scene]) + if base_directory[-1] != "/" + else "".join([base_directory, scene]) + ) + filename += ".json.gz" + fin = gzip.GzipFile(filename, "r") + json_bytes = fin.read() + fin.close() + json_str = json_bytes.decode("utf-8") + data = json.loads(json_str) + return data + + @property + def __len__(self) -> Union[int, float]: + """Length. + + # Returns + + Number of total tasks remaining that can be sampled. Can be float('inf'). + """ + return float("inf") if self.max_tasks is None else self.max_tasks + + @property + def total_unique(self) -> Optional[Union[int, float]]: + return self.reset_tasks + + @property + def last_sampled_task(self) -> Optional[ObjectNaviThorGridTask]: + return self._last_sampled_task + + def close(self) -> None: + if self.env is not None: + self.env.stop() + + @property + def all_observation_spaces_equal(self) -> bool: + """Check if observation spaces equal. + + # Returns + + True if all Tasks that can be sampled by this sampler have the + same observation space. Otherwise False. + """ + return True + + @property + def length(self) -> Union[int, float]: + """Length. + + # Returns + + Number of total tasks remaining that can be sampled. Can be float('inf'). 
+ """ + return float("inf") if self.max_tasks is None else self.max_tasks + + def next_task(self, force_advance_scene: bool = False) -> Optional[ObjectNaviThorGridTask]: + if self.max_tasks is not None and self.max_tasks <= 0: + return None + + if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]): + self.scene_index = (self.scene_index + 1) % len(self.scenes) + # shuffle the new list of episodes to train on + random.shuffle(self.episodes[self.scenes[self.scene_index]]) + self.episode_index = 0 + scene = self.scenes[self.scene_index] + episode = self.episodes[scene][self.episode_index] + if self.env is None: + self.env = self._create_environment() + + if scene.replace("_physics", "") != self.env.scene_name.replace("_physics", ""): + self.env.reset(scene_name=scene) + else: + self.env.reset_object_filter() + + self.env.set_object_filter( + object_ids=[ + o["objectId"] + for o in self.env.last_event.metadata["objects"] + if o["objectType"] == episode["object_type"] + ] + ) + + task_info = {"scene": scene, "object_type": episode["object_type"]} + if len(task_info) == 0: + get_logger().warning( + "Scene {} does not contain any" + " objects of any of the types {}.".format(scene, self.object_types) + ) + task_info["initial_position"] = episode["initial_position"] + task_info["initial_orientation"] = episode["initial_orientation"] + task_info["initial_horizon"] = episode.get("initial_horizon", 0) + task_info["distance_to_target"] = episode.get("shortest_path_length") + task_info["path_to_target"] = episode.get("shortest_path") + task_info["object_type"] = episode["object_type"] + task_info["id"] = episode["id"] + if self.allow_flipping and random.random() > 0.5: + task_info["mirrored"] = True + else: + task_info["mirrored"] = False + + self.episode_index += 1 + if self.max_tasks is not None: + self.max_tasks -= 1 + if not self.env.teleport( + pose=episode["initial_position"], + rotation=episode["initial_orientation"], + horizon=episode.get("initial_horizon", 0), + ): + return self.next_task() + self._last_sampled_task = ObjectNaviThorGridTask( + env=self.env, + sensors=self.sensors, + task_info=task_info, + max_steps=self.max_steps, + action_space=self._action_space, + ) + + return self._last_sampled_task + + def reset(self): + self.episode_index = 0 + self.scene_index = 0 + self.max_tasks = self.reset_tasks + + def set_seed(self, seed: int): + self.seed = seed + if seed is not None: + set_seed(seed) \ No newline at end of file diff --git a/projects/objectnav_baselines/experiments/objectnav_thor_base.py b/projects/objectnav_baselines/experiments/objectnav_thor_base.py index dd6d57e84..d147c90c7 100644 --- a/projects/objectnav_baselines/experiments/objectnav_thor_base.py +++ b/projects/objectnav_baselines/experiments/objectnav_thor_base.py @@ -89,7 +89,7 @@ def env_args(cls): visibilityDistance=cls.VISIBILITY_DISTANCE, gridSize=cls.STEP_SIZE, snapToGrid=False, - agentMode="default", + agentMode="locobot", fieldOfView=horizontal_to_vertical_fov( horizontal_fov_in_degrees=cls.HORIZONTAL_FIELD_OF_VIEW, width=cls.CAMERA_WIDTH, diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index b50f0e059..b2b56e30f 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -26,7 +26,7 @@ RGBSensorThor, GoalObjectTypeThorSensor, ) -from allenact_plugins.ithor_plugin.ithor_task_samplers import ObjectNavTaskSampler +from 
allenact_plugins.ithor_plugin.ithor_task_samplers import ObjectNavTaskSampler,ObjectNavDatasetTaskSampler from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask from projects.objectnav_baselines.models.object_nav_models import ( ObjectNavBaselineActorCritic, @@ -188,7 +188,7 @@ def create_model(cls, **kwargs) -> nn.Module: @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: - return ObjectNavTaskSampler(**kwargs) + return ObjectNavDatasetTaskSampler(**kwargs) @staticmethod def _partition_inds(n: int, num_parts: int): @@ -248,6 +248,7 @@ def train_task_sampler_args( seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) + res["scene_directory"] = os.path.join(os.getcwd(), "datasets/ithor-objectnav/train/episodes") res["scene_period"] = "manual" res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) @@ -273,6 +274,7 @@ def valid_task_sampler_args( seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) + res["scene_directory"] = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes") res["scene_period"] = self.VALID_SAMPLES_IN_SCENE res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"]) res["env_args"] = {} @@ -299,6 +301,7 @@ def test_task_sampler_args( seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) + res["scene_directory"] = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes") res["scene_period"] = self.TEST_SAMPLES_IN_SCENE res["max_tasks"] = self.TEST_SAMPLES_IN_SCENE * len(res["scenes"]) res["env_args"] = {} From 7d8a136f9c2cda55947fcdc9f312ae07c9bb9216 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Tue, 24 Aug 2021 07:22:24 -0700 Subject: [PATCH 06/17] updated model to ResnetGRU --- .../object_nav_ithor_ppo_baseline.py | 75 ++++++++++++++++--- 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index b2b56e30f..eaeaf7746 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -1,6 +1,5 @@ from math import ceil -from typing import Dict, Any, List, Optional - +from typing import Dict, Any, List, Optional, Sequence, Union import glob import os import gym @@ -9,7 +8,13 @@ import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR +from torchvision import models +from allenact.base_abstractions.preprocessor import Preprocessor, SensorPreprocessorGraph +from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor +from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor +from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor +from allenact.utils.experiment_utils import Builder from allenact.utils.experiment_utils import evenly_distribute_count_into_bins from allenact.algorithms.onpolicy_sync.losses import PPO from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig @@ -30,6 +35,7 @@ from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask from projects.objectnav_baselines.models.object_nav_models import ( ObjectNavBaselineActorCritic, + ResnetTensorObjectNavActorCritic ) @@ -74,10 +80,29 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): height=SCREEN_SIZE, width=SCREEN_SIZE, use_resnet_normalization=True, + uuid="rgb_lowres", ), GoalObjectTypeThorSensor(object_types=OBJECT_TYPES), ] + PREPROCESSORS = [ + Builder( + ResNetPreprocessor, + { + "input_height": SCREEN_SIZE, + "input_width": 
SCREEN_SIZE, + "output_width": 7, + "output_height": 7, + "output_dims": 512, + "pool": False, + "torchvision_resnet_model": models.resnet18, + "input_uuids": ["rgb_lowres"], + "output_uuid": "rgb_resnet", + }, + ), + ] + + ENV_ARGS = { "player_screen_height": CAMERA_WIDTH, "player_screen_width": CAMERA_HEIGHT, @@ -99,7 +124,7 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): @classmethod def tag(cls): - return "ObjectNavThorPPO" + return "ObjectNavThorPPOResnetGRU" @classmethod def training_pipeline(cls, **kwargs): @@ -167,23 +192,49 @@ def machine_params(cls, mode="train", **kwargs): else: raise NotImplementedError("mode must be 'train', 'valid', or 'test'.") + + sensor_preprocessor_graph = ( + SensorPreprocessorGraph( + source_observation_spaces=SensorSuite(cls.SENSORS).observation_spaces, + preprocessors=cls.PREPROCESSORS, + ) + if mode == "train" + or ( + (isinstance(nprocesses, int) and nprocesses > 0) + or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0) + ) + else None + ) + return MachineParams( nprocesses=nprocesses, devices=gpu_ids, + sampler_devices=sampler_devices + if mode == "train" + else gpu_ids, # ignored with > 1 gpu_ids + sensor_preprocessor_graph=sensor_preprocessor_graph ) + + + @classmethod def create_model(cls, **kwargs) -> nn.Module: - return ObjectNavBaselineActorCritic( - action_space=gym.spaces.Discrete( - len(ObjectNaviThorGridTask.class_action_names()) - ), - observation_space=SensorSuite(cls.SENSORS).observation_spaces, - rgb_uuid=cls.SENSORS[0].uuid, - depth_uuid=None, - goal_sensor_uuid="goal_object_type_ind", + has_rgb = any(isinstance(s, RGBSensor) for s in cls.SENSORS) + has_depth = any(isinstance(s, DepthSensor) for s in cls.SENSORS) + goal_sensor_uuid = next( + (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), + None, + ) + + return ResnetTensorObjectNavActorCritic( + action_space=gym.spaces.Discrete(len(ObjectNaviThorGridTask.class_action_names())), + observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, + goal_sensor_uuid=goal_sensor_uuid, + rgb_resnet_preprocessor_uuid="rgb_resnet" if has_rgb else None, + depth_resnet_preprocessor_uuid="depth_resnet" if has_depth else None, hidden_size=512, - object_type_embedding_dim=8, + goal_dims=32, ) @classmethod From 24fb1d192e3caf8d94480c741eba7a137702ebf1 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Tue, 24 Aug 2021 08:00:36 -0700 Subject: [PATCH 07/17] autoformatted ithorObjectnav default agentmode --- .../ithor_plugin/ithor_environment.py | 16 ++-- .../ithor_plugin/ithor_task_samplers.py | 23 ++--- .../object_nav_ithor_ppo_baseline.py | 88 +++++++++++-------- 3 files changed, 71 insertions(+), 56 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index 09e5ba3fe..342d4f0f8 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -47,7 +47,7 @@ def __init__( object_open_speed: float = 1.0, simplify_physics: bool = False, snap_to_grid: bool = False, - **kwargs + **kwargs, ) -> None: """Initializer. 
@@ -92,7 +92,7 @@ def __init__( self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None self._move_mag: Optional[float] = None self._grid_size: Optional[float] = gridSize - print("grid size is ",self._grid_size ) + print("grid size is ", self._grid_size) self._rotate_step_degrees = rotateStepDegrees self._visibility_distance = visibilityDistance self._fov = fov @@ -203,9 +203,9 @@ def start( snapToGrid=self._snap_to_grid, quality=self._quality, server_class=ai2thor.fifo_server.FifoServer, - gridSize = self._grid_size, - rotateStepDegrees = self._rotate_step_degrees, - visibilityDistance = self._visibility_distance , + gridSize=self._grid_size, + rotateStepDegrees=self._rotate_step_degrees, + visibilityDistance=self._visibility_distance, ) if ( @@ -749,7 +749,6 @@ def set_object_filter(self, object_ids: List[str]): def reset_object_filter(self): self.controller.step("ResetObjectFilter", renderImage=False) - def teleport( self, pose: Dict[str, float], @@ -771,12 +770,9 @@ def teleport( self._extra_teleport_kwargs["standing"] = True else: raise e - return self.teleport( - pose=pose, rotation=rotation, horizon=horizon - ) + return self.teleport(pose=pose, rotation=rotation, horizon=horizon) return e.metadata["lastActionSuccess"] - @staticmethod def position_dist( p0: Mapping[str, Any], diff --git a/allenact_plugins/ithor_plugin/ithor_task_samplers.py b/allenact_plugins/ithor_plugin/ithor_task_samplers.py index 005c23dc2..3e31a0c1b 100644 --- a/allenact_plugins/ithor_plugin/ithor_task_samplers.py +++ b/allenact_plugins/ithor_plugin/ithor_task_samplers.py @@ -201,6 +201,7 @@ def set_seed(self, seed: int): if seed is not None: set_seed(seed) + class ObjectNavDatasetTaskSampler(TaskSampler): def __init__( self, @@ -210,7 +211,7 @@ def __init__( max_steps: int, env_args: Dict[str, Any], action_space: gym.Space, - #rewards_config: Dict, + # rewards_config: Dict, seed: Optional[int] = None, deterministic_cudnn: bool = False, loop_dataset: bool = True, @@ -218,13 +219,11 @@ def __init__( env_class=IThorEnvironment, **kwargs, ) -> None: - #self.rewards_config = rewards_config + # self.rewards_config = rewards_config self.env_args = env_args self.scenes = scenes self.episodes = { - scene: ObjectNavDatasetTaskSampler.load_dataset( - scene, scene_directory - ) + scene: ObjectNavDatasetTaskSampler.load_dataset(scene, scene_directory) for scene in scenes } self.env_class = env_class @@ -259,10 +258,12 @@ def __init__( self.reset() def _create_environment(self) -> IThorEnvironment: - env = self.env_class( make_agents_visible=False, + env = self.env_class( + make_agents_visible=False, object_open_speed=0.05, restrict_to_initially_reachable_points=False, - **self.env_args) + **self.env_args, + ) return env @staticmethod @@ -339,7 +340,9 @@ def length(self) -> Union[int, float]: """ return float("inf") if self.max_tasks is None else self.max_tasks - def next_task(self, force_advance_scene: bool = False) -> Optional[ObjectNaviThorGridTask]: + def next_task( + self, force_advance_scene: bool = False + ) -> Optional[ObjectNaviThorGridTask]: if self.max_tasks is not None and self.max_tasks <= 0: return None @@ -400,7 +403,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[ObjectNaviTho max_steps=self.max_steps, action_space=self._action_space, ) - + return self._last_sampled_task def reset(self): @@ -411,4 +414,4 @@ def reset(self): def set_seed(self, seed: int): self.seed = seed if seed is not None: - set_seed(seed) \ No newline at end of file + set_seed(seed) diff 
--git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index eaeaf7746..2946db433 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -10,7 +10,10 @@ from torch.optim.lr_scheduler import LambdaLR from torchvision import models -from allenact.base_abstractions.preprocessor import Preprocessor, SensorPreprocessorGraph +from allenact.base_abstractions.preprocessor import ( + Preprocessor, + SensorPreprocessorGraph, +) from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor @@ -31,11 +34,14 @@ RGBSensorThor, GoalObjectTypeThorSensor, ) -from allenact_plugins.ithor_plugin.ithor_task_samplers import ObjectNavTaskSampler,ObjectNavDatasetTaskSampler +from allenact_plugins.ithor_plugin.ithor_task_samplers import ( + ObjectNavTaskSampler, + ObjectNavDatasetTaskSampler, +) from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask from projects.objectnav_baselines.models.object_nav_models import ( ObjectNavBaselineActorCritic, - ResnetTensorObjectNavActorCritic + ResnetTensorObjectNavActorCritic, ) @@ -48,26 +54,34 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): # A simple setting, train/valid/test are all the same single scene # and we're looking for a single object OBJECT_TYPES = sorted( - [ - "AlarmClock", - "Apple", - "Book", - "Bowl", - "Box", - "Candle", - "GarbageCan", - "HousePlant", - "Laptop", - "SoapBottle", - "Television", - "Toaster", - ] - ) - train_path = os.path.join(os.getcwd(), "datasets/ithor-objectnav/train/episodes", "*.json.gz") - val_path = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes", "*.json.gz") - test_path = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes", "*.json.gz") - - TRAIN_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(train_path)] + [ + "AlarmClock", + "Apple", + "Book", + "Bowl", + "Box", + "Candle", + "GarbageCan", + "HousePlant", + "Laptop", + "SoapBottle", + "Television", + "Toaster", + ] + ) + train_path = os.path.join( + os.getcwd(), "datasets/ithor-objectnav/train/episodes", "*.json.gz" + ) + val_path = os.path.join( + os.getcwd(), "datasets/ithor-objectnav/val/episodes", "*.json.gz" + ) + test_path = os.path.join( + os.getcwd(), "datasets/ithor-objectnav/val/episodes", "*.json.gz" + ) + + TRAIN_SCENES = [ + scene.split("/")[-1].split(".")[0] for scene in glob.glob(train_path) + ] VALID_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(val_path)] TEST_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(test_path)] @@ -102,14 +116,13 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): ), ] - ENV_ARGS = { "player_screen_height": CAMERA_WIDTH, "player_screen_width": CAMERA_HEIGHT, "quality": "Very Low", "rotateStepDegrees": 30, "visibilityDistance": 1.0, - "gridSize": 0.25, + "gridSize": 0.25, } MAX_STEPS = 128 @@ -117,7 +130,6 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): VALID_SAMPLES_IN_SCENE = 10 TEST_SAMPLES_IN_SCENE = 100 - DEFAULT_TRAIN_GPU_IDS = tuple(range(torch.cuda.device_count())) DEFAULT_VALID_GPU_IDS = (torch.cuda.device_count() - 1,) DEFAULT_TEST_GPU_IDS = (torch.cuda.device_count() - 1,) @@ -133,7 +145,7 @@ def training_pipeline(cls, **kwargs): num_mini_batch = 1 if not torch.cuda.is_available() else 6 
update_repeats = 4 num_steps = 128 - metric_accumulate_interval = 10000 # Log every 10 max length tasks + metric_accumulate_interval = 10000 # Log every 10 max length tasks save_interval = 5000000 gamma = 0.99 use_gae = True @@ -192,7 +204,6 @@ def machine_params(cls, mode="train", **kwargs): else: raise NotImplementedError("mode must be 'train', 'valid', or 'test'.") - sensor_preprocessor_graph = ( SensorPreprocessorGraph( source_observation_spaces=SensorSuite(cls.SENSORS).observation_spaces, @@ -212,12 +223,9 @@ def machine_params(cls, mode="train", **kwargs): sampler_devices=sampler_devices if mode == "train" else gpu_ids, # ignored with > 1 gpu_ids - sensor_preprocessor_graph=sensor_preprocessor_graph + sensor_preprocessor_graph=sensor_preprocessor_graph, ) - - - @classmethod def create_model(cls, **kwargs) -> nn.Module: has_rgb = any(isinstance(s, RGBSensor) for s in cls.SENSORS) @@ -228,7 +236,9 @@ def create_model(cls, **kwargs) -> nn.Module: ) return ResnetTensorObjectNavActorCritic( - action_space=gym.spaces.Discrete(len(ObjectNaviThorGridTask.class_action_names())), + action_space=gym.spaces.Discrete( + len(ObjectNaviThorGridTask.class_action_names()) + ), observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, goal_sensor_uuid=goal_sensor_uuid, rgb_resnet_preprocessor_uuid="rgb_resnet" if has_rgb else None, @@ -299,7 +309,9 @@ def train_task_sampler_args( seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) - res["scene_directory"] = os.path.join(os.getcwd(), "datasets/ithor-objectnav/train/episodes") + res["scene_directory"] = os.path.join( + os.getcwd(), "datasets/ithor-objectnav/train/episodes" + ) res["scene_period"] = "manual" res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) @@ -325,7 +337,9 @@ def valid_task_sampler_args( seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) - res["scene_directory"] = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes") + res["scene_directory"] = os.path.join( + os.getcwd(), "datasets/ithor-objectnav/val/episodes" + ) res["scene_period"] = self.VALID_SAMPLES_IN_SCENE res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"]) res["env_args"] = {} @@ -352,7 +366,9 @@ def test_task_sampler_args( seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) - res["scene_directory"] = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes") + res["scene_directory"] = os.path.join( + os.getcwd(), "datasets/ithor-objectnav/val/episodes" + ) res["scene_period"] = self.TEST_SAMPLES_IN_SCENE res["max_tasks"] = self.TEST_SAMPLES_IN_SCENE * len(res["scenes"]) res["env_args"] = {} From 37795be0fccc57f3a1af25cf5c4549ce59b6ed46 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Tue, 24 Aug 2021 10:50:33 -0700 Subject: [PATCH 08/17] reset to default iTHOR args --- allenact_plugins/ithor_plugin/ithor_environment.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index 342d4f0f8..b5b489f94 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -38,9 +38,9 @@ def __init__( fov: float = FOV, player_screen_width: int = 300, player_screen_height: int = 300, - gridSize: float = 0.15, - rotateStepDegrees: int = 30, - visibilityDistance: float = 1.0, + gridSize: float = 0.25, + rotateStepDegrees: int = 90, + visibilityDistance: float = 1.25, quality: str = "Very Low", restrict_to_initially_reachable_points: 
bool = False, make_agents_visible: bool = True, @@ -92,7 +92,6 @@ def __init__( self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None self._move_mag: Optional[float] = None self._grid_size: Optional[float] = gridSize - print("grid size is ", self._grid_size) self._rotate_step_degrees = rotateStepDegrees self._visibility_distance = visibilityDistance self._fov = fov From 6a7264948768e93465e1e499ad113b241ac8f058 Mon Sep 17 00:00:00 2001 From: kshitijd20 <36129805+kshitijd20@users.noreply.github.com> Date: Tue, 24 Aug 2021 22:22:06 +0200 Subject: [PATCH 09/17] restore iTHOR defaults --- allenact_plugins/ithor_plugin/ithor_environment.py | 11 +++++------ projects/tutorials/object_nav_ithor_ppo_baseline.py | 7 ++++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index b5b489f94..0562de0b7 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -39,14 +39,13 @@ def __init__( player_screen_width: int = 300, player_screen_height: int = 300, gridSize: float = 0.25, - rotateStepDegrees: int = 90, - visibilityDistance: float = 1.25, + rotate_step_degrees: int = 90, quality: str = "Very Low", restrict_to_initially_reachable_points: bool = False, make_agents_visible: bool = True, object_open_speed: float = 1.0, simplify_physics: bool = False, - snap_to_grid: bool = False, + snap_to_grid: bool = True, **kwargs, ) -> None: """Initializer. @@ -91,9 +90,9 @@ def __init__( self._initially_reachable_points: Optional[List[Dict]] = None self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None self._move_mag: Optional[float] = None - self._grid_size: Optional[float] = gridSize - self._rotate_step_degrees = rotateStepDegrees - self._visibility_distance = visibilityDistance + self._grid_size: Optional[float] = grid_size + self._rotate_step_degrees = rotate_step_degrees + self._visibility_distance = visibility_distance self._fov = fov self.restrict_to_initially_reachable_points = ( restrict_to_initially_reachable_points diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 2946db433..09cec2d5b 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -120,9 +120,10 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): "player_screen_height": CAMERA_WIDTH, "player_screen_width": CAMERA_HEIGHT, "quality": "Very Low", - "rotateStepDegrees": 30, - "visibilityDistance": 1.0, - "gridSize": 0.25, + "rotate_step_degrees": 30, + "visibility_distance": 1.0, + "grid_size": 0.25, + "snap_to_grid": False } MAX_STEPS = 128 From 90ef4f5184cf07796b0bf66380157412dfd7a439 Mon Sep 17 00:00:00 2001 From: kshitijd20 <36129805+kshitijd20@users.noreply.github.com> Date: Tue, 24 Aug 2021 22:33:24 +0200 Subject: [PATCH 10/17] looping in val/test sampler to False --- projects/tutorials/object_nav_ithor_ppo_baseline.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 09cec2d5b..998fdb806 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -313,6 +313,7 @@ def train_task_sampler_args( res["scene_directory"] = os.path.join( os.getcwd(), 
"datasets/ithor-objectnav/train/episodes" ) + res["loop_dataset"] = True res["scene_period"] = "manual" res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) @@ -338,11 +339,10 @@ def valid_task_sampler_args( seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) + res["loop_dataset"] = False res["scene_directory"] = os.path.join( os.getcwd(), "datasets/ithor-objectnav/val/episodes" ) - res["scene_period"] = self.VALID_SAMPLES_IN_SCENE - res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"]) res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) res["env_args"]["x_display"] = ( @@ -370,8 +370,7 @@ def test_task_sampler_args( res["scene_directory"] = os.path.join( os.getcwd(), "datasets/ithor-objectnav/val/episodes" ) - res["scene_period"] = self.TEST_SAMPLES_IN_SCENE - res["max_tasks"] = self.TEST_SAMPLES_IN_SCENE * len(res["scenes"]) + res["loop_dataset"] = False res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) res["env_args"]["x_display"] = ( From a7c7f0d093d3f930d25100aa1e6702aeb89a0273 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Tue, 24 Aug 2021 14:53:14 -0700 Subject: [PATCH 11/17] fixed typos --- allenact_plugins/ithor_plugin/ithor_environment.py | 2 +- projects/tutorials/object_nav_ithor_ppo_baseline.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index 0562de0b7..fe745c9f2 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -38,7 +38,7 @@ def __init__( fov: float = FOV, player_screen_width: int = 300, player_screen_height: int = 300, - gridSize: float = 0.25, + grid_size: float = 0.25, rotate_step_degrees: int = 90, quality: str = "Very Low", restrict_to_initially_reachable_points: bool = False, diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 998fdb806..08d448319 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -117,8 +117,8 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): ] ENV_ARGS = { - "player_screen_height": CAMERA_WIDTH, - "player_screen_width": CAMERA_HEIGHT, + "player_screen_height": CAMERA_HEIGHT, + "player_screen_width": CAMERA_WIDTH, "quality": "Very Low", "rotate_step_degrees": 30, "visibility_distance": 1.0, From 6a07db0256fc69510277cb1dbcf49affe9cd0828 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Wed, 25 Aug 2021 06:14:00 -0700 Subject: [PATCH 12/17] Dataset Task sampler naming change --- allenact_plugins/ithor_plugin/ithor_task_samplers.py | 4 ++-- projects/tutorials/object_nav_ithor_ppo_baseline.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_task_samplers.py b/allenact_plugins/ithor_plugin/ithor_task_samplers.py index 3e31a0c1b..92075b478 100644 --- a/allenact_plugins/ithor_plugin/ithor_task_samplers.py +++ b/allenact_plugins/ithor_plugin/ithor_task_samplers.py @@ -202,7 +202,7 @@ def set_seed(self, seed: int): set_seed(seed) -class ObjectNavDatasetTaskSampler(TaskSampler): +class ObjectNaviThorDatasetTaskSampler(TaskSampler): def __init__( self, scenes: List[str], @@ -223,7 +223,7 @@ def __init__( self.env_args = env_args self.scenes = scenes self.episodes = { - scene: ObjectNavDatasetTaskSampler.load_dataset(scene, scene_directory) + scene: ObjectNaviThorDatasetTaskSampler.load_dataset(scene, 
scene_directory) for scene in scenes } self.env_class = env_class diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 08d448319..c2c0a9852 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -36,7 +36,7 @@ ) from allenact_plugins.ithor_plugin.ithor_task_samplers import ( ObjectNavTaskSampler, - ObjectNavDatasetTaskSampler, + ObjectNaviThorDatasetTaskSampler, ) from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask from projects.objectnav_baselines.models.object_nav_models import ( @@ -250,7 +250,7 @@ def create_model(cls, **kwargs) -> nn.Module: @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: - return ObjectNavDatasetTaskSampler(**kwargs) + return ObjectNaviThorDatasetTaskSampler(**kwargs) @staticmethod def _partition_inds(n: int, num_parts: int): From fe3ed2eb4966c167a6dea09c846a337443a10aa4 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Wed, 25 Aug 2021 07:12:13 -0700 Subject: [PATCH 13/17] pose to position --- allenact_plugins/ithor_plugin/ithor_environment.py | 10 +++++----- allenact_plugins/ithor_plugin/ithor_task_samplers.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index fe745c9f2..fce33e4b0 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -749,16 +749,16 @@ def reset_object_filter(self): def teleport( self, - pose: Dict[str, float], + position: Dict[str, float], rotation: Dict[str, float], horizon: float = 0.0, ): try: e = self.controller.step( action="TeleportFull", - x=pose["x"], - y=pose["y"], - z=pose["z"], + x=position["x"], + y=position["y"], + z=position["z"], rotation=rotation, horizon=horizon, **self._extra_teleport_kwargs, @@ -768,7 +768,7 @@ def teleport( self._extra_teleport_kwargs["standing"] = True else: raise e - return self.teleport(pose=pose, rotation=rotation, horizon=horizon) + return self.teleport(position=position, rotation=rotation, horizon=horizon) return e.metadata["lastActionSuccess"] @staticmethod diff --git a/allenact_plugins/ithor_plugin/ithor_task_samplers.py b/allenact_plugins/ithor_plugin/ithor_task_samplers.py index 92075b478..f0c3c9b0d 100644 --- a/allenact_plugins/ithor_plugin/ithor_task_samplers.py +++ b/allenact_plugins/ithor_plugin/ithor_task_samplers.py @@ -391,7 +391,7 @@ def next_task( if self.max_tasks is not None: self.max_tasks -= 1 if not self.env.teleport( - pose=episode["initial_position"], + position=episode["initial_position"], rotation=episode["initial_orientation"], horizon=episode.get("initial_horizon", 0), ): From be37e2a8128d80b23e6916bdad6bef846f0a508a Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Wed, 25 Aug 2021 10:14:14 -0700 Subject: [PATCH 14/17] adding reward config --- .../ithor_plugin/ithor_task_samplers.py | 5 ++-- allenact_plugins/ithor_plugin/ithor_tasks.py | 30 +++++++++++++++++-- .../object_nav_ithor_ppo_baseline.py | 11 +++++-- 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_task_samplers.py b/allenact_plugins/ithor_plugin/ithor_task_samplers.py index f0c3c9b0d..ca83de9be 100644 --- a/allenact_plugins/ithor_plugin/ithor_task_samplers.py +++ b/allenact_plugins/ithor_plugin/ithor_task_samplers.py @@ -211,7 +211,7 @@ def __init__( max_steps: int, env_args: Dict[str, Any], 
action_space: gym.Space, - # rewards_config: Dict, + rewards_config: Dict, seed: Optional[int] = None, deterministic_cudnn: bool = False, loop_dataset: bool = True, @@ -219,7 +219,7 @@ def __init__( env_class=IThorEnvironment, **kwargs, ) -> None: - # self.rewards_config = rewards_config + self.rewards_config = rewards_config self.env_args = env_args self.scenes = scenes self.episodes = { @@ -402,6 +402,7 @@ def next_task( task_info=task_info, max_steps=self.max_steps, action_space=self._action_space, + reward_configs=self.rewards_config, ) return self._last_sampled_task diff --git a/allenact_plugins/ithor_plugin/ithor_tasks.py b/allenact_plugins/ithor_plugin/ithor_tasks.py index 75670db57..259f81f7f 100644 --- a/allenact_plugins/ithor_plugin/ithor_tasks.py +++ b/allenact_plugins/ithor_plugin/ithor_tasks.py @@ -3,6 +3,7 @@ import gym import numpy as np +import math from allenact.base_abstractions.misc import RLStepResult from allenact.base_abstractions.sensor import Sensor @@ -65,6 +66,7 @@ def __init__( sensors: List[Sensor], task_info: Dict[str, Any], max_steps: int, + reward_configs: Dict[str, Any], **kwargs, ) -> None: """Initializer. @@ -74,6 +76,9 @@ def __init__( super().__init__( env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs ) + + self._rewards: List[float] = [] + self.reward_configs = reward_configs self._took_end_action: bool = False self._success: Optional[bool] = False self._subsampled_locations_from_which_obj_visible: Optional[ @@ -137,15 +142,36 @@ def is_goal_object_visible(self) -> bool: for o in self.env.visible_objects() ) + def dist_to_target(self): + return self.env.distance_to_point(self.task_info["target"]) + + def judge(self) -> float: + """Judge the last event.""" + reward = self.reward_configs["step_penalty"] + + #reward += self.shaping() + + if self._took_end_action: + if self._success: + reward += self.reward_configs["goal_success_reward"] + else: + reward += self.reward_configs["failed_stop_reward"] + elif self.num_steps_taken() + 1 >= self.max_steps: + reward += self.reward_configs.get("reached_max_steps_reward", 0.0) + + self._rewards.append(float(reward)) + return float(reward) + + def judge_old(self) -> float: """Compute the reward after having taken a step.""" reward = -0.01 if not self.last_action_success: - reward += -0.03 + reward += -0.00 if self._took_end_action: - reward += 1.0 if self._success else -1.0 + reward += 10.0 if self._success else -0.0 return float(reward) diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index c2c0a9852..4ab8025c3 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -126,7 +126,13 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): "snap_to_grid": False } - MAX_STEPS = 128 + MAX_STEPS = 500 + REWARD_CONFIG = { + "step_penalty": -0.01, + "goal_success_reward": 10.0, + "failed_stop_reward": 0.0, + "shaping_weight": 1.0, + } ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None VALID_SAMPLES_IN_SCENE = 10 TEST_SAMPLES_IN_SCENE = 100 @@ -137,7 +143,7 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): @classmethod def tag(cls): - return "ObjectNavThorPPOResnetGRU" + return "ObjectNaviThorPPOResnetGRU" @classmethod def training_pipeline(cls, **kwargs): @@ -293,6 +299,7 @@ def _get_sampler_args_for_scene_split( ), "seed": seeds[process_ind] if seeds is not None else None, "deterministic_cudnn": deterministic_cudnn, + "rewards_config": 
self.REWARD_CONFIG, } def train_task_sampler_args( From a76d94f0b4e7080846336ca8e1060593000852c4 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Wed, 25 Aug 2021 14:10:09 -0700 Subject: [PATCH 15/17] shaping rewards added --- .../ithor_plugin/ithor_environment.py | 126 ++++++++++++++++++ allenact_plugins/ithor_plugin/ithor_tasks.py | 66 ++++++++- 2 files changed, 187 insertions(+), 5 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index fce33e4b0..7ecff01b9 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -15,6 +15,12 @@ from allenact.utils.system import get_logger from allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV from allenact_plugins.ithor_plugin.ithor_util import round_to_factor +from ai2thor.util import metrics +from allenact.utils.cache_utils import ( + DynamicDistanceCache, + pos_to_str_for_cache, + str_to_pos_for_cache, +) class IThorEnvironment(object): @@ -31,6 +37,7 @@ class IThorEnvironment(object): def __init__( self, + all_metadata_available: bool = True, x_display: Optional[str] = None, docker_enabled: bool = False, local_thor_build: Optional[str] = None, @@ -46,6 +53,7 @@ def __init__( object_open_speed: float = 1.0, simplify_physics: bool = False, snap_to_grid: bool = True, + agent_count: int = 1, **kwargs, ) -> None: """Initializer. @@ -86,6 +94,7 @@ def __init__( self._started = False self._quality = quality self._snap_to_grid = snap_to_grid + self.agent_count = agent_count self._initially_reachable_points: Optional[List[Dict]] = None self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None @@ -101,14 +110,119 @@ def __init__( self.object_open_speed = object_open_speed self._always_return_visible_range = False self.simplify_physics = simplify_physics + self.all_metadata_available = all_metadata_available + + + + self.scene_to_reachable_positions: Optional[Dict[str, Any]] = None + self.distance_cache: Optional[DynamicDistanceCache] = None + + self.start(None) # noinspection PyTypeHints + if self.all_metadata_available: + self.scene_to_reachable_positions = { + self.scene_name: copy.deepcopy(self.currently_reachable_points) + } + assert len(self.scene_to_reachable_positions[self.scene_name]) > 10 + + self.distance_cache = DynamicDistanceCache(rounding=1) self.controller.docker_enabled = docker_enabled # type: ignore self._extra_teleport_kwargs: Dict[ str, Any ] = {} # Used for backwards compatability with the teleport action + def path_from_point_to_object_type( + self, point: Dict[str, float], object_type: str, allowed_error: float + ) -> Optional[List[Dict[str, float]]]: + event = self.controller.step( + action="GetShortestPath", + objectType=object_type, + position=point, + allowedError=allowed_error, + ) + if event.metadata["lastActionSuccess"]: + return event.metadata["actionReturn"]["corners"] + else: + get_logger().debug( + "Failed to find path for {} in {}. Start point {}, agent state {}.".format( + object_type, + self.controller.last_event.metadata["sceneName"], + point, + self.agent_state(), + ) + ) + return None + + def distance_from_point_to_object_type( + self, point: Dict[str, float], object_type: str, allowed_error: float + ) -> float: + """Minimal geodesic distance from a point to an object of the given + type. + It might return -1.0 for unreachable targets. 
+ """ + path = self.path_from_point_to_object_type(point, object_type, allowed_error) + if path: + # Because `allowed_error != 0` means that the path returned above might not start + # at `point`, we explicitly add any offset there is. + s_dist = math.sqrt( + (point["x"] - path[0]["x"]) ** 2 + (point["z"] - path[0]["z"]) ** 2 + ) + return metrics.path_distance(path) + s_dist + return -1.0 + + def distance_to_object_type(self, object_type: str, agent_id: int = 0) -> float: + """Minimal geodesic distance to object of given type from agent's + current location. + It might return -1.0 for unreachable targets. + """ + assert 0 <= agent_id < self.agent_count + assert ( + self.all_metadata_available + ), "`distance_to_object_type` cannot be called when `self.all_metadata_available` is `False`." + + def retry_dist(position: Dict[str, float], object_type: str): + allowed_error = 0.05 + debug_log = "" + d = -1.0 + while allowed_error < 2.5: + d = self.distance_from_point_to_object_type( + position, object_type, allowed_error + ) + if d < 0: + debug_log = ( + f"In scene {self.scene_name}, could not find a path from {position} to {object_type} with" + f" {allowed_error} error tolerance. Increasing this tolerance to" + f" {2 * allowed_error} any trying again." + ) + allowed_error *= 2 + else: + break + if d < 0: + get_logger().warning( + f"In scene {self.scene_name}, could not find a path from {position} to {object_type}" + f" with {allowed_error} error tolerance. Returning a distance of -1." + ) + elif debug_log != "": + get_logger().debug(debug_log) + return d + + return self.distance_cache.find_distance( + self.scene_name, + self.controller.last_event.events[agent_id].metadata["agent"]["position"], + object_type, + retry_dist, + ) + + + @property + def currently_reachable_points(self) -> List[Dict[str, float]]: + """List of {"x": x, "y": y, "z": z} locations in the scene that are + currently reachable.""" + self.step({"action": "GetReachablePositions"}) + return self.last_event.metadata["actionReturn"] # type:ignore + @property def scene_name(self) -> str: """Current ai2thor scene.""" @@ -541,6 +655,18 @@ def currently_reachable_points(self) -> List[Dict[str, float]]: currently reachable.""" self.step({"action": "GetReachablePositions"}) return self.last_event.metadata["actionReturn"] # type:ignore + + def agent_state(self, agent_id: int = 0) -> Dict: + """Return agent position, rotation and horizon.""" + assert 0 <= agent_id < self.agent_count + + agent_meta = self.last_event.events[agent_id].metadata["agent"] + return { + **{k: float(v) for k, v in agent_meta["position"].items()}, + "rotation": {k: float(v) for k, v in agent_meta["rotation"].items()}, + "horizon": round(float(agent_meta["cameraHorizon"]), 1), + } + def get_agent_location(self) -> Dict[str, Union[float, bool]]: """Gets agent's location.""" diff --git a/allenact_plugins/ithor_plugin/ithor_tasks.py b/allenact_plugins/ithor_plugin/ithor_tasks.py index 259f81f7f..d99c0d4c9 100644 --- a/allenact_plugins/ithor_plugin/ithor_tasks.py +++ b/allenact_plugins/ithor_plugin/ithor_tasks.py @@ -85,9 +85,21 @@ def __init__( List[Tuple[float, float, int, int]] ] = None + self._all_metadata_available = env.all_metadata_available + self.path: List = ( + [] + ) # the initial coordinate will be directly taken from the optimal path + self.travelled_distance = 0.0 self.task_info["followed_path"] = [self.env.get_agent_location()] self.task_info["action_names"] = self.class_action_names() + if self._all_metadata_available: + self.last_geodesic_distance = 
self.env.distance_to_object_type( + self.task_info["object_type"] + ) + self.optimal_distance = self.last_geodesic_distance + self.closest_geo_distance = self.last_geodesic_distance + @property def action_space(self): return gym.spaces.Discrete(len(self._actions)) @@ -121,13 +133,21 @@ def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult: ) and self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE is not None: self.env.update_graph_with_failed_action(failed_action=action_str) - self.task_info["followed_path"].append(self.env.get_agent_location()) + pose = self.env.agent_state() + + self.path.append({k: pose[k] for k in ["x", "y", "z"]}) + self.task_info["followed_path"].append(pose) + if len(self.path) > 1: + self.travelled_distance += IThorEnvironment.position_dist( + p0=self.path[-1], p1=self.path[-2], ignore_y=True + ) + #self.task_info["followed_path"].append(self.env.get_agent_location()) step_result = RLStepResult( observation=self.get_observations(), reward=self.judge(), done=self.is_done(), - info={"last_action_success": self.last_action_success}, + info={"last_action_success": self.last_action_success, "action": action_str}, ) return step_result @@ -142,15 +162,51 @@ def is_goal_object_visible(self) -> bool: for o in self.env.visible_objects() ) - def dist_to_target(self): - return self.env.distance_to_point(self.task_info["target"]) + + def shaping(self) -> float: + rew = 0.0 + + if self.reward_configs["shaping_weight"] == 0.0: + return rew + + geodesic_distance = self.env.distance_to_object_type( + self.task_info["object_type"] + ) + + # Ensuring the reward magnitude is not greater than the total distance moved + max_reward_mag = 0.0 + if len(self.path) >= 2: + p0, p1 = self.path[-2:] + max_reward_mag = math.sqrt( + (p0["x"] - p1["x"]) ** 2 + (p0["z"] - p1["z"]) ** 2 + ) + + if self.reward_configs.get("positive_only_reward", False): + if geodesic_distance > 0.5: + rew = max(self.closest_geo_distance - geodesic_distance, 0) + else: + if ( + self.last_geodesic_distance > -0.5 and geodesic_distance > -0.5 + ): # (robothor limits) + rew += self.last_geodesic_distance - geodesic_distance + + self.last_geodesic_distance = geodesic_distance + self.closest_geo_distance = min(self.closest_geo_distance, geodesic_distance) + + return ( + max( + min(rew, max_reward_mag), + -max_reward_mag, + ) + * self.reward_configs["shaping_weight"] + ) def judge(self) -> float: """Judge the last event.""" reward = self.reward_configs["step_penalty"] - #reward += self.shaping() + reward += self.shaping() if self._took_end_action: if self._success: From 11bc7cd88a907eaeece781afbc1409ca78d70e44 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Wed, 25 Aug 2021 14:35:09 -0700 Subject: [PATCH 16/17] auto formatting --- allenact_plugins/ithor_plugin/ithor_environment.py | 8 +------- allenact_plugins/ithor_plugin/ithor_tasks.py | 9 +++++---- projects/tutorials/object_nav_ithor_ppo_baseline.py | 12 ++++++++---- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index 7ecff01b9..ffde80e93 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -112,13 +112,9 @@ def __init__( self.simplify_physics = simplify_physics self.all_metadata_available = all_metadata_available - - self.scene_to_reachable_positions: Optional[Dict[str, Any]] = None self.distance_cache: Optional[DynamicDistanceCache] = None - - self.start(None) # 
noinspection PyTypeHints if self.all_metadata_available: @@ -215,7 +211,6 @@ def retry_dist(position: Dict[str, float], object_type: str): retry_dist, ) - @property def currently_reachable_points(self) -> List[Dict[str, float]]: """List of {"x": x, "y": y, "z": z} locations in the scene that are @@ -655,7 +650,7 @@ def currently_reachable_points(self) -> List[Dict[str, float]]: currently reachable.""" self.step({"action": "GetReachablePositions"}) return self.last_event.metadata["actionReturn"] # type:ignore - + def agent_state(self, agent_id: int = 0) -> Dict: """Return agent position, rotation and horizon.""" assert 0 <= agent_id < self.agent_count @@ -667,7 +662,6 @@ def agent_state(self, agent_id: int = 0) -> Dict: "horizon": round(float(agent_meta["cameraHorizon"]), 1), } - def get_agent_location(self) -> Dict[str, Union[float, bool]]: """Gets agent's location.""" metadata = self.controller.last_event.metadata diff --git a/allenact_plugins/ithor_plugin/ithor_tasks.py b/allenact_plugins/ithor_plugin/ithor_tasks.py index d99c0d4c9..44664c686 100644 --- a/allenact_plugins/ithor_plugin/ithor_tasks.py +++ b/allenact_plugins/ithor_plugin/ithor_tasks.py @@ -141,13 +141,16 @@ def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult: self.travelled_distance += IThorEnvironment.position_dist( p0=self.path[-1], p1=self.path[-2], ignore_y=True ) - #self.task_info["followed_path"].append(self.env.get_agent_location()) + # self.task_info["followed_path"].append(self.env.get_agent_location()) step_result = RLStepResult( observation=self.get_observations(), reward=self.judge(), done=self.is_done(), - info={"last_action_success": self.last_action_success, "action": action_str}, + info={ + "last_action_success": self.last_action_success, + "action": action_str, + }, ) return step_result @@ -162,7 +165,6 @@ def is_goal_object_visible(self) -> bool: for o in self.env.visible_objects() ) - def shaping(self) -> float: rew = 0.0 @@ -201,7 +203,6 @@ def shaping(self) -> float: * self.reward_configs["shaping_weight"] ) - def judge(self) -> float: """Judge the last event.""" reward = self.reward_configs["step_penalty"] diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 4ab8025c3..8aa08d405 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -80,10 +80,14 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): ) TRAIN_SCENES = [ - scene.split("/")[-1].split(".")[0] for scene in glob.glob(train_path) + os.path.basename(scene).split(".")[0] for scene in glob.glob(train_path) + ] + VALID_SCENES = [ + os.path.basename(scene).split(".")[0] for scene in glob.glob(val_path) + ] + TEST_SCENES = [ + os.path.basename(scene).split(".")[0] for scene in glob.glob(test_path) ] - VALID_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(val_path)] - TEST_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(test_path)] # Setting up sensors and basic environment details CAMERA_WIDTH = 400 @@ -123,7 +127,7 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): "rotate_step_degrees": 30, "visibility_distance": 1.0, "grid_size": 0.25, - "snap_to_grid": False + "snap_to_grid": False, } MAX_STEPS = 500 From 5f86224f5ad9539ebc8469b5c6eaf192f81baff0 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Fri, 27 Aug 2021 02:43:57 -0700 Subject: [PATCH 17/17] LGTM warnings addressed --- .../ithor_plugin/ithor_environment.py | 16 
+++------------- .../ithor_plugin/ithor_task_samplers.py | 2 +- .../tutorials/object_nav_ithor_ppo_baseline.py | 17 ++++------------- 3 files changed, 8 insertions(+), 27 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index ffde80e93..c77b08ada 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -16,11 +16,7 @@ from allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV from allenact_plugins.ithor_plugin.ithor_util import round_to_factor from ai2thor.util import metrics -from allenact.utils.cache_utils import ( - DynamicDistanceCache, - pos_to_str_for_cache, - str_to_pos_for_cache, -) +from allenact.utils.cache_utils import DynamicDistanceCache class IThorEnvironment(object): @@ -282,10 +278,7 @@ def last_action_return(self, value: Any) -> None: self.controller.last_event.metadata["actionReturn"] = value def start( - self, - scene_name: Optional[str], - move_mag: float = 0.25, - **kwargs, + self, scene_name: Optional[str], move_mag: float = 0.25, **kwargs, ) -> None: """Starts the ai2thor controller if it was previously stopped. @@ -340,10 +333,7 @@ def stop(self) -> None: self._started = False def reset( - self, - scene_name: Optional[str], - move_mag: float = 0.25, - **kwargs, + self, scene_name: Optional[str], move_mag: float = 0.25, **kwargs, ): """Resets the ai2thor in a new scene. diff --git a/allenact_plugins/ithor_plugin/ithor_task_samplers.py b/allenact_plugins/ithor_plugin/ithor_task_samplers.py index ca83de9be..19d0f4e6a 100644 --- a/allenact_plugins/ithor_plugin/ithor_task_samplers.py +++ b/allenact_plugins/ithor_plugin/ithor_task_samplers.py @@ -2,7 +2,7 @@ import random import gzip import json -from typing import List, Optional, Union, Dict, Any, cast, Tuple +from typing import List, Optional, Union, Dict, Any, cast import gym diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 8aa08d405..ed7a3d68c 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -1,5 +1,5 @@ from math import ceil -from typing import Dict, Any, List, Optional, Sequence, Union +from typing import Dict, Any, List, Optional, Sequence import glob import os import gym @@ -10,10 +10,8 @@ from torch.optim.lr_scheduler import LambdaLR from torchvision import models -from allenact.base_abstractions.preprocessor import ( - Preprocessor, - SensorPreprocessorGraph, -) +from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph + from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor @@ -35,12 +33,10 @@ GoalObjectTypeThorSensor, ) from allenact_plugins.ithor_plugin.ithor_task_samplers import ( - ObjectNavTaskSampler, ObjectNaviThorDatasetTaskSampler, ) from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask from projects.objectnav_baselines.models.object_nav_models import ( - ObjectNavBaselineActorCritic, ResnetTensorObjectNavActorCritic, ) @@ -179,10 +175,7 @@ def training_pipeline(cls, **kwargs): gae_lambda=gae_lambda, advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD, pipeline_stages=[ - PipelineStage( - loss_names=["ppo_loss"], - max_stage_steps=ppo_steps, - ), + 
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,),
             ],
             lr_scheduler_builder=Builder(
                 LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
             ),
         )
 
     @classmethod
     def machine_params(cls, mode="train", **kwargs):
-        num_gpus = torch.cuda.device_count()
-        has_gpu = num_gpus != 0
 
         if mode == "train":
             nprocesses = 40
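# --- Editor's note: illustrative sketch, not part of any patch in this series. ---
# Putting the reward pieces from patches 14 and 15 together: every step pays
# `step_penalty`, progress toward the goal adds a shaping term (the decrease in geodesic
# distance, clipped to the distance actually travelled and scaled by `shaping_weight`),
# and ending the episode adds `goal_success_reward` or `failed_stop_reward`. A
# stand-alone approximation of that arithmetic (the function below is hypothetical; the
# config values are the REWARD_CONFIG from the experiment file):
REWARD_CONFIG = {
    "step_penalty": -0.01,
    "goal_success_reward": 10.0,
    "failed_stop_reward": 0.0,
    "shaping_weight": 1.0,
}


def approx_step_reward(
    prev_geo_dist: float,
    geo_dist: float,
    dist_moved: float,
    took_end_action: bool,
    success: bool,
) -> float:
    reward = REWARD_CONFIG["step_penalty"]
    # Shaping: reward the decrease in geodesic distance, but never by more than the
    # distance actually travelled this step (this mirrors the clipping in `shaping()`).
    shaped = prev_geo_dist - geo_dist
    shaped = max(min(shaped, dist_moved), -dist_moved)
    reward += shaped * REWARD_CONFIG["shaping_weight"]
    if took_end_action:
        reward += (
            REWARD_CONFIG["goal_success_reward"]
            if success
            else REWARD_CONFIG["failed_stop_reward"]
        )
    return reward


# Example: moving 0.25m closer to the target and then issuing a successful End action
# yields -0.01 + 0.25 + 10.0 = 10.24.
# --- end editor's note ---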