From 53b8ca66d4f77576da1127828214f56fd2e6353c Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Thu, 19 Aug 2021 14:43:53 -0700 Subject: [PATCH 01/17] from locobot to default --- projects/objectnav_baselines/experiments/objectnav_thor_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/objectnav_baselines/experiments/objectnav_thor_base.py b/projects/objectnav_baselines/experiments/objectnav_thor_base.py index 577ea3996..e164657f8 100644 --- a/projects/objectnav_baselines/experiments/objectnav_thor_base.py +++ b/projects/objectnav_baselines/experiments/objectnav_thor_base.py @@ -89,7 +89,7 @@ def env_args(cls): visibilityDistance=cls.VISIBILITY_DISTANCE, gridSize=cls.STEP_SIZE, snapToGrid=False, - agentMode="locobot", + agentMode="default", fieldOfView=horizontal_to_vertical_fov( horizontal_fov_in_degrees=cls.HORIZONTAL_FIELD_OF_VIEW, width=cls.CAMERA_WIDTH, From 877f7a574bc87bcc22b63df969de72bd6f9b6e5f Mon Sep 17 00:00:00 2001 From: kshitijd20 <36129805+kshitijd20@users.noreply.github.com> Date: Mon, 23 Aug 2021 16:45:15 +0200 Subject: [PATCH 02/17] Objectnav iTHOR default agent baseline --- .../ithor_plugin/ithor_environment.py | 3 +- .../object_nav_ithor_ppo_baseline.py | 309 ++++++++++++++++++ 2 files changed, 311 insertions(+), 1 deletion(-) create mode 100644 projects/tutorials/object_nav_ithor_ppo_baseline.py diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index 50c5db406..6d2815fe8 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -43,6 +43,7 @@ def __init__( make_agents_visible: bool = True, object_open_speed: float = 1.0, simplify_physics: bool = False, + **kwargs ) -> None: """Initializer. @@ -96,7 +97,7 @@ def __init__( self._always_return_visible_range = False self.simplify_physics = simplify_physics - self.start(None) + self.start(**kwargs) # noinspection PyTypeHints self.controller.docker_enabled = docker_enabled # type: ignore diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py new file mode 100644 index 000000000..35685ef0f --- /dev/null +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -0,0 +1,309 @@ +from math import ceil +from typing import Dict, Any, List, Optional + +import gym +import numpy as np +import torch +import torch.nn as nn +import torch.optim as optim +from torch.optim.lr_scheduler import LambdaLR + +from allenact.utils.experiment_utils import evenly_distribute_count_into_bins +from allenact.algorithms.onpolicy_sync.losses import PPO +from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig +from allenact.base_abstractions.experiment_config import ExperimentConfig, MachineParams +from allenact.base_abstractions.sensor import SensorSuite +from allenact.base_abstractions.task import TaskSampler +from allenact.utils.experiment_utils import ( + Builder, + PipelineStage, + TrainingPipeline, + LinearDecay, +) +from allenact_plugins.ithor_plugin.ithor_sensors import ( + RGBSensorThor, + GoalObjectTypeThorSensor, +) +from allenact_plugins.ithor_plugin.ithor_task_samplers import ObjectNavTaskSampler +from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask +from projects.objectnav_baselines.models.object_nav_models import ( + ObjectNavBaselineActorCritic, +) + + +class ObjectNavThorPPOExperimentConfig(ExperimentConfig): + """A simple object navigation experiment in THOR. 
+ + Training with PPO. + """ + + # A simple setting, train/valid/test are all the same single scene + # and we're looking for a single object + OBJECT_TYPES = sorted( + [ + "AlarmClock", + "Apple", + "Book", + "Bowl", + "Box", + "Candle", + "GarbageCan", + "HousePlant", + "Laptop", + "SoapBottle", + "Television", + "Toaster", + ] + ) + train_path = os.path.join(os.getcwd(), "datasets/ithor-objectnav/train/episodes", "*.json.gz") + val_path = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes", "*.json.gz") + test_path = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes", "*.json.gz") + + TRAIN_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(train_path)] + VALID_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(val_path)] + TEST_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(test_path)] + + # Setting up sensors and basic environment details + CAMERA_WIDTH = 400 + CAMERA_HEIGHT = 300 + SCREEN_SIZE = 224 + SENSORS = [ + RGBSensorThor( + height=SCREEN_SIZE, + width=SCREEN_SIZE, + use_resnet_normalization=True, + ), + GoalObjectTypeThorSensor(object_types=OBJECT_TYPES), + ] + + ENV_ARGS = { + "player_screen_height": CAMERA_WIDTH, + "player_screen_width": CAMERA_HEIGHT, + "quality": "Very Low", + "rotateStepDegrees": 30, + "visibilityDistance"=1.0, + "gridSize" =0.25, + } + + MAX_STEPS = 128 + ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None + VALID_SAMPLES_IN_SCENE = 10 + TEST_SAMPLES_IN_SCENE = 100 + + + DEFAULT_TRAIN_GPU_IDS = tuple(range(torch.cuda.device_count())) + DEFAULT_VALID_GPU_IDS = (torch.cuda.device_count() - 1,) + DEFAULT_TEST_GPU_IDS = (torch.cuda.device_count() - 1,) + + @classmethod + def tag(cls): + return "ObjectNavThorPPO" + + @classmethod + def training_pipeline(cls, **kwargs): + ppo_steps = int(300000000) + lr = 3e-4 + num_mini_batch = 1 if not torch.cuda.is_available() else 6 + update_repeats = 4 + num_steps = 128 + metric_accumulate_interval = 10000 # Log every 10 max length tasks + save_interval = 5000000 + gamma = 0.99 + use_gae = True + gae_lambda = 0.95 + max_grad_norm = 0.5 + + return TrainingPipeline( + save_interval=save_interval, + metric_accumulate_interval=metric_accumulate_interval, + optimizer_builder=Builder(optim.Adam, dict(lr=lr)), + num_mini_batch=num_mini_batch, + update_repeats=update_repeats, + max_grad_norm=max_grad_norm, + num_steps=num_steps, + named_losses={ + "ppo_loss": PPO(clip_decay=LinearDecay(ppo_steps), **PPOConfig), + }, + gamma=gamma, + use_gae=use_gae, + gae_lambda=gae_lambda, + advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD, + pipeline_stages=[ + PipelineStage( + loss_names=["ppo_loss"], + max_stage_steps=ppo_steps, + ), + ], + lr_scheduler_builder=Builder( + LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)} + ), + ) + + @classmethod + def machine_params(cls, mode="train", **kwargs): + num_gpus = torch.cuda.device_count() + has_gpu = num_gpus != 0 + + if mode == "train": + nprocesses = 40 + workers_per_device = 1 + devices = ( + [torch.device("cpu")] + if not torch.cuda.is_available() + else cls.DEFAULT_TRAIN_GPU_IDS * workers_per_device + ) + nprocesses = evenly_distribute_count_into_bins( + nprocesses, max(len(devices), 1) + ) + sampler_devices = cls.DEFAULT_TRAIN_GPU_IDS + elif mode == "valid": + nprocesses = 1 + gpu_ids = [] if not torch.cuda.is_available() else cls.DEFAULT_VALID_GPU_IDS + elif mode == "test": + nprocesses = 1 + gpu_ids = [] if not torch.cuda.is_available() else cls.DEFAULT_TEST_GPU_IDS + else: + raise 
NotImplementedError("mode must be 'train', 'valid', or 'test'.") + + return MachineParams( + nprocesses=nprocesses, + devices=gpu_ids, + ) + + @classmethod + def create_model(cls, **kwargs) -> nn.Module: + return ObjectNavBaselineActorCritic( + action_space=gym.spaces.Discrete( + len(ObjectNaviThorGridTask.class_action_names()) + ), + observation_space=SensorSuite(cls.SENSORS).observation_spaces, + rgb_uuid=cls.SENSORS[0].uuid, + depth_uuid=None, + goal_sensor_uuid="goal_object_type_ind", + hidden_size=512, + object_type_embedding_dim=8, + ) + + @classmethod + def make_sampler_fn(cls, **kwargs) -> TaskSampler: + return ObjectNavTaskSampler(**kwargs) + + @staticmethod + def _partition_inds(n: int, num_parts: int): + return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype( + np.int32 + ) + + def _get_sampler_args_for_scene_split( + self, + scenes: List[str], + process_ind: int, + total_processes: int, + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + ) -> Dict[str, Any]: + if total_processes > len(scenes): # oversample some scenes -> bias + if total_processes % len(scenes) != 0: + print( + "Warning: oversampling some of the scenes to feed all processes." + " You can avoid this by setting a number of workers divisible by the number of scenes" + ) + scenes = scenes * int(ceil(total_processes / len(scenes))) + scenes = scenes[: total_processes * (len(scenes) // total_processes)] + else: + if len(scenes) % total_processes != 0: + print( + "Warning: oversampling some of the scenes to feed all processes." + " You can avoid this by setting a number of workers divisor of the number of scenes" + ) + inds = self._partition_inds(len(scenes), total_processes) + + return { + "scenes": scenes[inds[process_ind] : inds[process_ind + 1]], + "object_types": self.OBJECT_TYPES, + "env_args": self.ENV_ARGS, + "max_steps": self.MAX_STEPS, + "sensors": self.SENSORS, + "action_space": gym.spaces.Discrete( + len(ObjectNaviThorGridTask.class_action_names()) + ), + "seed": seeds[process_ind] if seeds is not None else None, + "deterministic_cudnn": deterministic_cudnn, + } + + def train_task_sampler_args( + self, + process_ind: int, + total_processes: int, + devices: Optional[List[int]] = None, + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + ) -> Dict[str, Any]: + res = self._get_sampler_args_for_scene_split( + self.TRAIN_SCENES, + process_ind, + total_processes, + seeds=seeds, + deterministic_cudnn=deterministic_cudnn, + ) + res["scene_period"] = "manual" + res["env_args"] = {} + res["env_args"].update(self.ENV_ARGS) + res["env_args"]["x_display"] = ( + ("0.%d" % devices[process_ind % len(devices)]) + if devices is not None and len(devices) > 0 + else None + ) + return res + + def valid_task_sampler_args( + self, + process_ind: int, + total_processes: int, + devices: Optional[List[int]] = None, + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + ) -> Dict[str, Any]: + res = self._get_sampler_args_for_scene_split( + self.VALID_SCENES, + process_ind, + total_processes, + seeds=seeds, + deterministic_cudnn=deterministic_cudnn, + ) + res["scene_period"] = self.VALID_SAMPLES_IN_SCENE + res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"]) + res["env_args"] = {} + res["env_args"].update(self.ENV_ARGS) + res["env_args"]["x_display"] = ( + ("0.%d" % devices[process_ind % len(devices)]) + if devices is not None and len(devices) > 0 + else None + ) + return res + + def test_task_sampler_args( + self, + process_ind: int, + 
total_processes: int, + devices: Optional[List[int]] = None, + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + ) -> Dict[str, Any]: + res = self._get_sampler_args_for_scene_split( + self.TEST_SCENES, + process_ind, + total_processes, + seeds=seeds, + deterministic_cudnn=deterministic_cudnn, + ) + res["scene_period"] = self.TEST_SAMPLES_IN_SCENE + res["max_tasks"] = self.TEST_SAMPLES_IN_SCENE * len(res["scenes"]) + res["env_args"] = {} + res["env_args"].update(self.ENV_ARGS) + res["env_args"]["x_display"] = ( + ("0.%d" % devices[process_ind % len(devices)]) + if devices is not None and len(devices) > 0 + else None + ) + return res From 0d33b2b6049724910548c9e3c1e236c7638faf08 Mon Sep 17 00:00:00 2001 From: kshitijd20 <36129805+kshitijd20@users.noreply.github.com> Date: Mon, 23 Aug 2021 16:50:36 +0200 Subject: [PATCH 03/17] Update object_nav_ithor_ppo_baseline.py --- projects/tutorials/object_nav_ithor_ppo_baseline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 35685ef0f..8de90f84c 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -81,8 +81,8 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): "player_screen_width": CAMERA_HEIGHT, "quality": "Very Low", "rotateStepDegrees": 30, - "visibilityDistance"=1.0, - "gridSize" =0.25, + "visibilityDistance": 1.0, + "gridSize": 0.25, } MAX_STEPS = 128 From 66a5b032a6bfd37336bb48e27c7d3a70f81c1fc2 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Mon, 23 Aug 2021 08:48:16 -0700 Subject: [PATCH 04/17] added args to ithor controller --- .../ithor_plugin/ithor_environment.py | 15 ++++++++++++--- .../tutorials/object_nav_ithor_ppo_baseline.py | 6 ++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index 6d2815fe8..7a9227541 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -38,11 +38,15 @@ def __init__( fov: float = FOV, player_screen_width: int = 300, player_screen_height: int = 300, + gridSize: float = 0.15, + rotateStepDegrees: int = 30, + visibilityDistance: float = 1.0, quality: str = "Very Low", restrict_to_initially_reachable_points: bool = False, make_agents_visible: bool = True, object_open_speed: float = 1.0, simplify_physics: bool = False, + **kwargs ) -> None: """Initializer. 
@@ -86,8 +90,10 @@ def __init__( self._initially_reachable_points: Optional[List[Dict]] = None self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None self._move_mag: Optional[float] = None - self._grid_size: Optional[float] = None - self._visibility_distance = visibility_distance + self._grid_size: Optional[float] = gridSize + print("grid size is ",self._grid_size ) + self._rotate_step_degrees = rotateStepDegrees + self._visibility_distance = visibilityDistance self._fov = fov self.restrict_to_initially_reachable_points = ( restrict_to_initially_reachable_points @@ -97,7 +103,7 @@ def __init__( self._always_return_visible_range = False self.simplify_physics = simplify_physics - self.start(**kwargs) + self.start(None) # noinspection PyTypeHints self.controller.docker_enabled = docker_enabled # type: ignore @@ -192,6 +198,9 @@ def start( local_executable_path=self._local_thor_build, quality=self._quality, server_class=ai2thor.fifo_server.FifoServer, + gridSize = self._grid_size, + rotateStepDegrees = self._rotate_step_degrees, + visibilityDistance = self._visibility_distance , ) if ( diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 8de90f84c..b50f0e059 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -1,6 +1,8 @@ from math import ceil from typing import Dict, Any, List, Optional +import glob +import os import gym import numpy as np import torch @@ -147,13 +149,13 @@ def machine_params(cls, mode="train", **kwargs): if mode == "train": nprocesses = 40 workers_per_device = 1 - devices = ( + gpu_ids = ( [torch.device("cpu")] if not torch.cuda.is_available() else cls.DEFAULT_TRAIN_GPU_IDS * workers_per_device ) nprocesses = evenly_distribute_count_into_bins( - nprocesses, max(len(devices), 1) + nprocesses, max(len(gpu_ids), 1) ) sampler_devices = cls.DEFAULT_TRAIN_GPU_IDS elif mode == "valid": From 668b3cadd1880e5aad42c88d617d25422a73d3aa Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Tue, 24 Aug 2021 06:38:08 -0700 Subject: [PATCH 05/17] dataset task sampler for ithor objectnav default --- .../ithor_plugin/ithor_environment.py | 41 +++- .../ithor_plugin/ithor_task_samplers.py | 216 +++++++++++++++++- .../experiments/objectnav_thor_base.py | 2 +- .../object_nav_ithor_ppo_baseline.py | 7 +- 4 files changed, 261 insertions(+), 5 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index 7a9227541..09e5ba3fe 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -46,7 +46,7 @@ def __init__( make_agents_visible: bool = True, object_open_speed: float = 1.0, simplify_physics: bool = False, - + snap_to_grid: bool = False, **kwargs ) -> None: """Initializer. 
@@ -86,6 +86,7 @@ def __init__( self.controller: Optional[Controller] = None self._started = False self._quality = quality + self._snap_to_grid = snap_to_grid self._initially_reachable_points: Optional[List[Dict]] = None self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None @@ -106,6 +107,9 @@ def __init__( self.start(None) # noinspection PyTypeHints self.controller.docker_enabled = docker_enabled # type: ignore + self._extra_teleport_kwargs: Dict[ + str, Any + ] = {} # Used for backwards compatability with the teleport action @property def scene_name(self) -> str: @@ -196,6 +200,7 @@ def start( width=self._start_player_screen_width, height=self._start_player_screen_height, local_executable_path=self._local_thor_build, + snapToGrid=self._snap_to_grid, quality=self._quality, server_class=ai2thor.fifo_server.FifoServer, gridSize = self._grid_size, @@ -738,6 +743,40 @@ def step( return sr + def set_object_filter(self, object_ids: List[str]): + self.controller.step("SetObjectFilter", objectIds=object_ids, renderImage=False) + + def reset_object_filter(self): + self.controller.step("ResetObjectFilter", renderImage=False) + + + def teleport( + self, + pose: Dict[str, float], + rotation: Dict[str, float], + horizon: float = 0.0, + ): + try: + e = self.controller.step( + action="TeleportFull", + x=pose["x"], + y=pose["y"], + z=pose["z"], + rotation=rotation, + horizon=horizon, + **self._extra_teleport_kwargs, + ) + except ValueError as e: + if len(self._extra_teleport_kwargs) == 0: + self._extra_teleport_kwargs["standing"] = True + else: + raise e + return self.teleport( + pose=pose, rotation=rotation, horizon=horizon + ) + return e.metadata["lastActionSuccess"] + + @staticmethod def position_dist( p0: Mapping[str, Any], diff --git a/allenact_plugins/ithor_plugin/ithor_task_samplers.py b/allenact_plugins/ithor_plugin/ithor_task_samplers.py index e43b699af..005c23dc2 100644 --- a/allenact_plugins/ithor_plugin/ithor_task_samplers.py +++ b/allenact_plugins/ithor_plugin/ithor_task_samplers.py @@ -1,6 +1,8 @@ import copy import random -from typing import List, Dict, Optional, Any, Union, cast +import gzip +import json +from typing import List, Optional, Union, Dict, Any, cast, Tuple import gym @@ -198,3 +200,215 @@ def set_seed(self, seed: int): self.seed = seed if seed is not None: set_seed(seed) + +class ObjectNavDatasetTaskSampler(TaskSampler): + def __init__( + self, + scenes: List[str], + scene_directory: str, + sensors: List[Sensor], + max_steps: int, + env_args: Dict[str, Any], + action_space: gym.Space, + #rewards_config: Dict, + seed: Optional[int] = None, + deterministic_cudnn: bool = False, + loop_dataset: bool = True, + allow_flipping=False, + env_class=IThorEnvironment, + **kwargs, + ) -> None: + #self.rewards_config = rewards_config + self.env_args = env_args + self.scenes = scenes + self.episodes = { + scene: ObjectNavDatasetTaskSampler.load_dataset( + scene, scene_directory + ) + for scene in scenes + } + self.env_class = env_class + self.object_types = [ + ep["object_type"] for scene in self.episodes for ep in self.episodes[scene] + ] + self.env: Optional[IThorEnvironment] = None + self.sensors = sensors + self.max_steps = max_steps + self._action_space = action_space + self.allow_flipping = allow_flipping + self.scene_counter: Optional[int] = None + self.scene_order: Optional[List[str]] = None + self.scene_id: Optional[int] = None + # get the total number of tasks assigned to this process + if loop_dataset: + self.max_tasks = None + else: + self.max_tasks = 
sum(len(self.episodes[scene]) for scene in self.episodes) + self.reset_tasks = self.max_tasks + self.scene_index = 0 + self.episode_index = 0 + + self._last_sampled_task: Optional[ObjectNaviThorGridTask] = None + + self.seed: Optional[int] = None + self.set_seed(seed) + + if deterministic_cudnn: + set_deterministic_cudnn() + + self.reset() + + def _create_environment(self) -> IThorEnvironment: + env = self.env_class( make_agents_visible=False, + object_open_speed=0.05, + restrict_to_initially_reachable_points=False, + **self.env_args) + return env + + @staticmethod + def load_dataset(scene: str, base_directory: str) -> List[Dict]: + filename = ( + "/".join([base_directory, scene]) + if base_directory[-1] != "/" + else "".join([base_directory, scene]) + ) + filename += ".json.gz" + fin = gzip.GzipFile(filename, "r") + json_bytes = fin.read() + fin.close() + json_str = json_bytes.decode("utf-8") + data = json.loads(json_str) + random.shuffle(data) + return data + + @staticmethod + def load_distance_cache_from_file(scene: str, base_directory: str) -> Dict: + filename = ( + "/".join([base_directory, scene]) + if base_directory[-1] != "/" + else "".join([base_directory, scene]) + ) + filename += ".json.gz" + fin = gzip.GzipFile(filename, "r") + json_bytes = fin.read() + fin.close() + json_str = json_bytes.decode("utf-8") + data = json.loads(json_str) + return data + + @property + def __len__(self) -> Union[int, float]: + """Length. + + # Returns + + Number of total tasks remaining that can be sampled. Can be float('inf'). + """ + return float("inf") if self.max_tasks is None else self.max_tasks + + @property + def total_unique(self) -> Optional[Union[int, float]]: + return self.reset_tasks + + @property + def last_sampled_task(self) -> Optional[ObjectNaviThorGridTask]: + return self._last_sampled_task + + def close(self) -> None: + if self.env is not None: + self.env.stop() + + @property + def all_observation_spaces_equal(self) -> bool: + """Check if observation spaces equal. + + # Returns + + True if all Tasks that can be sampled by this sampler have the + same observation space. Otherwise False. + """ + return True + + @property + def length(self) -> Union[int, float]: + """Length. + + # Returns + + Number of total tasks remaining that can be sampled. Can be float('inf'). 
+ """ + return float("inf") if self.max_tasks is None else self.max_tasks + + def next_task(self, force_advance_scene: bool = False) -> Optional[ObjectNaviThorGridTask]: + if self.max_tasks is not None and self.max_tasks <= 0: + return None + + if self.episode_index >= len(self.episodes[self.scenes[self.scene_index]]): + self.scene_index = (self.scene_index + 1) % len(self.scenes) + # shuffle the new list of episodes to train on + random.shuffle(self.episodes[self.scenes[self.scene_index]]) + self.episode_index = 0 + scene = self.scenes[self.scene_index] + episode = self.episodes[scene][self.episode_index] + if self.env is None: + self.env = self._create_environment() + + if scene.replace("_physics", "") != self.env.scene_name.replace("_physics", ""): + self.env.reset(scene_name=scene) + else: + self.env.reset_object_filter() + + self.env.set_object_filter( + object_ids=[ + o["objectId"] + for o in self.env.last_event.metadata["objects"] + if o["objectType"] == episode["object_type"] + ] + ) + + task_info = {"scene": scene, "object_type": episode["object_type"]} + if len(task_info) == 0: + get_logger().warning( + "Scene {} does not contain any" + " objects of any of the types {}.".format(scene, self.object_types) + ) + task_info["initial_position"] = episode["initial_position"] + task_info["initial_orientation"] = episode["initial_orientation"] + task_info["initial_horizon"] = episode.get("initial_horizon", 0) + task_info["distance_to_target"] = episode.get("shortest_path_length") + task_info["path_to_target"] = episode.get("shortest_path") + task_info["object_type"] = episode["object_type"] + task_info["id"] = episode["id"] + if self.allow_flipping and random.random() > 0.5: + task_info["mirrored"] = True + else: + task_info["mirrored"] = False + + self.episode_index += 1 + if self.max_tasks is not None: + self.max_tasks -= 1 + if not self.env.teleport( + pose=episode["initial_position"], + rotation=episode["initial_orientation"], + horizon=episode.get("initial_horizon", 0), + ): + return self.next_task() + self._last_sampled_task = ObjectNaviThorGridTask( + env=self.env, + sensors=self.sensors, + task_info=task_info, + max_steps=self.max_steps, + action_space=self._action_space, + ) + + return self._last_sampled_task + + def reset(self): + self.episode_index = 0 + self.scene_index = 0 + self.max_tasks = self.reset_tasks + + def set_seed(self, seed: int): + self.seed = seed + if seed is not None: + set_seed(seed) \ No newline at end of file diff --git a/projects/objectnav_baselines/experiments/objectnav_thor_base.py b/projects/objectnav_baselines/experiments/objectnav_thor_base.py index dd6d57e84..d147c90c7 100644 --- a/projects/objectnav_baselines/experiments/objectnav_thor_base.py +++ b/projects/objectnav_baselines/experiments/objectnav_thor_base.py @@ -89,7 +89,7 @@ def env_args(cls): visibilityDistance=cls.VISIBILITY_DISTANCE, gridSize=cls.STEP_SIZE, snapToGrid=False, - agentMode="default", + agentMode="locobot", fieldOfView=horizontal_to_vertical_fov( horizontal_fov_in_degrees=cls.HORIZONTAL_FIELD_OF_VIEW, width=cls.CAMERA_WIDTH, diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index b50f0e059..b2b56e30f 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -26,7 +26,7 @@ RGBSensorThor, GoalObjectTypeThorSensor, ) -from allenact_plugins.ithor_plugin.ithor_task_samplers import ObjectNavTaskSampler +from 
allenact_plugins.ithor_plugin.ithor_task_samplers import ObjectNavTaskSampler,ObjectNavDatasetTaskSampler from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask from projects.objectnav_baselines.models.object_nav_models import ( ObjectNavBaselineActorCritic, @@ -188,7 +188,7 @@ def create_model(cls, **kwargs) -> nn.Module: @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: - return ObjectNavTaskSampler(**kwargs) + return ObjectNavDatasetTaskSampler(**kwargs) @staticmethod def _partition_inds(n: int, num_parts: int): @@ -248,6 +248,7 @@ def train_task_sampler_args( seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) + res["scene_directory"] = os.path.join(os.getcwd(), "datasets/ithor-objectnav/train/episodes") res["scene_period"] = "manual" res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) @@ -273,6 +274,7 @@ def valid_task_sampler_args( seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) + res["scene_directory"] = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes") res["scene_period"] = self.VALID_SAMPLES_IN_SCENE res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"]) res["env_args"] = {} @@ -299,6 +301,7 @@ def test_task_sampler_args( seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) + res["scene_directory"] = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes") res["scene_period"] = self.TEST_SAMPLES_IN_SCENE res["max_tasks"] = self.TEST_SAMPLES_IN_SCENE * len(res["scenes"]) res["env_args"] = {} From 7d8a136f9c2cda55947fcdc9f312ae07c9bb9216 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Tue, 24 Aug 2021 07:22:24 -0700 Subject: [PATCH 06/17] updated model to ResnetGRU --- .../object_nav_ithor_ppo_baseline.py | 75 ++++++++++++++++--- 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index b2b56e30f..eaeaf7746 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -1,6 +1,5 @@ from math import ceil -from typing import Dict, Any, List, Optional - +from typing import Dict, Any, List, Optional, Sequence, Union import glob import os import gym @@ -9,7 +8,13 @@ import torch.nn as nn import torch.optim as optim from torch.optim.lr_scheduler import LambdaLR +from torchvision import models +from allenact.base_abstractions.preprocessor import Preprocessor, SensorPreprocessorGraph +from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor +from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor +from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor +from allenact.utils.experiment_utils import Builder from allenact.utils.experiment_utils import evenly_distribute_count_into_bins from allenact.algorithms.onpolicy_sync.losses import PPO from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig @@ -30,6 +35,7 @@ from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask from projects.objectnav_baselines.models.object_nav_models import ( ObjectNavBaselineActorCritic, + ResnetTensorObjectNavActorCritic ) @@ -74,10 +80,29 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): height=SCREEN_SIZE, width=SCREEN_SIZE, use_resnet_normalization=True, + uuid="rgb_lowres", ), GoalObjectTypeThorSensor(object_types=OBJECT_TYPES), ] + PREPROCESSORS = [ + Builder( + ResNetPreprocessor, + { + "input_height": SCREEN_SIZE, + "input_width": 
SCREEN_SIZE, + "output_width": 7, + "output_height": 7, + "output_dims": 512, + "pool": False, + "torchvision_resnet_model": models.resnet18, + "input_uuids": ["rgb_lowres"], + "output_uuid": "rgb_resnet", + }, + ), + ] + + ENV_ARGS = { "player_screen_height": CAMERA_WIDTH, "player_screen_width": CAMERA_HEIGHT, @@ -99,7 +124,7 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): @classmethod def tag(cls): - return "ObjectNavThorPPO" + return "ObjectNavThorPPOResnetGRU" @classmethod def training_pipeline(cls, **kwargs): @@ -167,23 +192,49 @@ def machine_params(cls, mode="train", **kwargs): else: raise NotImplementedError("mode must be 'train', 'valid', or 'test'.") + + sensor_preprocessor_graph = ( + SensorPreprocessorGraph( + source_observation_spaces=SensorSuite(cls.SENSORS).observation_spaces, + preprocessors=cls.PREPROCESSORS, + ) + if mode == "train" + or ( + (isinstance(nprocesses, int) and nprocesses > 0) + or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0) + ) + else None + ) + return MachineParams( nprocesses=nprocesses, devices=gpu_ids, + sampler_devices=sampler_devices + if mode == "train" + else gpu_ids, # ignored with > 1 gpu_ids + sensor_preprocessor_graph=sensor_preprocessor_graph ) + + + @classmethod def create_model(cls, **kwargs) -> nn.Module: - return ObjectNavBaselineActorCritic( - action_space=gym.spaces.Discrete( - len(ObjectNaviThorGridTask.class_action_names()) - ), - observation_space=SensorSuite(cls.SENSORS).observation_spaces, - rgb_uuid=cls.SENSORS[0].uuid, - depth_uuid=None, - goal_sensor_uuid="goal_object_type_ind", + has_rgb = any(isinstance(s, RGBSensor) for s in cls.SENSORS) + has_depth = any(isinstance(s, DepthSensor) for s in cls.SENSORS) + goal_sensor_uuid = next( + (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), + None, + ) + + return ResnetTensorObjectNavActorCritic( + action_space=gym.spaces.Discrete(len(ObjectNaviThorGridTask.class_action_names())), + observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, + goal_sensor_uuid=goal_sensor_uuid, + rgb_resnet_preprocessor_uuid="rgb_resnet" if has_rgb else None, + depth_resnet_preprocessor_uuid="depth_resnet" if has_depth else None, hidden_size=512, - object_type_embedding_dim=8, + goal_dims=32, ) @classmethod From 24fb1d192e3caf8d94480c741eba7a137702ebf1 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Tue, 24 Aug 2021 08:00:36 -0700 Subject: [PATCH 07/17] autoformatted ithorObjectnav default agentmode --- .../ithor_plugin/ithor_environment.py | 16 ++-- .../ithor_plugin/ithor_task_samplers.py | 23 ++--- .../object_nav_ithor_ppo_baseline.py | 88 +++++++++++-------- 3 files changed, 71 insertions(+), 56 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index 09e5ba3fe..342d4f0f8 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -47,7 +47,7 @@ def __init__( object_open_speed: float = 1.0, simplify_physics: bool = False, snap_to_grid: bool = False, - **kwargs + **kwargs, ) -> None: """Initializer. 
@@ -92,7 +92,7 @@ def __init__( self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None self._move_mag: Optional[float] = None self._grid_size: Optional[float] = gridSize - print("grid size is ",self._grid_size ) + print("grid size is ", self._grid_size) self._rotate_step_degrees = rotateStepDegrees self._visibility_distance = visibilityDistance self._fov = fov @@ -203,9 +203,9 @@ def start( snapToGrid=self._snap_to_grid, quality=self._quality, server_class=ai2thor.fifo_server.FifoServer, - gridSize = self._grid_size, - rotateStepDegrees = self._rotate_step_degrees, - visibilityDistance = self._visibility_distance , + gridSize=self._grid_size, + rotateStepDegrees=self._rotate_step_degrees, + visibilityDistance=self._visibility_distance, ) if ( @@ -749,7 +749,6 @@ def set_object_filter(self, object_ids: List[str]): def reset_object_filter(self): self.controller.step("ResetObjectFilter", renderImage=False) - def teleport( self, pose: Dict[str, float], @@ -771,12 +770,9 @@ def teleport( self._extra_teleport_kwargs["standing"] = True else: raise e - return self.teleport( - pose=pose, rotation=rotation, horizon=horizon - ) + return self.teleport(pose=pose, rotation=rotation, horizon=horizon) return e.metadata["lastActionSuccess"] - @staticmethod def position_dist( p0: Mapping[str, Any], diff --git a/allenact_plugins/ithor_plugin/ithor_task_samplers.py b/allenact_plugins/ithor_plugin/ithor_task_samplers.py index 005c23dc2..3e31a0c1b 100644 --- a/allenact_plugins/ithor_plugin/ithor_task_samplers.py +++ b/allenact_plugins/ithor_plugin/ithor_task_samplers.py @@ -201,6 +201,7 @@ def set_seed(self, seed: int): if seed is not None: set_seed(seed) + class ObjectNavDatasetTaskSampler(TaskSampler): def __init__( self, @@ -210,7 +211,7 @@ def __init__( max_steps: int, env_args: Dict[str, Any], action_space: gym.Space, - #rewards_config: Dict, + # rewards_config: Dict, seed: Optional[int] = None, deterministic_cudnn: bool = False, loop_dataset: bool = True, @@ -218,13 +219,11 @@ def __init__( env_class=IThorEnvironment, **kwargs, ) -> None: - #self.rewards_config = rewards_config + # self.rewards_config = rewards_config self.env_args = env_args self.scenes = scenes self.episodes = { - scene: ObjectNavDatasetTaskSampler.load_dataset( - scene, scene_directory - ) + scene: ObjectNavDatasetTaskSampler.load_dataset(scene, scene_directory) for scene in scenes } self.env_class = env_class @@ -259,10 +258,12 @@ def __init__( self.reset() def _create_environment(self) -> IThorEnvironment: - env = self.env_class( make_agents_visible=False, + env = self.env_class( + make_agents_visible=False, object_open_speed=0.05, restrict_to_initially_reachable_points=False, - **self.env_args) + **self.env_args, + ) return env @staticmethod @@ -339,7 +340,9 @@ def length(self) -> Union[int, float]: """ return float("inf") if self.max_tasks is None else self.max_tasks - def next_task(self, force_advance_scene: bool = False) -> Optional[ObjectNaviThorGridTask]: + def next_task( + self, force_advance_scene: bool = False + ) -> Optional[ObjectNaviThorGridTask]: if self.max_tasks is not None and self.max_tasks <= 0: return None @@ -400,7 +403,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[ObjectNaviTho max_steps=self.max_steps, action_space=self._action_space, ) - + return self._last_sampled_task def reset(self): @@ -411,4 +414,4 @@ def reset(self): def set_seed(self, seed: int): self.seed = seed if seed is not None: - set_seed(seed) \ No newline at end of file + set_seed(seed) diff 
--git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index eaeaf7746..2946db433 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -10,7 +10,10 @@ from torch.optim.lr_scheduler import LambdaLR from torchvision import models -from allenact.base_abstractions.preprocessor import Preprocessor, SensorPreprocessorGraph +from allenact.base_abstractions.preprocessor import ( + Preprocessor, + SensorPreprocessorGraph, +) from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor @@ -31,11 +34,14 @@ RGBSensorThor, GoalObjectTypeThorSensor, ) -from allenact_plugins.ithor_plugin.ithor_task_samplers import ObjectNavTaskSampler,ObjectNavDatasetTaskSampler +from allenact_plugins.ithor_plugin.ithor_task_samplers import ( + ObjectNavTaskSampler, + ObjectNavDatasetTaskSampler, +) from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask from projects.objectnav_baselines.models.object_nav_models import ( ObjectNavBaselineActorCritic, - ResnetTensorObjectNavActorCritic + ResnetTensorObjectNavActorCritic, ) @@ -48,26 +54,34 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): # A simple setting, train/valid/test are all the same single scene # and we're looking for a single object OBJECT_TYPES = sorted( - [ - "AlarmClock", - "Apple", - "Book", - "Bowl", - "Box", - "Candle", - "GarbageCan", - "HousePlant", - "Laptop", - "SoapBottle", - "Television", - "Toaster", - ] - ) - train_path = os.path.join(os.getcwd(), "datasets/ithor-objectnav/train/episodes", "*.json.gz") - val_path = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes", "*.json.gz") - test_path = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes", "*.json.gz") - - TRAIN_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(train_path)] + [ + "AlarmClock", + "Apple", + "Book", + "Bowl", + "Box", + "Candle", + "GarbageCan", + "HousePlant", + "Laptop", + "SoapBottle", + "Television", + "Toaster", + ] + ) + train_path = os.path.join( + os.getcwd(), "datasets/ithor-objectnav/train/episodes", "*.json.gz" + ) + val_path = os.path.join( + os.getcwd(), "datasets/ithor-objectnav/val/episodes", "*.json.gz" + ) + test_path = os.path.join( + os.getcwd(), "datasets/ithor-objectnav/val/episodes", "*.json.gz" + ) + + TRAIN_SCENES = [ + scene.split("/")[-1].split(".")[0] for scene in glob.glob(train_path) + ] VALID_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(val_path)] TEST_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(test_path)] @@ -102,14 +116,13 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): ), ] - ENV_ARGS = { "player_screen_height": CAMERA_WIDTH, "player_screen_width": CAMERA_HEIGHT, "quality": "Very Low", "rotateStepDegrees": 30, "visibilityDistance": 1.0, - "gridSize": 0.25, + "gridSize": 0.25, } MAX_STEPS = 128 @@ -117,7 +130,6 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): VALID_SAMPLES_IN_SCENE = 10 TEST_SAMPLES_IN_SCENE = 100 - DEFAULT_TRAIN_GPU_IDS = tuple(range(torch.cuda.device_count())) DEFAULT_VALID_GPU_IDS = (torch.cuda.device_count() - 1,) DEFAULT_TEST_GPU_IDS = (torch.cuda.device_count() - 1,) @@ -133,7 +145,7 @@ def training_pipeline(cls, **kwargs): num_mini_batch = 1 if not torch.cuda.is_available() else 6 
update_repeats = 4 num_steps = 128 - metric_accumulate_interval = 10000 # Log every 10 max length tasks + metric_accumulate_interval = 10000 # Log every 10 max length tasks save_interval = 5000000 gamma = 0.99 use_gae = True @@ -192,7 +204,6 @@ def machine_params(cls, mode="train", **kwargs): else: raise NotImplementedError("mode must be 'train', 'valid', or 'test'.") - sensor_preprocessor_graph = ( SensorPreprocessorGraph( source_observation_spaces=SensorSuite(cls.SENSORS).observation_spaces, @@ -212,12 +223,9 @@ def machine_params(cls, mode="train", **kwargs): sampler_devices=sampler_devices if mode == "train" else gpu_ids, # ignored with > 1 gpu_ids - sensor_preprocessor_graph=sensor_preprocessor_graph + sensor_preprocessor_graph=sensor_preprocessor_graph, ) - - - @classmethod def create_model(cls, **kwargs) -> nn.Module: has_rgb = any(isinstance(s, RGBSensor) for s in cls.SENSORS) @@ -228,7 +236,9 @@ def create_model(cls, **kwargs) -> nn.Module: ) return ResnetTensorObjectNavActorCritic( - action_space=gym.spaces.Discrete(len(ObjectNaviThorGridTask.class_action_names())), + action_space=gym.spaces.Discrete( + len(ObjectNaviThorGridTask.class_action_names()) + ), observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, goal_sensor_uuid=goal_sensor_uuid, rgb_resnet_preprocessor_uuid="rgb_resnet" if has_rgb else None, @@ -299,7 +309,9 @@ def train_task_sampler_args( seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) - res["scene_directory"] = os.path.join(os.getcwd(), "datasets/ithor-objectnav/train/episodes") + res["scene_directory"] = os.path.join( + os.getcwd(), "datasets/ithor-objectnav/train/episodes" + ) res["scene_period"] = "manual" res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) @@ -325,7 +337,9 @@ def valid_task_sampler_args( seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) - res["scene_directory"] = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes") + res["scene_directory"] = os.path.join( + os.getcwd(), "datasets/ithor-objectnav/val/episodes" + ) res["scene_period"] = self.VALID_SAMPLES_IN_SCENE res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"]) res["env_args"] = {} @@ -352,7 +366,9 @@ def test_task_sampler_args( seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) - res["scene_directory"] = os.path.join(os.getcwd(), "datasets/ithor-objectnav/val/episodes") + res["scene_directory"] = os.path.join( + os.getcwd(), "datasets/ithor-objectnav/val/episodes" + ) res["scene_period"] = self.TEST_SAMPLES_IN_SCENE res["max_tasks"] = self.TEST_SAMPLES_IN_SCENE * len(res["scenes"]) res["env_args"] = {} From 37795be0fccc57f3a1af25cf5c4549ce59b6ed46 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Tue, 24 Aug 2021 10:50:33 -0700 Subject: [PATCH 08/17] reset to default iTHOR args --- allenact_plugins/ithor_plugin/ithor_environment.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index 342d4f0f8..b5b489f94 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -38,9 +38,9 @@ def __init__( fov: float = FOV, player_screen_width: int = 300, player_screen_height: int = 300, - gridSize: float = 0.15, - rotateStepDegrees: int = 30, - visibilityDistance: float = 1.0, + gridSize: float = 0.25, + rotateStepDegrees: int = 90, + visibilityDistance: float = 1.25, quality: str = "Very Low", restrict_to_initially_reachable_points: 
bool = False, make_agents_visible: bool = True, @@ -92,7 +92,6 @@ def __init__( self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None self._move_mag: Optional[float] = None self._grid_size: Optional[float] = gridSize - print("grid size is ", self._grid_size) self._rotate_step_degrees = rotateStepDegrees self._visibility_distance = visibilityDistance self._fov = fov From 6a7264948768e93465e1e499ad113b241ac8f058 Mon Sep 17 00:00:00 2001 From: kshitijd20 <36129805+kshitijd20@users.noreply.github.com> Date: Tue, 24 Aug 2021 22:22:06 +0200 Subject: [PATCH 09/17] restore iTHOR defaults --- allenact_plugins/ithor_plugin/ithor_environment.py | 11 +++++------ projects/tutorials/object_nav_ithor_ppo_baseline.py | 7 ++++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index b5b489f94..0562de0b7 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -39,14 +39,13 @@ def __init__( player_screen_width: int = 300, player_screen_height: int = 300, gridSize: float = 0.25, - rotateStepDegrees: int = 90, - visibilityDistance: float = 1.25, + rotate_step_degrees: int = 90, quality: str = "Very Low", restrict_to_initially_reachable_points: bool = False, make_agents_visible: bool = True, object_open_speed: float = 1.0, simplify_physics: bool = False, - snap_to_grid: bool = False, + snap_to_grid: bool = True, **kwargs, ) -> None: """Initializer. @@ -91,9 +90,9 @@ def __init__( self._initially_reachable_points: Optional[List[Dict]] = None self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None self._move_mag: Optional[float] = None - self._grid_size: Optional[float] = gridSize - self._rotate_step_degrees = rotateStepDegrees - self._visibility_distance = visibilityDistance + self._grid_size: Optional[float] = grid_size + self._rotate_step_degrees = rotate_step_degrees + self._visibility_distance = visibility_distance self._fov = fov self.restrict_to_initially_reachable_points = ( restrict_to_initially_reachable_points diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 2946db433..09cec2d5b 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -120,9 +120,10 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): "player_screen_height": CAMERA_WIDTH, "player_screen_width": CAMERA_HEIGHT, "quality": "Very Low", - "rotateStepDegrees": 30, - "visibilityDistance": 1.0, - "gridSize": 0.25, + "rotate_step_degrees": 30, + "visibility_distance": 1.0, + "grid_size": 0.25, + "snap_to_grid": False } MAX_STEPS = 128 From 90ef4f5184cf07796b0bf66380157412dfd7a439 Mon Sep 17 00:00:00 2001 From: kshitijd20 <36129805+kshitijd20@users.noreply.github.com> Date: Tue, 24 Aug 2021 22:33:24 +0200 Subject: [PATCH 10/17] looping in val/test sampler to False --- projects/tutorials/object_nav_ithor_ppo_baseline.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 09cec2d5b..998fdb806 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -313,6 +313,7 @@ def train_task_sampler_args( res["scene_directory"] = os.path.join( os.getcwd(), 
"datasets/ithor-objectnav/train/episodes" ) + res["loop_dataset"] = True res["scene_period"] = "manual" res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) @@ -338,11 +339,10 @@ def valid_task_sampler_args( seeds=seeds, deterministic_cudnn=deterministic_cudnn, ) + res["loop_dataset"] = False res["scene_directory"] = os.path.join( os.getcwd(), "datasets/ithor-objectnav/val/episodes" ) - res["scene_period"] = self.VALID_SAMPLES_IN_SCENE - res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"]) res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) res["env_args"]["x_display"] = ( @@ -370,8 +370,7 @@ def test_task_sampler_args( res["scene_directory"] = os.path.join( os.getcwd(), "datasets/ithor-objectnav/val/episodes" ) - res["scene_period"] = self.TEST_SAMPLES_IN_SCENE - res["max_tasks"] = self.TEST_SAMPLES_IN_SCENE * len(res["scenes"]) + res["loop_dataset"] = False res["env_args"] = {} res["env_args"].update(self.ENV_ARGS) res["env_args"]["x_display"] = ( From a7c7f0d093d3f930d25100aa1e6702aeb89a0273 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Tue, 24 Aug 2021 14:53:14 -0700 Subject: [PATCH 11/17] fixed typos --- allenact_plugins/ithor_plugin/ithor_environment.py | 2 +- projects/tutorials/object_nav_ithor_ppo_baseline.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index 0562de0b7..fe745c9f2 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -38,7 +38,7 @@ def __init__( fov: float = FOV, player_screen_width: int = 300, player_screen_height: int = 300, - gridSize: float = 0.25, + grid_size: float = 0.25, rotate_step_degrees: int = 90, quality: str = "Very Low", restrict_to_initially_reachable_points: bool = False, diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 998fdb806..08d448319 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -117,8 +117,8 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): ] ENV_ARGS = { - "player_screen_height": CAMERA_WIDTH, - "player_screen_width": CAMERA_HEIGHT, + "player_screen_height": CAMERA_HEIGHT, + "player_screen_width": CAMERA_WIDTH, "quality": "Very Low", "rotate_step_degrees": 30, "visibility_distance": 1.0, From 6a07db0256fc69510277cb1dbcf49affe9cd0828 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Wed, 25 Aug 2021 06:14:00 -0700 Subject: [PATCH 12/17] Dataset Task sampler naming change --- allenact_plugins/ithor_plugin/ithor_task_samplers.py | 4 ++-- projects/tutorials/object_nav_ithor_ppo_baseline.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_task_samplers.py b/allenact_plugins/ithor_plugin/ithor_task_samplers.py index 3e31a0c1b..92075b478 100644 --- a/allenact_plugins/ithor_plugin/ithor_task_samplers.py +++ b/allenact_plugins/ithor_plugin/ithor_task_samplers.py @@ -202,7 +202,7 @@ def set_seed(self, seed: int): set_seed(seed) -class ObjectNavDatasetTaskSampler(TaskSampler): +class ObjectNaviThorDatasetTaskSampler(TaskSampler): def __init__( self, scenes: List[str], @@ -223,7 +223,7 @@ def __init__( self.env_args = env_args self.scenes = scenes self.episodes = { - scene: ObjectNavDatasetTaskSampler.load_dataset(scene, scene_directory) + scene: ObjectNaviThorDatasetTaskSampler.load_dataset(scene, 
scene_directory) for scene in scenes } self.env_class = env_class diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 08d448319..c2c0a9852 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -36,7 +36,7 @@ ) from allenact_plugins.ithor_plugin.ithor_task_samplers import ( ObjectNavTaskSampler, - ObjectNavDatasetTaskSampler, + ObjectNaviThorDatasetTaskSampler, ) from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask from projects.objectnav_baselines.models.object_nav_models import ( @@ -250,7 +250,7 @@ def create_model(cls, **kwargs) -> nn.Module: @classmethod def make_sampler_fn(cls, **kwargs) -> TaskSampler: - return ObjectNavDatasetTaskSampler(**kwargs) + return ObjectNaviThorDatasetTaskSampler(**kwargs) @staticmethod def _partition_inds(n: int, num_parts: int): From fe3ed2eb4966c167a6dea09c846a337443a10aa4 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Wed, 25 Aug 2021 07:12:13 -0700 Subject: [PATCH 13/17] pose to position --- allenact_plugins/ithor_plugin/ithor_environment.py | 10 +++++----- allenact_plugins/ithor_plugin/ithor_task_samplers.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index fe745c9f2..fce33e4b0 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -749,16 +749,16 @@ def reset_object_filter(self): def teleport( self, - pose: Dict[str, float], + position: Dict[str, float], rotation: Dict[str, float], horizon: float = 0.0, ): try: e = self.controller.step( action="TeleportFull", - x=pose["x"], - y=pose["y"], - z=pose["z"], + x=position["x"], + y=position["y"], + z=position["z"], rotation=rotation, horizon=horizon, **self._extra_teleport_kwargs, @@ -768,7 +768,7 @@ def teleport( self._extra_teleport_kwargs["standing"] = True else: raise e - return self.teleport(pose=pose, rotation=rotation, horizon=horizon) + return self.teleport(position=position, rotation=rotation, horizon=horizon) return e.metadata["lastActionSuccess"] @staticmethod diff --git a/allenact_plugins/ithor_plugin/ithor_task_samplers.py b/allenact_plugins/ithor_plugin/ithor_task_samplers.py index 92075b478..f0c3c9b0d 100644 --- a/allenact_plugins/ithor_plugin/ithor_task_samplers.py +++ b/allenact_plugins/ithor_plugin/ithor_task_samplers.py @@ -391,7 +391,7 @@ def next_task( if self.max_tasks is not None: self.max_tasks -= 1 if not self.env.teleport( - pose=episode["initial_position"], + position=episode["initial_position"], rotation=episode["initial_orientation"], horizon=episode.get("initial_horizon", 0), ): From be37e2a8128d80b23e6916bdad6bef846f0a508a Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Wed, 25 Aug 2021 10:14:14 -0700 Subject: [PATCH 14/17] adding reward config --- .../ithor_plugin/ithor_task_samplers.py | 5 ++-- allenact_plugins/ithor_plugin/ithor_tasks.py | 30 +++++++++++++++++-- .../object_nav_ithor_ppo_baseline.py | 11 +++++-- 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_task_samplers.py b/allenact_plugins/ithor_plugin/ithor_task_samplers.py index f0c3c9b0d..ca83de9be 100644 --- a/allenact_plugins/ithor_plugin/ithor_task_samplers.py +++ b/allenact_plugins/ithor_plugin/ithor_task_samplers.py @@ -211,7 +211,7 @@ def __init__( max_steps: int, env_args: Dict[str, Any], 
action_space: gym.Space, - # rewards_config: Dict, + rewards_config: Dict, seed: Optional[int] = None, deterministic_cudnn: bool = False, loop_dataset: bool = True, @@ -219,7 +219,7 @@ def __init__( env_class=IThorEnvironment, **kwargs, ) -> None: - # self.rewards_config = rewards_config + self.rewards_config = rewards_config self.env_args = env_args self.scenes = scenes self.episodes = { @@ -402,6 +402,7 @@ def next_task( task_info=task_info, max_steps=self.max_steps, action_space=self._action_space, + reward_configs=self.rewards_config, ) return self._last_sampled_task diff --git a/allenact_plugins/ithor_plugin/ithor_tasks.py b/allenact_plugins/ithor_plugin/ithor_tasks.py index 75670db57..259f81f7f 100644 --- a/allenact_plugins/ithor_plugin/ithor_tasks.py +++ b/allenact_plugins/ithor_plugin/ithor_tasks.py @@ -3,6 +3,7 @@ import gym import numpy as np +import math from allenact.base_abstractions.misc import RLStepResult from allenact.base_abstractions.sensor import Sensor @@ -65,6 +66,7 @@ def __init__( sensors: List[Sensor], task_info: Dict[str, Any], max_steps: int, + reward_configs: Dict[str, Any], **kwargs, ) -> None: """Initializer. @@ -74,6 +76,9 @@ def __init__( super().__init__( env=env, sensors=sensors, task_info=task_info, max_steps=max_steps, **kwargs ) + + self._rewards: List[float] = [] + self.reward_configs = reward_configs self._took_end_action: bool = False self._success: Optional[bool] = False self._subsampled_locations_from_which_obj_visible: Optional[ @@ -137,15 +142,36 @@ def is_goal_object_visible(self) -> bool: for o in self.env.visible_objects() ) + def dist_to_target(self): + return self.env.distance_to_point(self.task_info["target"]) + + def judge(self) -> float: + """Judge the last event.""" + reward = self.reward_configs["step_penalty"] + + #reward += self.shaping() + + if self._took_end_action: + if self._success: + reward += self.reward_configs["goal_success_reward"] + else: + reward += self.reward_configs["failed_stop_reward"] + elif self.num_steps_taken() + 1 >= self.max_steps: + reward += self.reward_configs.get("reached_max_steps_reward", 0.0) + + self._rewards.append(float(reward)) + return float(reward) + + def judge_old(self) -> float: """Compute the reward after having taken a step.""" reward = -0.01 if not self.last_action_success: - reward += -0.03 + reward += -0.00 if self._took_end_action: - reward += 1.0 if self._success else -1.0 + reward += 10.0 if self._success else -0.0 return float(reward) diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index c2c0a9852..4ab8025c3 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -126,7 +126,13 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): "snap_to_grid": False } - MAX_STEPS = 128 + MAX_STEPS = 500 + REWARD_CONFIG = { + "step_penalty": -0.01, + "goal_success_reward": 10.0, + "failed_stop_reward": 0.0, + "shaping_weight": 1.0, + } ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None VALID_SAMPLES_IN_SCENE = 10 TEST_SAMPLES_IN_SCENE = 100 @@ -137,7 +143,7 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): @classmethod def tag(cls): - return "ObjectNavThorPPOResnetGRU" + return "ObjectNaviThorPPOResnetGRU" @classmethod def training_pipeline(cls, **kwargs): @@ -293,6 +299,7 @@ def _get_sampler_args_for_scene_split( ), "seed": seeds[process_ind] if seeds is not None else None, "deterministic_cudnn": deterministic_cudnn, + "rewards_config": 
self.REWARD_CONFIG, } def train_task_sampler_args( From a76d94f0b4e7080846336ca8e1060593000852c4 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Wed, 25 Aug 2021 14:10:09 -0700 Subject: [PATCH 15/17] shaping rewards added --- .../ithor_plugin/ithor_environment.py | 126 ++++++++++++++++++ allenact_plugins/ithor_plugin/ithor_tasks.py | 66 ++++++++- 2 files changed, 187 insertions(+), 5 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index fce33e4b0..7ecff01b9 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -15,6 +15,12 @@ from allenact.utils.system import get_logger from allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV from allenact_plugins.ithor_plugin.ithor_util import round_to_factor +from ai2thor.util import metrics +from allenact.utils.cache_utils import ( + DynamicDistanceCache, + pos_to_str_for_cache, + str_to_pos_for_cache, +) class IThorEnvironment(object): @@ -31,6 +37,7 @@ class IThorEnvironment(object): def __init__( self, + all_metadata_available: bool = True, x_display: Optional[str] = None, docker_enabled: bool = False, local_thor_build: Optional[str] = None, @@ -46,6 +53,7 @@ def __init__( object_open_speed: float = 1.0, simplify_physics: bool = False, snap_to_grid: bool = True, + agent_count: int = 1, **kwargs, ) -> None: """Initializer. @@ -86,6 +94,7 @@ def __init__( self._started = False self._quality = quality self._snap_to_grid = snap_to_grid + self.agent_count = agent_count self._initially_reachable_points: Optional[List[Dict]] = None self._initially_reachable_points_set: Optional[Set[Tuple[float, float]]] = None @@ -101,14 +110,119 @@ def __init__( self.object_open_speed = object_open_speed self._always_return_visible_range = False self.simplify_physics = simplify_physics + self.all_metadata_available = all_metadata_available + + + + self.scene_to_reachable_positions: Optional[Dict[str, Any]] = None + self.distance_cache: Optional[DynamicDistanceCache] = None + + self.start(None) # noinspection PyTypeHints + if self.all_metadata_available: + self.scene_to_reachable_positions = { + self.scene_name: copy.deepcopy(self.currently_reachable_points) + } + assert len(self.scene_to_reachable_positions[self.scene_name]) > 10 + + self.distance_cache = DynamicDistanceCache(rounding=1) self.controller.docker_enabled = docker_enabled # type: ignore self._extra_teleport_kwargs: Dict[ str, Any ] = {} # Used for backwards compatability with the teleport action + def path_from_point_to_object_type( + self, point: Dict[str, float], object_type: str, allowed_error: float + ) -> Optional[List[Dict[str, float]]]: + event = self.controller.step( + action="GetShortestPath", + objectType=object_type, + position=point, + allowedError=allowed_error, + ) + if event.metadata["lastActionSuccess"]: + return event.metadata["actionReturn"]["corners"] + else: + get_logger().debug( + "Failed to find path for {} in {}. Start point {}, agent state {}.".format( + object_type, + self.controller.last_event.metadata["sceneName"], + point, + self.agent_state(), + ) + ) + return None + + def distance_from_point_to_object_type( + self, point: Dict[str, float], object_type: str, allowed_error: float + ) -> float: + """Minimal geodesic distance from a point to an object of the given + type. + It might return -1.0 for unreachable targets. 
+ """ + path = self.path_from_point_to_object_type(point, object_type, allowed_error) + if path: + # Because `allowed_error != 0` means that the path returned above might not start + # at `point`, we explicitly add any offset there is. + s_dist = math.sqrt( + (point["x"] - path[0]["x"]) ** 2 + (point["z"] - path[0]["z"]) ** 2 + ) + return metrics.path_distance(path) + s_dist + return -1.0 + + def distance_to_object_type(self, object_type: str, agent_id: int = 0) -> float: + """Minimal geodesic distance to object of given type from agent's + current location. + It might return -1.0 for unreachable targets. + """ + assert 0 <= agent_id < self.agent_count + assert ( + self.all_metadata_available + ), "`distance_to_object_type` cannot be called when `self.all_metadata_available` is `False`." + + def retry_dist(position: Dict[str, float], object_type: str): + allowed_error = 0.05 + debug_log = "" + d = -1.0 + while allowed_error < 2.5: + d = self.distance_from_point_to_object_type( + position, object_type, allowed_error + ) + if d < 0: + debug_log = ( + f"In scene {self.scene_name}, could not find a path from {position} to {object_type} with" + f" {allowed_error} error tolerance. Increasing this tolerance to" + f" {2 * allowed_error} any trying again." + ) + allowed_error *= 2 + else: + break + if d < 0: + get_logger().warning( + f"In scene {self.scene_name}, could not find a path from {position} to {object_type}" + f" with {allowed_error} error tolerance. Returning a distance of -1." + ) + elif debug_log != "": + get_logger().debug(debug_log) + return d + + return self.distance_cache.find_distance( + self.scene_name, + self.controller.last_event.events[agent_id].metadata["agent"]["position"], + object_type, + retry_dist, + ) + + + @property + def currently_reachable_points(self) -> List[Dict[str, float]]: + """List of {"x": x, "y": y, "z": z} locations in the scene that are + currently reachable.""" + self.step({"action": "GetReachablePositions"}) + return self.last_event.metadata["actionReturn"] # type:ignore + @property def scene_name(self) -> str: """Current ai2thor scene.""" @@ -541,6 +655,18 @@ def currently_reachable_points(self) -> List[Dict[str, float]]: currently reachable.""" self.step({"action": "GetReachablePositions"}) return self.last_event.metadata["actionReturn"] # type:ignore + + def agent_state(self, agent_id: int = 0) -> Dict: + """Return agent position, rotation and horizon.""" + assert 0 <= agent_id < self.agent_count + + agent_meta = self.last_event.events[agent_id].metadata["agent"] + return { + **{k: float(v) for k, v in agent_meta["position"].items()}, + "rotation": {k: float(v) for k, v in agent_meta["rotation"].items()}, + "horizon": round(float(agent_meta["cameraHorizon"]), 1), + } + def get_agent_location(self) -> Dict[str, Union[float, bool]]: """Gets agent's location.""" diff --git a/allenact_plugins/ithor_plugin/ithor_tasks.py b/allenact_plugins/ithor_plugin/ithor_tasks.py index 259f81f7f..d99c0d4c9 100644 --- a/allenact_plugins/ithor_plugin/ithor_tasks.py +++ b/allenact_plugins/ithor_plugin/ithor_tasks.py @@ -85,9 +85,21 @@ def __init__( List[Tuple[float, float, int, int]] ] = None + self._all_metadata_available = env.all_metadata_available + self.path: List = ( + [] + ) # the initial coordinate will be directly taken from the optimal path + self.travelled_distance = 0.0 self.task_info["followed_path"] = [self.env.get_agent_location()] self.task_info["action_names"] = self.class_action_names() + if self._all_metadata_available: + self.last_geodesic_distance = 
self.env.distance_to_object_type( + self.task_info["object_type"] + ) + self.optimal_distance = self.last_geodesic_distance + self.closest_geo_distance = self.last_geodesic_distance + @property def action_space(self): return gym.spaces.Discrete(len(self._actions)) @@ -121,13 +133,21 @@ def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult: ) and self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE is not None: self.env.update_graph_with_failed_action(failed_action=action_str) - self.task_info["followed_path"].append(self.env.get_agent_location()) + pose = self.env.agent_state() + + self.path.append({k: pose[k] for k in ["x", "y", "z"]}) + self.task_info["followed_path"].append(pose) + if len(self.path) > 1: + self.travelled_distance += IThorEnvironment.position_dist( + p0=self.path[-1], p1=self.path[-2], ignore_y=True + ) + #self.task_info["followed_path"].append(self.env.get_agent_location()) step_result = RLStepResult( observation=self.get_observations(), reward=self.judge(), done=self.is_done(), - info={"last_action_success": self.last_action_success}, + info={"last_action_success": self.last_action_success, "action": action_str}, ) return step_result @@ -142,15 +162,51 @@ def is_goal_object_visible(self) -> bool: for o in self.env.visible_objects() ) - def dist_to_target(self): - return self.env.distance_to_point(self.task_info["target"]) + + def shaping(self) -> float: + rew = 0.0 + + if self.reward_configs["shaping_weight"] == 0.0: + return rew + + geodesic_distance = self.env.distance_to_object_type( + self.task_info["object_type"] + ) + + # Ensuring the reward magnitude is not greater than the total distance moved + max_reward_mag = 0.0 + if len(self.path) >= 2: + p0, p1 = self.path[-2:] + max_reward_mag = math.sqrt( + (p0["x"] - p1["x"]) ** 2 + (p0["z"] - p1["z"]) ** 2 + ) + + if self.reward_configs.get("positive_only_reward", False): + if geodesic_distance > 0.5: + rew = max(self.closest_geo_distance - geodesic_distance, 0) + else: + if ( + self.last_geodesic_distance > -0.5 and geodesic_distance > -0.5 + ): # (robothor limits) + rew += self.last_geodesic_distance - geodesic_distance + + self.last_geodesic_distance = geodesic_distance + self.closest_geo_distance = min(self.closest_geo_distance, geodesic_distance) + + return ( + max( + min(rew, max_reward_mag), + -max_reward_mag, + ) + * self.reward_configs["shaping_weight"] + ) def judge(self) -> float: """Judge the last event.""" reward = self.reward_configs["step_penalty"] - #reward += self.shaping() + reward += self.shaping() if self._took_end_action: if self._success: From 11bc7cd88a907eaeece781afbc1409ca78d70e44 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Wed, 25 Aug 2021 14:35:09 -0700 Subject: [PATCH 16/17] auto formatting --- allenact_plugins/ithor_plugin/ithor_environment.py | 8 +------- allenact_plugins/ithor_plugin/ithor_tasks.py | 9 +++++---- projects/tutorials/object_nav_ithor_ppo_baseline.py | 12 ++++++++---- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index 7ecff01b9..ffde80e93 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -112,13 +112,9 @@ def __init__( self.simplify_physics = simplify_physics self.all_metadata_available = all_metadata_available - - self.scene_to_reachable_positions: Optional[Dict[str, Any]] = None self.distance_cache: Optional[DynamicDistanceCache] = None - - self.start(None) # 
noinspection PyTypeHints if self.all_metadata_available: @@ -215,7 +211,6 @@ def retry_dist(position: Dict[str, float], object_type: str): retry_dist, ) - @property def currently_reachable_points(self) -> List[Dict[str, float]]: """List of {"x": x, "y": y, "z": z} locations in the scene that are @@ -655,7 +650,7 @@ def currently_reachable_points(self) -> List[Dict[str, float]]: currently reachable.""" self.step({"action": "GetReachablePositions"}) return self.last_event.metadata["actionReturn"] # type:ignore - + def agent_state(self, agent_id: int = 0) -> Dict: """Return agent position, rotation and horizon.""" assert 0 <= agent_id < self.agent_count @@ -667,7 +662,6 @@ def agent_state(self, agent_id: int = 0) -> Dict: "horizon": round(float(agent_meta["cameraHorizon"]), 1), } - def get_agent_location(self) -> Dict[str, Union[float, bool]]: """Gets agent's location.""" metadata = self.controller.last_event.metadata diff --git a/allenact_plugins/ithor_plugin/ithor_tasks.py b/allenact_plugins/ithor_plugin/ithor_tasks.py index d99c0d4c9..44664c686 100644 --- a/allenact_plugins/ithor_plugin/ithor_tasks.py +++ b/allenact_plugins/ithor_plugin/ithor_tasks.py @@ -141,13 +141,16 @@ def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult: self.travelled_distance += IThorEnvironment.position_dist( p0=self.path[-1], p1=self.path[-2], ignore_y=True ) - #self.task_info["followed_path"].append(self.env.get_agent_location()) + # self.task_info["followed_path"].append(self.env.get_agent_location()) step_result = RLStepResult( observation=self.get_observations(), reward=self.judge(), done=self.is_done(), - info={"last_action_success": self.last_action_success, "action": action_str}, + info={ + "last_action_success": self.last_action_success, + "action": action_str, + }, ) return step_result @@ -162,7 +165,6 @@ def is_goal_object_visible(self) -> bool: for o in self.env.visible_objects() ) - def shaping(self) -> float: rew = 0.0 @@ -201,7 +203,6 @@ def shaping(self) -> float: * self.reward_configs["shaping_weight"] ) - def judge(self) -> float: """Judge the last event.""" reward = self.reward_configs["step_penalty"] diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 4ab8025c3..8aa08d405 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -80,10 +80,14 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): ) TRAIN_SCENES = [ - scene.split("/")[-1].split(".")[0] for scene in glob.glob(train_path) + os.path.basename(scene).split(".")[0] for scene in glob.glob(train_path) + ] + VALID_SCENES = [ + os.path.basename(scene).split(".")[0] for scene in glob.glob(val_path) + ] + TEST_SCENES = [ + os.path.basename(scene).split(".")[0] for scene in glob.glob(test_path) ] - VALID_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(val_path)] - TEST_SCENES = [scene.split("/")[-1].split(".")[0] for scene in glob.glob(test_path)] # Setting up sensors and basic environment details CAMERA_WIDTH = 400 @@ -123,7 +127,7 @@ class ObjectNavThorPPOExperimentConfig(ExperimentConfig): "rotate_step_degrees": 30, "visibility_distance": 1.0, "grid_size": 0.25, - "snap_to_grid": False + "snap_to_grid": False, } MAX_STEPS = 500 From 5f86224f5ad9539ebc8469b5c6eaf192f81baff0 Mon Sep 17 00:00:00 2001 From: Kshitij Dwivedi Date: Fri, 27 Aug 2021 02:43:57 -0700 Subject: [PATCH 17/17] LGTM warnings addressed --- .../ithor_plugin/ithor_environment.py | 16 
+++------------- .../ithor_plugin/ithor_task_samplers.py | 2 +- .../tutorials/object_nav_ithor_ppo_baseline.py | 17 ++++------------- 3 files changed, 8 insertions(+), 27 deletions(-) diff --git a/allenact_plugins/ithor_plugin/ithor_environment.py b/allenact_plugins/ithor_plugin/ithor_environment.py index ffde80e93..c77b08ada 100644 --- a/allenact_plugins/ithor_plugin/ithor_environment.py +++ b/allenact_plugins/ithor_plugin/ithor_environment.py @@ -16,11 +16,7 @@ from allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV from allenact_plugins.ithor_plugin.ithor_util import round_to_factor from ai2thor.util import metrics -from allenact.utils.cache_utils import ( - DynamicDistanceCache, - pos_to_str_for_cache, - str_to_pos_for_cache, -) +from allenact.utils.cache_utils import DynamicDistanceCache class IThorEnvironment(object): @@ -282,10 +278,7 @@ def last_action_return(self, value: Any) -> None: self.controller.last_event.metadata["actionReturn"] = value def start( - self, - scene_name: Optional[str], - move_mag: float = 0.25, - **kwargs, + self, scene_name: Optional[str], move_mag: float = 0.25, **kwargs, ) -> None: """Starts the ai2thor controller if it was previously stopped. @@ -340,10 +333,7 @@ def stop(self) -> None: self._started = False def reset( - self, - scene_name: Optional[str], - move_mag: float = 0.25, - **kwargs, + self, scene_name: Optional[str], move_mag: float = 0.25, **kwargs, ): """Resets the ai2thor in a new scene. diff --git a/allenact_plugins/ithor_plugin/ithor_task_samplers.py b/allenact_plugins/ithor_plugin/ithor_task_samplers.py index ca83de9be..19d0f4e6a 100644 --- a/allenact_plugins/ithor_plugin/ithor_task_samplers.py +++ b/allenact_plugins/ithor_plugin/ithor_task_samplers.py @@ -2,7 +2,7 @@ import random import gzip import json -from typing import List, Optional, Union, Dict, Any, cast, Tuple +from typing import List, Optional, Union, Dict, Any, cast import gym diff --git a/projects/tutorials/object_nav_ithor_ppo_baseline.py b/projects/tutorials/object_nav_ithor_ppo_baseline.py index 8aa08d405..ed7a3d68c 100644 --- a/projects/tutorials/object_nav_ithor_ppo_baseline.py +++ b/projects/tutorials/object_nav_ithor_ppo_baseline.py @@ -1,5 +1,5 @@ from math import ceil -from typing import Dict, Any, List, Optional, Sequence, Union +from typing import Dict, Any, List, Optional, Sequence import glob import os import gym @@ -10,10 +10,8 @@ from torch.optim.lr_scheduler import LambdaLR from torchvision import models -from allenact.base_abstractions.preprocessor import ( - Preprocessor, - SensorPreprocessorGraph, -) +from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph + from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor from allenact.embodiedai.preprocessors.resnet import ResNetPreprocessor @@ -35,12 +33,10 @@ GoalObjectTypeThorSensor, ) from allenact_plugins.ithor_plugin.ithor_task_samplers import ( - ObjectNavTaskSampler, ObjectNaviThorDatasetTaskSampler, ) from allenact_plugins.ithor_plugin.ithor_tasks import ObjectNaviThorGridTask from projects.objectnav_baselines.models.object_nav_models import ( - ObjectNavBaselineActorCritic, ResnetTensorObjectNavActorCritic, ) @@ -179,10 +175,7 @@ def training_pipeline(cls, **kwargs): gae_lambda=gae_lambda, advance_scene_rollout_period=cls.ADVANCE_SCENE_ROLLOUT_PERIOD, pipeline_stages=[ - PipelineStage( - loss_names=["ppo_loss"], - max_stage_steps=ppo_steps, - ), + 
                PipelineStage(loss_names=["ppo_loss"], max_stage_steps=ppo_steps,),
             ],
             lr_scheduler_builder=Builder(
                 LambdaLR, {"lr_lambda": LinearDecay(steps=ppo_steps)}
             ),
         )
 
     @classmethod
     def machine_params(cls, mode="train", **kwargs):
-        num_gpus = torch.cuda.device_count()
-        has_gpu = num_gpus != 0
 
         if mode == "train":
             nprocesses = 40
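# --- Editor's note: illustrative sketch, not part of any patch in this series. ---
# Putting the reward pieces from patches 14 and 15 together: every step pays
# `step_penalty`, progress toward the goal adds a shaping term (the decrease in geodesic
# distance, clipped to the distance actually travelled and scaled by `shaping_weight`),
# and ending the episode adds `goal_success_reward` or `failed_stop_reward`. A
# stand-alone approximation of that arithmetic (the function below is hypothetical; the
# config values are the REWARD_CONFIG from the experiment file):
REWARD_CONFIG = {
    "step_penalty": -0.01,
    "goal_success_reward": 10.0,
    "failed_stop_reward": 0.0,
    "shaping_weight": 1.0,
}


def approx_step_reward(
    prev_geo_dist: float,
    geo_dist: float,
    dist_moved: float,
    took_end_action: bool,
    success: bool,
) -> float:
    reward = REWARD_CONFIG["step_penalty"]
    # Shaping: reward the decrease in geodesic distance, but never by more than the
    # distance actually travelled this step (this mirrors the clipping in `shaping()`).
    shaped = prev_geo_dist - geo_dist
    shaped = max(min(shaped, dist_moved), -dist_moved)
    reward += shaped * REWARD_CONFIG["shaping_weight"]
    if took_end_action:
        reward += (
            REWARD_CONFIG["goal_success_reward"]
            if success
            else REWARD_CONFIG["failed_stop_reward"]
        )
    return reward


# Example: moving 0.25m closer to the target and then issuing a successful End action
# yields -0.01 + 0.25 + 10.0 = 10.24.
# --- end editor's note ---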