fix: slight change to autolabels and support for pd (#92)

chrisochoatri · web-flow · commit 5652a445a09c · 2022-05-05T10:14:40.000-07:00
* fix: slight change to autolabels and exposed to pd
diff --git a/dgp/datasets/base_dataset.py b/dgp/datasets/base_dataset.py
@@ -1007,7 +1007,11 @@ def _get_scene_container(
         if requested_autolabels is not None:
             logging.debug(f"Loading autolabeled annotations from {scene_dir}.")
             autolabeled_scenes = _parse_autolabeled_scenes(
-                scene_dir, requested_autolabels, autolabel_root=autolabel_root, skip_missing_data=skip_missing_data
+                scene_dir,
+                requested_autolabels,
+                autolabel_root=autolabel_root,
+                skip_missing_data=skip_missing_data,
+                use_diskcache=use_diskcache,
             )
         else:
             autolabeled_scenes = None
@@ -1381,21 +1385,24 @@ def load_annotations(self, scene_idx, sample_idx_in_scene, datum_name):
         autolabel_annotations = self.get_autolabels_for_datum(scene_idx, sample_idx_in_scene, datum_name)
         for autolabel_key in self.requested_autolabels:
             # Some datums in a sample may not have associated annotations. Return "None" for those datums
-            _, annotation_key = autolabel_key.split('/')
-            # NOTE: model_name should already be stored in the scene json
-            # which is why we do not have to add it here to the annotation_file
+            model_name, annotation_key = autolabel_key.split('/')
+            # NOTE: model_name is normally not part of the annotation_path stored in the scene.json, so it is
+            # joined in explicitly below. If a stored path does already include it, it must be removed first.
+
             annotation_path = autolabel_annotations.get(autolabel_key, None)
 
             if annotation_path is None:
                 autolabel_annotations[autolabel_key] = None
                 continue
             if self.autolabel_root is not None:
                 annotation_file = os.path.join(
-                    self.autolabel_root, os.path.basename(self.scenes[scene_idx].directory), 'autolabels',
-                    annotation_path
+                    self.autolabel_root, os.path.basename(self.scenes[scene_idx].directory), AUTOLABEL_FOLDER,
+                    model_name, annotation_path
                 )
             else:
-                annotation_file = os.path.join(self.scenes[scene_idx].directory, 'autolabels', annotation_path)
+                annotation_file = os.path.join(
+                    self.scenes[scene_idx].directory, AUTOLABEL_FOLDER, model_name, annotation_path
+                )
 
             if not os.path.exists(annotation_file):
                 logging.warning(f'missing {annotation_file}')
@@ -1835,7 +1842,13 @@ def get_file_meta_from_datum(self, scene_idx, sample_idx_in_scene, datum_name):
         return data, annotations
 
 
-def _parse_autolabeled_scenes(scene_dir, requested_autolabels, autolabel_root=None, skip_missing_data=False):
+def _parse_autolabeled_scenes(
+    scene_dir,
+    requested_autolabels,
+    autolabel_root=None,
+    skip_missing_data=False,
+    use_diskcache=False,
+):
     """Parse autolabeled scene JSONs
 
     Parameters
@@ -1852,6 +1865,9 @@ def _parse_autolabeled_scenes(scene_dir, requested_autolabels, autolabel_root=No
     skip_missing_data: bool, defaul: False
         If true, skip over missing autolabel scenes
 
+    use_diskcache: bool, default: False
+        If True, use diskcache for the autolabel scene containers.
+
     Returns
     -------
     autolabeled_scenes: dict
@@ -1883,5 +1899,7 @@ def _parse_autolabeled_scenes(scene_dir, requested_autolabels, autolabel_root=No
             assert os.path.exists(autolabel_dir), 'Path to autolabels {} does not exist'.format(autolabel_dir)
             assert os.path.exists(autolabel_scene), 'Scene JSON expected but not found at {}'.format(autolabel_scene)
 
-        autolabeled_scenes[autolabel] = SceneContainer(autolabel_scene, directory=autolabel_dir)
+        autolabeled_scenes[autolabel] = SceneContainer(
+            autolabel_scene, directory=autolabel_dir, use_diskcache=use_diskcache
+        )
     return autolabeled_scenes
diff --git a/dgp/datasets/pd_dataset.py b/dgp/datasets/pd_dataset.py
@@ -83,6 +83,9 @@ class _ParallelDomainDataset(_SynchronizedDataset):
 
     transform_accumulated_box_points: bool, default: False
         Flag to use cuboid pose and instance id to warp points when using lidar accumulation.
+
+    autolabel_root: str, default: None
+        Path to autolabels.
     """
     def __init__(
         self,
@@ -98,6 +101,7 @@ def __init__(
         use_virtual_camera_datums=True,
         accumulation_context=None,
         transform_accumulated_box_points=False,
+        autolabel_root=None,
     ):
         self.coalesce_point_cloud = datum_names is not None and \
                                     COALESCED_LIDAR_DATUM_NAME in datum_names
@@ -136,6 +140,7 @@ def __init__(
             only_annotated_datums=only_annotated_datums,
             accumulation_context=accumulation_context,
             transform_accumulated_box_points=transform_accumulated_box_points,
+            autolabel_root=autolabel_root,
         )
 
     def coalesce_pc_data(self, items):
@@ -155,6 +160,12 @@ def coalesce_pc_data(self, items):
         assert self.coalesce_point_cloud
         assert len(pc_items) == len(LIDAR_DATUM_NAMES)
 
+        # TODO: support coalescing autolabels; until then, only warn that they are not coalesced.
+        if len(self.requested_autolabels) > 0:
+            logging.warning(
+                'autolabels were requested, however point cloud coalesce does not support coalescing autolabels'
+            )
+
         # Only coalesce if there's more than 1 point cloud
         coalesced_pc = OrderedDict()
         X_V_merged, bbox_3d_V_merged, instance_ids_merged = [], [], []
@@ -248,6 +259,7 @@ def __init__(
         dataset_root=None,
         transform_accumulated_box_points=False,
         use_diskcache=True,
+        autolabel_root=None,
     ):
         if not use_diskcache:
             logging.warning('Instantiating a dataset with use_diskcache=False may exhaust memory with a large dataset.')
@@ -261,10 +273,16 @@ def __init__(
             skip_missing_data=skip_missing_data,
             dataset_root=dataset_root,
             use_diskcache=use_diskcache,
+            autolabel_root=autolabel_root,
         )
 
         # Return SynchronizedDataset with scenes built from dataset.json
-        dataset_metadata = DatasetMetadata.from_scene_containers(scenes, requested_annotations, requested_autolabels)
+        dataset_metadata = DatasetMetadata.from_scene_containers(
+            scenes,
+            requested_annotations,
+            requested_autolabels,
+            autolabel_root=autolabel_root,
+        )
         super().__init__(
             dataset_metadata,
             scenes=scenes,
@@ -278,6 +296,7 @@ def __init__(
             use_virtual_camera_datums=use_virtual_camera_datums,
             accumulation_context=accumulation_context,
             transform_accumulated_box_points=transform_accumulated_box_points,
+            autolabel_root=autolabel_root,
         )
 
 
@@ -300,6 +319,7 @@ def __init__(
         accumulation_context=None,
         transform_accumulated_box_points=False,
         use_diskcache=True,
+        autolabel_root=None,
     ):
         if not use_diskcache:
             logging.warning('Instantiating a dataset with use_diskcache=False may exhaust memory with a large dataset.')
@@ -311,10 +331,16 @@ def __init__(
             is_datums_synchronized=True,
             skip_missing_data=skip_missing_data,
             use_diskcache=use_diskcache,
+            autolabel_root=autolabel_root,
         )
 
         # Return SynchronizedDataset with scenes built from dataset.json
-        dataset_metadata = DatasetMetadata.from_scene_containers([scene], requested_annotations, requested_autolabels)
+        dataset_metadata = DatasetMetadata.from_scene_containers(
+            [scene],
+            requested_annotations,
+            requested_autolabels,
+            autolabel_root=autolabel_root,
+        )
         super().__init__(
             dataset_metadata,
             scenes=[scene],
diff --git a/tests/test_autolabel_dataset.py b/tests/test_autolabel_dataset.py
@@ -62,12 +62,16 @@ def clone_scene_as_autolabel(dataset_root, autolabel_root, autolabel_model, auto
             if 'scene' in scene_json and scene_json.endswith('json'):
                 base_scene = open_pbobject(os.path.join(full_scene_dir, scene_json), Scene)
                 for i in range(len(base_scene.data)):
+                    name = base_scene.data[i].id.name
                     datum = base_scene.data[i].datum
                     datum_type = datum.WhichOneof('datum_oneof')
                     datum_value = getattr(datum, datum_type)  # This is datum.image or datum.point_cloud etc
                     annotation_type_id = ANNOTATION_KEY_TO_TYPE_ID[autolabel_type]
                     current_annotation = datum_value.annotations[annotation_type_id]
-                    datum_value.annotations[annotation_type_id] = os.path.join(autolabel_scene_dir, current_annotation)
+                    # NOTE: this should not actually change the path but is included for clarity
+                    datum_value.annotations[annotation_type_id] = os.path.join(
+                        ANNOTATION_TYPE_ID_TO_FOLDER[autolabel_type], name, os.path.basename(current_annotation)
+                    )
 
                 save_pbobject_as_json(base_scene, os.path.join(autolabel_scene_dir, AUTOLABEL_SCENE_JSON_NAME))
                 # Only modify one scene.json, test scene should not contain multiple scene.jsons
@@ -109,7 +113,8 @@ def test_autolabels_default_root(self):
             backward_context=1,
             requested_annotations=('bounding_box_3d', ),
             requested_autolabels=requested_autolabels,
-            autolabel_root=autolabel_root
+            autolabel_root=autolabel_root,
+            use_diskcache=False,
         )
 
         assert len(dataset) == 2
@@ -139,7 +144,8 @@ def test_autolabels_custom_root(self):
             backward_context=1,
             requested_annotations=('bounding_box_3d', ),
             requested_autolabels=requested_autolabels,
-            autolabel_root=autolabel_root
+            autolabel_root=autolabel_root,
+            use_diskcache=False,
         )
 
         assert len(dataset) == 2
@@ -174,6 +180,7 @@ def test_autolabels_missing_files(self):
             requested_autolabels=requested_autolabels,
             autolabel_root=autolabel_root,
             skip_missing_data=True,
+            use_diskcache=False,
         )
 
         assert len(dataset) == 2
@@ -210,6 +217,7 @@ def test_only_annotated_datums(self):
             autolabel_root=autolabel_root,
             only_annotated_datums=True,
             skip_missing_data=True,
+            use_diskcache=False,
         )
 
         assert len(dataset) == 1