[@SulekBartek] Added Pandaset autonomous driving dataset class and configuration file for RandLANet. (#611)

ssheorey · ssheorey · commit a3ced7af339c · 2023-12-22T14:48:13.000-08:00
Fixup to earlier merge commit.
Add dataset info (license, citation, website, download, etc.)
Style fix

Author: SulekBartek
diff --git a/ml3d/datasets/pandaset.py b/ml3d/datasets/pandaset.py
@@ -1,56 +1,81 @@
 import os
 from os.path import join
-import numpy as np
-import pandas as pd
 from pathlib import Path
 import logging
+import numpy as np
+import pandas as pd
 
 from .base_dataset import BaseDataset, BaseDatasetSplit
 from ..utils import make_dir, DATASET
 
 log = logging.getLogger(__name__)
 
+
 class Pandaset(BaseDataset):
-    """ This class is used to create a dataset based on the Pandaset autonomous
+    """This class is used to create a dataset based on the Pandaset autonomous
     driving dataset.
 
-    https://pandaset.org/
-    
-    The dataset includes 42 semantic classes and covers more than 100 scenes,
-    each of which is 8 seconds long.
-    
+    PandaSet aims to promote and advance research and development in autonomous
+    driving and machine learning. The first open-source AV dataset available for
+    both academic and commercial use, PandaSet combines Hesai’s best-in-class
+    LiDAR sensors with Scale AI’s high-quality data annotation.
+
+    PandaSet features data collected using a forward-facing LiDAR with
+    image-like resolution (PandarGT) as well as a mechanical spinning LiDAR
+    (Pandar64). The collected data was annotated with a combination of cuboid
+    and segmentation annotation (Scale 3D Sensor Fusion Segmentation).
+
+    It features::
+
+      - 48,000+ camera images
+      - 16,000+ LiDAR sweeps
+      - 100+ scenes of 8s each
+      - 28 annotation classes
+      - 37 semantic segmentation labels
+      - Full sensor suite: 1x mechanical spinning LiDAR, 1x forward-facing LiDAR, 6x cameras, On-board GPS/IMU
+
+    Website: https://pandaset.org/
+    Code: https://github.com/scaleapi/pandaset-devkit
+    Download: https://www.kaggle.com/datasets/usharengaraju/pandaset-dataset/data
+    Data License: CC0: Public Domain (https://scale.com/legal/pandaset-terms-of-use)
+    Citation: https://arxiv.org/abs/2112.12610
     """
+
     def __init__(self,
                  dataset_path,
                  name="Pandaset",
                  cache_dir="./logs/cache",
                  use_cache=False,
                  ignored_label_inds=[],
                  test_result_folder='./logs/test_log',
-                 test_split=['115', '116', '117', '119', '120', '124', '139', '149', '158'],
+                 test_split=[
+                     '115', '116', '117', '119', '120', '124', '139', '149',
+                     '158'
+                 ],
                  training_split=[
-                            '001', '002', '003', '005', '011', '013', '015', '016',
-                            '017', '019', '021', '023', '024', '027', '028', '029', 
-                            '030', '032', '033', '034', '035', '037', '038', '039', 
-                            '040', '041', '042', '043', '044', '046', '052', '053', 
-                            '054', '056', '057', '058', '064', '065', '066', '067',
-                            '070', '071', '072', '073', '077', '078', '080', '084',
-                            '088', '089', '090', '094', '095', '097', '098', '101',
-                            '102', '103', '105', '106', '109', '110', '112', '113'
+                     '001', '002', '003', '005', '011', '013', '015', '016',
+                     '017', '019', '021', '023', '024', '027', '028', '029',
+                     '030', '032', '033', '034', '035', '037', '038', '039',
+                     '040', '041', '042', '043', '044', '046', '052', '053',
+                     '054', '056', '057', '058', '064', '065', '066', '067',
+                     '070', '071', '072', '073', '077', '078', '080', '084',
+                     '088', '089', '090', '094', '095', '097', '098', '101',
+                     '102', '103', '105', '106', '109', '110', '112', '113'
+                 ],
+                 validation_split=['122', '123'],
+                 all_split=[
+                     '001', '002', '003', '005', '011', '013', '015', '016',
+                     '017', '019', '021', '023', '024', '027', '028', '029',
+                     '030', '032', '033', '034', '035', '037', '038', '039',
+                     '040', '041', '042', '043', '044', '046', '052', '053',
+                     '054', '056', '057', '058', '064', '065', '066', '067',
+                     '069', '070', '071', '072', '073', '077', '078', '080',
+                     '084', '088', '089', '090', '094', '095', '097', '098',
+                     '101', '102', '103', '105', '106', '109', '110', '112',
+                     '113', '115', '116', '117', '119', '120', '122', '123',
+                     '124', '139', '149', '158'
                  ],
-                 validation_split=['122', '123'], 
-                 all_split=['001', '002', '003', '005', '011', '013', '015', '016',
-                            '017', '019', '021', '023', '024', '027', '028', '029', 
-                            '030', '032', '033', '034', '035', '037', '038', '039', 
-                            '040', '041', '042', '043', '044', '046', '052', '053', 
-                            '054', '056', '057', '058', '064', '065', '066', '067', 
-                            '069', '070', '071', '072', '073', '077', '078', '080', 
-                            '084', '088', '089', '090', '094', '095', '097', '098', 
-                            '101', '102', '103', '105', '106', '109', '110', '112', 
-                            '113', '115', '116', '117', '119', '120', '122', '123', 
-                            '124', '139', '149', '158'],
                  **kwargs):
-        
         """Initialize the function by passing the dataset and other details.
 
         Args:
@@ -79,7 +104,7 @@ def __init__(self,
         self.label_to_names = self.get_label_to_names()
         self.num_classes = len(self.label_to_names)
         self.label_values = np.sort([k for k, v in self.label_to_names.items()])
-        
+
     @staticmethod
     def get_label_to_names():
         """Returns a label to names dictionary object.
@@ -89,48 +114,48 @@ def get_label_to_names():
             values are the corresponding names.
         """
         label_to_names = {
-            1: "Reflection", 
-            2: "Vegetation", 
-            3: "Ground", 
-            4: "Road", 
-            5: "Lane Line Marking", 
-            6: "Stop Line Marking", 
-            7: "Other Road Marking", 
-            8: "Sidewalk", 
-            9: "Driveway", 
-            10: "Car", 
-            11: "Pickup Truck", 
-            12: "Medium-sized Truck", 
-            13: "Semi-truck", 
-            14: "Towed Object", 
-            15: "Motorcycle", 
-            16: "Other Vehicle - Construction Vehicle", 
-            17: "Other Vehicle - Uncommon", 
-            18: "Other Vehicle - Pedicab", 
-            19: "Emergency Vehicle", 
-            20: "Bus", 
-            21: "Personal Mobility Device", 
-            22: "Motorized Scooter", 
-            23: "Bicycle", 
-            24: "Train", 
-            25: "Trolley", 
-            26: "Tram / Subway", 
-            27: "Pedestrian", 
-            28: "Pedestrian with Object", 
-            29: "Animals - Bird", 
-            30: "Animals - Other", 
-            31: "Pylons", 
-            32: "Road Barriers", 
-            33: "Signs", 
-            34: "Cones", 
-            35: "Construction Signs", 
-            36: "Temporary Construction Barriers", 
-            37: "Rolling Containers", 
-            38: "Building", 
+            1: "Reflection",
+            2: "Vegetation",
+            3: "Ground",
+            4: "Road",
+            5: "Lane Line Marking",
+            6: "Stop Line Marking",
+            7: "Other Road Marking",
+            8: "Sidewalk",
+            9: "Driveway",
+            10: "Car",
+            11: "Pickup Truck",
+            12: "Medium-sized Truck",
+            13: "Semi-truck",
+            14: "Towed Object",
+            15: "Motorcycle",
+            16: "Other Vehicle - Construction Vehicle",
+            17: "Other Vehicle - Uncommon",
+            18: "Other Vehicle - Pedicab",
+            19: "Emergency Vehicle",
+            20: "Bus",
+            21: "Personal Mobility Device",
+            22: "Motorized Scooter",
+            23: "Bicycle",
+            24: "Train",
+            25: "Trolley",
+            26: "Tram / Subway",
+            27: "Pedestrian",
+            28: "Pedestrian with Object",
+            29: "Animals - Bird",
+            30: "Animals - Other",
+            31: "Pylons",
+            32: "Road Barriers",
+            33: "Signs",
+            34: "Cones",
+            35: "Construction Signs",
+            36: "Temporary Construction Barriers",
+            37: "Rolling Containers",
+            38: "Building",
             39: "Other Static Object"
         }
         return label_to_names
-    
+
     def get_split(self, split):
         """Returns a dataset split.
 
@@ -142,7 +167,7 @@ def get_split(self, split):
             A dataset split object providing the requested subset of the data.
         """
         return PandasetSplit(self, split=split)
-    
+
     def get_split_list(self, split):
         """Returns the list of data splits available.
 
@@ -154,8 +179,8 @@ def get_split_list(self, split):
             A dataset split object providing the requested subset of the data.
 
         Raises:
-            ValueError: Indicates that the split name passed is incorrect. The split name should be one of
-            'training', 'test', 'validation', or 'all'.
+            ValueError: Indicates that the split name passed is incorrect. The
+            split name should be one of 'training', 'test', 'validation', or 'all'.
         """
         cfg = self.cfg
         dataset_path = cfg.dataset_path
@@ -179,7 +204,7 @@ def get_split_list(self, split):
                     file_list.append(join(pc_path, f))
 
         return file_list
-    
+
     def is_tested(self, attr):
         """Checks if a datum in the dataset has been tested.
 
@@ -224,7 +249,7 @@ def save_test_result(self, results, attr):
 class PandasetSplit(BaseDatasetSplit):
     """This class is used to create a split for Pandaset dataset.
 
-    Args: 
+    Args:
         dataset: The dataset to split.
         split: A string identifying the dataset split that is usually one of
             'training', 'test', 'validation', or 'all'.
@@ -233,6 +258,7 @@ class PandasetSplit(BaseDatasetSplit):
     Returns:
         A dataset split object providing the requested subset of the data.
     """
+
     def __init__(self, dataset, split='train'):
         super().__init__(dataset, split=split)
         log.info("Found {} pointclouds for {}".format(len(self.path_list),
@@ -244,19 +270,16 @@ def __len__(self):
     def get_data(self, idx):
         pc_path = self.path_list[idx]
         label_path = pc_path.replace('lidar', 'annotations/semseg')
-    
+
         points = pd.read_pickle(pc_path)
         labels = pd.read_pickle(label_path)
-        
+
         intensity = points['i'].to_numpy().astype(np.float32)
-        points = points.drop(columns=['i', 't', 'd']).to_numpy().astype(np.float32)
+        points = points.drop(columns=['i', 't', 'd']).to_numpy().astype(
+            np.float32)
         labels = labels.to_numpy().astype(np.int32)
 
-        data = {
-            'point': points,
-            'intensity': intensity,
-            'label': labels
-        }
+        data = {'point': points, 'intensity': intensity, 'label': labels}
 
         return data
 
@@ -269,4 +292,5 @@ def get_attr(self, idx):
         attr = {'name': name, 'path': pc_path, 'split': self.split}
         return attr
 
+
 DATASET._register_module(Pandaset)