Add Pandaset class and config

Bartosz Sułek · Bartosz Sułek · commit 783a3bba1832 · 2023-08-06T21:08:57.000+02:00
diff --git a/ml3d/configs/randlanet_pandaset.yml b/ml3d/configs/randlanet_pandaset.yml
@@ -0,0 +1,47 @@
+dataset:
+  name: Pandaset
+  dataset_path: path_to_dataset
+  cache_dir: ./logs/cache
+  test_result_folder: './logs/test'
+  training_split: [ '001', '002', '003', '005', '011', '013', '015', '016',
+                    '017', '019', '021', '023', '024', '027', '028', '029', 
+                    '030', '032', '033', '034', '035', '037', '038', '039', 
+                    '040', '041', '042', '043', '044', '046', '052', '053', 
+                    '054', '056', '057', '058', '064', '065', '066', '067',
+                    '070', '071', '072', '073', '077', '078', '080', '084',
+                    '088', '089', '090', '094', '095', '097', '098', '101',
+                    '102', '103', '105', '106', '109', '110', '112', '113'
+                    ]
+  test_split: ['115', '116', '117', '119', '120', '124', '139', '149', '158']
+  validation_split: ['122', '123']
+  use_cache: true
+  sampler:
+    name: 'SemSegRandomSampler'
+model:
+  name: RandLANet
+  batcher: DefaultBatcher
+  num_classes: 39
+  num_points: 81920
+  num_neighbors: 16
+  framework: torch
+  num_layers: 4
+  ignored_label_inds: [0]
+  sub_sampling_ratio: [4, 4, 4, 4]
+  in_channels: 3
+  dim_features: 8
+  dim_output: [16, 64, 128, 256]
+  grid_size: 0.06
+pipeline:
+  name: SemanticSegmentation
+  max_epoch: 50
+  save_ckpt_freq: 5
+  device: gpu
+  optimizer:
+    lr: 0.001
+  batch_size: 4
+  main_log_dir: './logs'
+  logs_dir: './logs'
+  scheduler_gamma: 0.9886
+  test_batch_size: 2
+  train_sum_dir: './logs/training_log'
+  val_batch_size: 2
diff --git a/ml3d/datasets/pandaset.py b/ml3d/datasets/pandaset.py
@@ -0,0 +1,272 @@
+import os
+from os.path import join
+import numpy as np
+import pandas as pd
+from pathlib import Path
+import logging
+
+from .base_dataset import BaseDataset, BaseDatasetSplit
+from ..utils import make_dir, DATASET
+
+log = logging.getLogger(__name__)
+
+class Pandaset(BaseDataset):
+    """ This class is used to create a dataset based on the Pandaset autonomous
+    driving dataset.
+
+    https://pandaset.org/
+    
+    The dataset includes 42 semantic classes and covers more than 100 scenes,
+    each of which is 8 seconds long.
+    
+    """
+    def __init__(self,
+                 dataset_path,
+                 name="Pandaset",
+                 cache_dir="./logs/cache",
+                 use_cache=False,
+                 ignored_label_inds=[],
+                 test_result_folder='./logs/test_log',
+                 test_split=['115', '116', '117', '119', '120', '124', '139', '149', '158'],
+                 training_split=[
+                            '001', '002', '003', '005', '011', '013', '015', '016',
+                            '017', '019', '021', '023', '024', '027', '028', '029', 
+                            '030', '032', '033', '034', '035', '037', '038', '039', 
+                            '040', '041', '042', '043', '044', '046', '052', '053', 
+                            '054', '056', '057', '058', '064', '065', '066', '067',
+                            '070', '071', '072', '073', '077', '078', '080', '084',
+                            '088', '089', '090', '094', '095', '097', '098', '101',
+                            '102', '103', '105', '106', '109', '110', '112', '113'
+                 ],
+                 validation_split=['122', '123'], 
+                 all_split=['001', '002', '003', '005', '011', '013', '015', '016',
+                            '017', '019', '021', '023', '024', '027', '028', '029', 
+                            '030', '032', '033', '034', '035', '037', '038', '039', 
+                            '040', '041', '042', '043', '044', '046', '052', '053', 
+                            '054', '056', '057', '058', '064', '065', '066', '067', 
+                            '069', '070', '071', '072', '073', '077', '078', '080', 
+                            '084', '088', '089', '090', '094', '095', '097', '098', 
+                            '101', '102', '103', '105', '106', '109', '110', '112', 
+                            '113', '115', '116', '117', '119', '120', '122', '123', 
+                            '124', '139', '149', '158'],
+                 **kwargs):
+        
+        """Initialize the function by passing the dataset and other details.
+
+        Args:
+            dataset_path: The path to the dataset to use.
+            name: The name of the dataset.
+            cache_dir: The directory where the cache is stored.
+            use_cache: Indicates if the dataset should be cached.
+            ignored_label_inds: A list of labels that should be ignored in the dataset.
+        Returns:
+            class: The corresponding class.
+        """
+        super().__init__(dataset_path=dataset_path,
+                         name=name,
+                         cache_dir=cache_dir,
+                         use_cache=use_cache,
+                         ignored_label_inds=ignored_label_inds,
+                         test_result_folder=test_result_folder,
+                         test_split=test_split,
+                         training_split=training_split,
+                         validation_split=validation_split,
+                         all_split=all_split,
+                         **kwargs)
+
+        cfg = self.cfg
+
+        self.label_to_names = self.get_label_to_names()
+        self.num_classes = len(self.label_to_names)
+        self.label_values = np.sort([k for k, v in self.label_to_names.items()])
+        
+    @staticmethod
+    def get_label_to_names():
+        """Returns a label to names dictionary object.
+
+        Returns:
+            A dict where keys are label numbers and
+            values are the corresponding names.
+        """
+        label_to_names = {
+            1: "Reflection", 
+            2: "Vegetation", 
+            3: "Ground", 
+            4: "Road", 
+            5: "Lane Line Marking", 
+            6: "Stop Line Marking", 
+            7: "Other Road Marking", 
+            8: "Sidewalk", 
+            9: "Driveway", 
+            10: "Car", 
+            11: "Pickup Truck", 
+            12: "Medium-sized Truck", 
+            13: "Semi-truck", 
+            14: "Towed Object", 
+            15: "Motorcycle", 
+            16: "Other Vehicle - Construction Vehicle", 
+            17: "Other Vehicle - Uncommon", 
+            18: "Other Vehicle - Pedicab", 
+            19: "Emergency Vehicle", 
+            20: "Bus", 
+            21: "Personal Mobility Device", 
+            22: "Motorized Scooter", 
+            23: "Bicycle", 
+            24: "Train", 
+            25: "Trolley", 
+            26: "Tram / Subway", 
+            27: "Pedestrian", 
+            28: "Pedestrian with Object", 
+            29: "Animals - Bird", 
+            30: "Animals - Other", 
+            31: "Pylons", 
+            32: "Road Barriers", 
+            33: "Signs", 
+            34: "Cones", 
+            35: "Construction Signs", 
+            36: "Temporary Construction Barriers", 
+            37: "Rolling Containers", 
+            38: "Building", 
+            39: "Other Static Object"
+        }
+        return label_to_names
+    
+    def get_split(self, split):
+        """Returns a dataset split.
+
+        Args:
+            split: A string identifying the dataset split that is usually one of
+            'training', 'test', or 'all'.
+
+        Returns:
+            A dataset split object providing the requested subset of the data.
+        """
+        return PandasetSplit(self, split=split)
+    
+    def get_split_list(self, split):
+        """Returns the list of data splits available.
+
+        Args:
+            split: A string identifying the dataset split that is usually one of
+            'training', 'test', 'validation', or 'all'.
+
+        Returns:
+            A dataset split object providing the requested subset of the data.
+
+        Raises:
+            ValueError: Indicates that the split name passed is incorrect. The split name should be one of
+            'training', 'test', 'validation', or 'all'.
+        """
+        cfg = self.cfg
+        dataset_path = cfg.dataset_path
+        file_list = []
+
+        if split in ['train', 'training']:
+            seq_list = cfg.training_split
+        elif split in ['test', 'testing']:
+            seq_list = cfg.test_split
+        elif split in ['val', 'validation']:
+            seq_list = cfg.validation_split
+        elif split in ['all']:
+            seq_list = cfg.all_split
+        else:
+            raise ValueError("Invalid split {}".format(split))
+
+        for seq_id in seq_list:
+            pc_path = join(dataset_path, seq_id, 'lidar')
+            for f in np.sort(os.listdir(pc_path)):
+                if f.split('.')[-1] == 'gz':
+                    file_list.append(join(pc_path, f))
+
+        return file_list
+    
+    def is_tested(self, attr):
+        """Checks if a datum in the dataset has been tested.
+
+        Args:
+            dataset: The current dataset to which the datum belongs to.
+            attr: The attribute that needs to be checked.
+
+        Returns:
+            If the dataum attribute is tested, then return the path where the
+            attribute is stored; else, returns false.
+        """
+        pass
+
+    def save_test_result(self, results, attr):
+        """Saves the output of a model.
+
+        Args:
+            results: The output of a model for the datum associated with the
+            attribute passed.
+            attrs: The attributes that correspond to the outputs passed in
+            results.
+        """
+        cfg = self.cfg
+        pred = results['predict_labels']
+        name = attr['name']
+
+        test_path = join(cfg.test_result_folder, 'sequences')
+        make_dir(test_path)
+        save_path = join(test_path, name, 'predictions')
+        make_dir(save_path)
+        pred = results['predict_labels']
+
+        for ign in cfg.ignored_label_inds:
+            pred[pred >= ign] += 1
+
+        store_path = join(save_path, name + '.label')
+
+        pred = pred.astype(np.uint32)
+        pred.tofile(store_path)
+
+
+class PandasetSplit(BaseDatasetSplit):
+    """This class is used to create a split for Pandaset dataset.
+
+    Args: 
+        dataset: The dataset to split.
+        split: A string identifying the dataset split that is usually one of
+            'training', 'test', 'validation', or 'all'.
+        **kwargs: The configuration of the model as keyword arguments.
+
+    Returns:
+        A dataset split object providing the requested subset of the data.
+    """
+    def __init__(self, dataset, split='train'):
+        super().__init__(dataset, split=split)
+        log.info("Found {} pointclouds for {}".format(len(self.path_list),
+                                                      split))
+
+    def __len__(self):
+        return len(self.path_list)
+
+    def get_data(self, idx):
+        pc_path = self.path_list[idx]
+        label_path = pc_path.replace('lidar', 'annotations/semseg')
+    
+        points = pd.read_pickle(pc_path)
+        labels = pd.read_pickle(label_path)
+        
+        intensity = points['i'].to_numpy().astype(np.float32)
+        points = points.drop(columns=['i', 't', 'd']).to_numpy().astype(np.float32)
+        labels = labels.to_numpy().astype(np.int32)
+
+        data = {
+            'point': points,
+            'intensity': intensity,
+            'label': labels
+        }
+
+        return data
+
+    def get_attr(self, idx):
+        pc_path = self.path_list[idx]
+        value = (pc_path).split('/')[9]
+        name = Path(pc_path).name.split('.')[0]
+        name = value + '_' + name
+
+        attr = {'name': name, 'path': pc_path, 'split': self.split}
+        return attr
+
+DATASET._register_module(Pandaset)