1
1
import os
2
2
from os .path import join
3
- import numpy as np
4
- import pandas as pd
5
3
from pathlib import Path
6
4
import logging
5
+ import numpy as np
6
+ import pandas as pd
7
7
8
8
from .base_dataset import BaseDataset , BaseDatasetSplit
9
9
from ..utils import make_dir , DATASET
10
10
11
11
# Module-level logger named after this module's import path (standard
# `logging.getLogger(__name__)` pattern).
log = logging.getLogger(__name__)
12
12
13
+
13
14
class Pandaset (BaseDataset ):
14
- """ This class is used to create a dataset based on the Pandaset autonomous
15
+ """This class is used to create a dataset based on the Pandaset autonomous
15
16
driving dataset.
16
17
17
- https://pandaset.org/
18
-
19
- The dataset includes 42 semantic classes and covers more than 100 scenes,
20
- each of which is 8 seconds long.
21
-
18
+ PandaSet aims to promote and advance research and development in autonomous
19
+ driving and machine learning. The first open-source AV dataset available for
20
+ both academic and commercial use, PandaSet combines Hesai’s best-in-class
21
+ LiDAR sensors with Scale AI’s high-quality data annotation.
22
+
23
+ PandaSet features data collected using a forward-facing LiDAR with
24
+ image-like resolution (PandarGT) as well as a mechanical spinning LiDAR
25
+ (Pandar64). The collected data was annotated with a combination of cuboid
26
+ and segmentation annotation (Scale 3D Sensor Fusion Segmentation).
27
+
28
+ It features::
29
+
30
+ - 48,000+ camera images
31
+ - 16,000+ LiDAR sweeps
32
+ - 100+ scenes of 8s each
33
+ - 28 annotation classes
34
+ - 37 semantic segmentation labels
35
+ - Full sensor suite: 1x mechanical spinning LiDAR, 1x forward-facing LiDAR, 6x cameras, On-board GPS/IMU
36
+
37
+ Website: https://pandaset.org/
38
+ Code: https://github.com/scaleapi/pandaset-devkit
39
+ Download: https://www.kaggle.com/datasets/usharengaraju/pandaset-dataset/data
40
+ Data License: CC0: Public Domain (https://scale.com/legal/pandaset-terms-of-use)
41
+ Citation: https://arxiv.org/abs/2112.12610
22
42
"""
43
+
23
44
def __init__ (self ,
24
45
dataset_path ,
25
46
name = "Pandaset" ,
26
47
cache_dir = "./logs/cache" ,
27
48
use_cache = False ,
28
49
ignored_label_inds = [],
29
50
test_result_folder = './logs/test_log' ,
30
- test_split = ['115' , '116' , '117' , '119' , '120' , '124' , '139' , '149' , '158' ],
51
+ test_split = [
52
+ '115' , '116' , '117' , '119' , '120' , '124' , '139' , '149' ,
53
+ '158'
54
+ ],
31
55
training_split = [
32
- '001' , '002' , '003' , '005' , '011' , '013' , '015' , '016' ,
33
- '017' , '019' , '021' , '023' , '024' , '027' , '028' , '029' ,
34
- '030' , '032' , '033' , '034' , '035' , '037' , '038' , '039' ,
35
- '040' , '041' , '042' , '043' , '044' , '046' , '052' , '053' ,
36
- '054' , '056' , '057' , '058' , '064' , '065' , '066' , '067' ,
37
- '070' , '071' , '072' , '073' , '077' , '078' , '080' , '084' ,
38
- '088' , '089' , '090' , '094' , '095' , '097' , '098' , '101' ,
39
- '102' , '103' , '105' , '106' , '109' , '110' , '112' , '113'
56
+ '001' , '002' , '003' , '005' , '011' , '013' , '015' , '016' ,
57
+ '017' , '019' , '021' , '023' , '024' , '027' , '028' , '029' ,
58
+ '030' , '032' , '033' , '034' , '035' , '037' , '038' , '039' ,
59
+ '040' , '041' , '042' , '043' , '044' , '046' , '052' , '053' ,
60
+ '054' , '056' , '057' , '058' , '064' , '065' , '066' , '067' ,
61
+ '070' , '071' , '072' , '073' , '077' , '078' , '080' , '084' ,
62
+ '088' , '089' , '090' , '094' , '095' , '097' , '098' , '101' ,
63
+ '102' , '103' , '105' , '106' , '109' , '110' , '112' , '113'
64
+ ],
65
+ validation_split = ['122' , '123' ],
66
+ all_split = [
67
+ '001' , '002' , '003' , '005' , '011' , '013' , '015' , '016' ,
68
+ '017' , '019' , '021' , '023' , '024' , '027' , '028' , '029' ,
69
+ '030' , '032' , '033' , '034' , '035' , '037' , '038' , '039' ,
70
+ '040' , '041' , '042' , '043' , '044' , '046' , '052' , '053' ,
71
+ '054' , '056' , '057' , '058' , '064' , '065' , '066' , '067' ,
72
+ '069' , '070' , '071' , '072' , '073' , '077' , '078' , '080' ,
73
+ '084' , '088' , '089' , '090' , '094' , '095' , '097' , '098' ,
74
+ '101' , '102' , '103' , '105' , '106' , '109' , '110' , '112' ,
75
+ '113' , '115' , '116' , '117' , '119' , '120' , '122' , '123' ,
76
+ '124' , '139' , '149' , '158'
40
77
],
41
- validation_split = ['122' , '123' ],
42
- all_split = ['001' , '002' , '003' , '005' , '011' , '013' , '015' , '016' ,
43
- '017' , '019' , '021' , '023' , '024' , '027' , '028' , '029' ,
44
- '030' , '032' , '033' , '034' , '035' , '037' , '038' , '039' ,
45
- '040' , '041' , '042' , '043' , '044' , '046' , '052' , '053' ,
46
- '054' , '056' , '057' , '058' , '064' , '065' , '066' , '067' ,
47
- '069' , '070' , '071' , '072' , '073' , '077' , '078' , '080' ,
48
- '084' , '088' , '089' , '090' , '094' , '095' , '097' , '098' ,
49
- '101' , '102' , '103' , '105' , '106' , '109' , '110' , '112' ,
50
- '113' , '115' , '116' , '117' , '119' , '120' , '122' , '123' ,
51
- '124' , '139' , '149' , '158' ],
52
78
** kwargs ):
53
-
54
79
"""Initialize the function by passing the dataset and other details.
55
80
56
81
Args:
@@ -79,7 +104,7 @@ def __init__(self,
79
104
self .label_to_names = self .get_label_to_names ()
80
105
self .num_classes = len (self .label_to_names )
81
106
self .label_values = np .sort ([k for k , v in self .label_to_names .items ()])
82
-
107
+
83
108
@staticmethod
def get_label_to_names():
    """Returns a label to names dictionary object.

    Returns:
        A dict where keys are label numbers and values are the
        corresponding names.
    """
    # The class ids form the contiguous range 1..39, so pairing an ordered
    # tuple of names with enumerate(start=1) reproduces the mapping while
    # keeping the id/name association easy to audit.
    names = (
        "Reflection",
        "Vegetation",
        "Ground",
        "Road",
        "Lane Line Marking",
        "Stop Line Marking",
        "Other Road Marking",
        "Sidewalk",
        "Driveway",
        "Car",
        "Pickup Truck",
        "Medium-sized Truck",
        "Semi-truck",
        "Towed Object",
        "Motorcycle",
        "Other Vehicle - Construction Vehicle",
        "Other Vehicle - Uncommon",
        "Other Vehicle - Pedicab",
        "Emergency Vehicle",
        "Bus",
        "Personal Mobility Device",
        "Motorized Scooter",
        "Bicycle",
        "Train",
        "Trolley",
        "Tram / Subway",
        "Pedestrian",
        "Pedestrian with Object",
        "Animals - Bird",
        "Animals - Other",
        "Pylons",
        "Road Barriers",
        "Signs",
        "Cones",
        "Construction Signs",
        "Temporary Construction Barriers",
        "Rolling Containers",
        "Building",
        "Other Static Object",
    )
    return {label: name for label, name in enumerate(names, start=1)}
133
-
158
+
134
159
def get_split(self, split):
    """Returns a dataset split.

    Args:
        split: A string identifying the dataset split, forwarded to
            :class:`PandasetSplit` (usually one of 'training', 'test',
            'validation', or 'all').

    Returns:
        A dataset split object providing the requested subset of the data.
    """
    dataset_split = PandasetSplit(self, split=split)
    return dataset_split
145
-
170
+
146
171
def get_split_list (self , split ):
147
172
"""Returns the list of data splits available.
148
173
@@ -154,8 +179,8 @@ def get_split_list(self, split):
154
179
A dataset split object providing the requested subset of the data.
155
180
156
181
Raises:
157
- ValueError: Indicates that the split name passed is incorrect. The split name should be one of
158
- 'training', 'test', 'validation', or 'all'.
182
+ ValueError: Indicates that the split name passed is incorrect. The
183
+ split name should be one of 'training', 'test', 'validation', or 'all'.
159
184
"""
160
185
cfg = self .cfg
161
186
dataset_path = cfg .dataset_path
@@ -179,7 +204,7 @@ def get_split_list(self, split):
179
204
file_list .append (join (pc_path , f ))
180
205
181
206
return file_list
182
-
207
+
183
208
def is_tested (self , attr ):
184
209
"""Checks if a datum in the dataset has been tested.
185
210
@@ -224,7 +249,7 @@ def save_test_result(self, results, attr):
224
249
class PandasetSplit (BaseDatasetSplit ):
225
250
"""This class is used to create a split for Pandaset dataset.
226
251
227
- Args:
252
+ Args:
228
253
dataset: The dataset to split.
229
254
split: A string identifying the dataset split that is usually one of
230
255
'training', 'test', 'validation', or 'all'.
@@ -233,6 +258,7 @@ class PandasetSplit(BaseDatasetSplit):
233
258
Returns:
234
259
A dataset split object providing the requested subset of the data.
235
260
"""
261
+
236
262
def __init__ (self , dataset , split = 'train' ):
237
263
super ().__init__ (dataset , split = split )
238
264
log .info ("Found {} pointclouds for {}" .format (len (self .path_list ),
@@ -244,19 +270,16 @@ def __len__(self):
244
270
def get_data(self, idx):
    """Returns the point cloud, intensity and labels for one frame.

    Args:
        idx: Index of the frame in this split's path list.

    Returns:
        A dict with keys 'point' (float32 coordinates), 'intensity'
        (float32 per-point intensity) and 'label' (int32 semantic labels).
    """
    lidar_file = self.path_list[idx]
    # The semantic-segmentation pickle mirrors the lidar directory layout,
    # so the label path is derived by a simple path substitution.
    semseg_file = lidar_file.replace('lidar', 'annotations/semseg')

    frame = pd.read_pickle(lidar_file)
    semseg = pd.read_pickle(semseg_file)

    # Pull out intensity first, then drop the non-coordinate columns
    # (intensity 'i', timestamp 't', device/distance 'd' — NOTE(review):
    # exact meaning of 't'/'d' not visible here; confirm against the
    # pandaset-devkit schema) to keep only the xyz coordinates.
    intensity = frame['i'].to_numpy().astype(np.float32)
    coords = frame.drop(columns=['i', 't', 'd'])

    return {
        'point': coords.to_numpy().astype(np.float32),
        'intensity': intensity,
        'label': semseg.to_numpy().astype(np.int32),
    }
262
285
@@ -269,4 +292,5 @@ def get_attr(self, idx):
269
292
attr = {'name' : name , 'path' : pc_path , 'split' : self .split }
270
293
return attr
271
294
295
+
272
296
# NOTE(review): registers the Pandaset class with the DATASET helper imported
# from ..utils — presumably a name->class registry so the dataset can be
# constructed from configuration; confirm against the utils module.
DATASET._register_module(Pandaset)
0 commit comments