1
1
# https://cocodataset.org/#format-data
2
2
3
- from labelbox .data .serialization .coco .categories import Categories , hash_category_name
4
- from labelbox .data .serialization .coco .annotation import COCOObjectAnnotation , RLE , get_annotation_lookup , rle_decoding
5
- from labelbox .data .serialization .coco .image import CocoImage , get_image , get_image_id
6
- from typing import Any , Dict , List
7
- from pydantic import BaseModel
8
- from ...annotation_types import ImageData , MaskData , Mask , ObjectAnnotation , Label , Polygon , Point , Rectangle
3
+ from concurrent .futures import ProcessPoolExecutor , as_completed
4
+ from typing import Any , Dict , List , Tuple
5
+ from pathlib import Path
6
+
9
7
import numpy as np
10
- from PIL import Image
11
8
from tqdm import tqdm
12
- import os
13
- from concurrent .futures import ProcessPoolExecutor , as_completed
9
+ from pydantic import BaseModel
10
+
11
+ from ...annotation_types import ImageData , MaskData , Mask , ObjectAnnotation , Label , Polygon , Point , Rectangle
14
12
from ...annotation_types .collection import LabelCollection
13
+ from .categories import Categories , hash_category_name
14
+ from .annotation import COCOObjectAnnotation , RLE , get_annotation_lookup , rle_decoding
15
+ from .image import CocoImage , get_image , get_image_id
15
16
16
17
17
- def mask_to_coco_object_annotation (annotation : ObjectAnnotation , annot_idx ,
18
- image_id , category_id ) :
18
+ def mask_to_coco_object_annotation (annotation : ObjectAnnotation , annot_idx : int ,
19
+ image_id : int , category_id : int ) -> COCOObjectAnnotation :
19
20
# This fills in any holes in the multipolygon.
20
21
# If you need to support holes use the panoptic data format
21
22
shapely = annotation .value .shapely .simplify (1 ).buffer (0 )
@@ -37,8 +38,8 @@ def mask_to_coco_object_annotation(annotation: ObjectAnnotation, annot_idx,
37
38
iscrowd = 0 )
38
39
39
40
40
- def vector_to_coco_object_annotation (annotation : ObjectAnnotation , annot_idx ,
41
- image_id : int , category_id ) :
41
+ def vector_to_coco_object_annotation (annotation : ObjectAnnotation , annot_idx : int ,
42
+ image_id : int , category_id : int ) -> COCOObjectAnnotation :
42
43
shapely = annotation .value .shapely
43
44
xmin , ymin , xmax , ymax = shapely .bounds
44
45
segmentation = []
@@ -61,15 +62,15 @@ def vector_to_coco_object_annotation(annotation: ObjectAnnotation, annot_idx,
61
62
iscrowd = 0 )
62
63
63
64
64
- def rle_to_common (class_annotations , class_name ) :
65
def rle_to_common(class_annotations: COCOObjectAnnotation,
                  class_name: str) -> ObjectAnnotation:
    """Convert one RLE-encoded COCO annotation into a common mask annotation.

    Args:
        class_annotations: COCO annotation whose ``segmentation`` exposes
            ``counts`` and ``size``.
        class_name: Name assigned to the resulting annotation.

    Returns:
        An ``ObjectAnnotation`` whose value is a ``Mask`` built from the
        decoded array, with color ``[1, 1, 1]``.
    """
    rle = class_annotations.segmentation
    # ``size`` is handed to the decoder in reversed order.
    reversed_size = rle.size[::-1]
    decoded = rle_decoding(rle.counts, *reversed_size)
    mask_value = Mask(mask=MaskData.from_2D_arr(decoded), color=[1, 1, 1])
    return ObjectAnnotation(name=class_name, value=mask_value)
70
71
71
72
72
- def segmentations_to_common (class_annotations , class_name ) :
73
+ def segmentations_to_common (class_annotations : COCOObjectAnnotation , class_name : str ) -> List [ ObjectAnnotation ] :
73
74
# Technically these are polygons, but the key in COCO is called "segmentation".
74
75
annotations = []
75
76
for points in class_annotations .segmentation :
@@ -83,9 +84,9 @@ def segmentations_to_common(class_annotations, class_name):
83
84
84
85
85
86
def process_label (label : Label ,
86
- idx ,
87
- image_root ,
88
- max_annotations_per_image = 10000 ):
87
+ idx : int ,
88
+ image_root : str ,
89
+ max_annotations_per_image = 10000 ) -> Tuple [ np . ndarray , List [ COCOObjectAnnotation ], Dict [ str , str ]] :
89
90
annot_idx = idx * max_annotations_per_image
90
91
image_id = get_image_id (label , idx )
91
92
image = get_image (label , image_root , image_id )
@@ -118,14 +119,14 @@ class CocoInstanceDataset(BaseModel):
118
119
categories : List [Categories ]
119
120
120
121
@classmethod
121
- def from_common (cls , labels : LabelCollection , image_root ):
122
+ def from_common (cls , labels : LabelCollection , image_root : Path , max_workers = 8 ):
122
123
all_coco_annotations = []
123
124
categories = {}
124
125
images = []
125
126
futures = []
126
127
coco_categories = {}
127
128
128
- with ProcessPoolExecutor (max_workers = 8 ) as exc :
129
+ with ProcessPoolExecutor (max_workers = max_workers ) as exc :
129
130
futures = [
130
131
exc .submit (process_label , label , idx , image_root )
131
132
for idx , label in enumerate (labels )
@@ -161,13 +162,13 @@ def to_common(self, image_root):
161
162
annotation_lookup = get_annotation_lookup (self .annotations )
162
163
163
164
for image in self .images :
164
- im_path = os . path . join (image_root , image .file_name )
165
- if not os . path . exists (im_path ):
165
+ im_path = Path (image_root , image .file_name )
166
+ if not im_path . exists ():
166
167
raise ValueError (
167
168
f"Cannot find file { im_path } . Make sure `image_root` is set properly"
168
169
)
169
170
170
- data = ImageData (file_path = im_path )
171
+ data = ImageData (file_path = str ( im_path ) )
171
172
annotations = []
172
173
for class_annotations in annotation_lookup [image .id ]:
173
174
if isinstance (class_annotations .segmentation , RLE ):
0 commit comments