5
5
6
6
import labelbox as lb
7
7
from labelbox .data .annotation_types .data .video import VideoData
8
- from labelbox .schema .data_row import DataRow
9
8
from labelbox .schema .media_type import MediaType
10
9
import labelbox .types as lb_types
11
10
from labelbox .data .annotation_types .data import (
70
69
]
71
70
72
71
73
- def remove_keys_recursive (d , keys ):
74
- for k in keys :
75
- if k in d :
76
- del d [k ]
77
- for k , v in d .items ():
78
- if isinstance (v , dict ):
79
- remove_keys_recursive (v , keys )
80
- elif isinstance (v , list ):
81
- for i in v :
82
- if isinstance (i , dict ):
83
- remove_keys_recursive (i , keys )
84
-
85
-
86
- # NOTE this uses quite a primitive check for cuids but I do not think it is worth coming up with a better one
87
- # Also this function is NOT written with performance in mind, good for small to mid size dicts like we have in our test
88
- def rename_cuid_key_recursive (d ):
89
- new_key = "<cuid>"
90
- for k in list (d .keys ()):
91
- if len (k ) == 25 and not k .isalpha (): # primitive check for cuid
92
- d [new_key ] = d .pop (k )
93
- for k , v in d .items ():
94
- if isinstance (v , dict ):
95
- rename_cuid_key_recursive (v )
96
- elif isinstance (v , list ):
97
- for i in v :
98
- if isinstance (i , dict ):
99
- rename_cuid_key_recursive (i )
100
-
101
-
102
72
def get_annotation_comparison_dicts_from_labels (labels ):
103
73
labels_ndjson = list (NDJsonConverter .serialize (labels ))
104
74
for annotation in labels_ndjson :
@@ -198,12 +168,13 @@ def test_import_data_types(
198
168
data_row_json_by_data_type ,
199
169
annotations_by_data_type ,
200
170
data_type_class ,
171
+ helpers ,
201
172
):
202
173
project = configured_project
203
174
project_id = project .uid
204
175
dataset = initial_dataset
205
176
206
- set_project_media_type_from_data_type (project , data_type_class )
177
+ helpers . set_project_media_type_from_data_type (project , data_type_class )
207
178
208
179
data_type_string = data_type_class .__name__ [:- 4 ].lower ()
209
180
data_row_ndjson = data_row_json_by_data_type [data_type_string ]
@@ -241,12 +212,13 @@ def test_import_data_types_by_global_key(
241
212
rand_gen ,
242
213
data_row_json_by_data_type ,
243
214
annotations_by_data_type ,
215
+ helpers ,
244
216
):
245
217
project = configured_project
246
218
project_id = project .uid
247
219
dataset = initial_dataset
248
220
data_type_class = ImageData
249
- set_project_media_type_from_data_type (project , data_type_class )
221
+ helpers . set_project_media_type_from_data_type (project , data_type_class )
250
222
251
223
data_row_ndjson = data_row_json_by_data_type ["image" ]
252
224
data_row_ndjson ["global_key" ] = str (uuid .uuid4 ())
@@ -287,24 +259,6 @@ def validate_iso_format(date_string: str):
287
259
assert parsed_t .second is not None
288
260
289
261
290
- def to_pascal_case (name : str ) -> str :
291
- return "" .join ([word .capitalize () for word in name .split ("_" )])
292
-
293
-
294
- def set_project_media_type_from_data_type (project , data_type_class ):
295
- data_type_string = data_type_class .__name__ [:- 4 ].lower ()
296
- media_type = to_pascal_case (data_type_string )
297
- if media_type == "Conversation" :
298
- media_type = "Conversational"
299
- elif media_type == "Llmpromptcreation" :
300
- media_type = "LLMPromptCreation"
301
- elif media_type == "Llmpromptresponsecreation" :
302
- media_type = "LLMPromptResponseCreation"
303
- elif media_type == "Llmresponsecreation" :
304
- media_type = "Text"
305
- project .update (media_type = MediaType [media_type ])
306
-
307
-
308
262
@pytest .mark .parametrize (
309
263
"data_type_class" ,
310
264
[
@@ -331,12 +285,13 @@ def test_import_data_types_v2(
331
285
exports_v2_by_data_type ,
332
286
export_v2_test_helpers ,
333
287
rand_gen ,
288
+ helpers ,
334
289
):
335
290
project = configured_project
336
291
dataset = initial_dataset
337
292
project_id = project .uid
338
293
339
- set_project_media_type_from_data_type (project , data_type_class )
294
+ helpers . set_project_media_type_from_data_type (project , data_type_class )
340
295
341
296
data_type_string = data_type_class .__name__ [:- 4 ].lower ()
342
297
data_row_ndjson = data_row_json_by_data_type [data_type_string ]
@@ -381,9 +336,9 @@ def test_import_data_types_v2(
381
336
exported_project_labels = exported_project ["labels" ][0 ]
382
337
exported_annotations = exported_project_labels ["annotations" ]
383
338
384
- remove_keys_recursive (exported_annotations ,
385
- ["feature_id" , "feature_schema_id" ])
386
- rename_cuid_key_recursive (exported_annotations )
339
+ helpers . remove_keys_recursive (exported_annotations ,
340
+ ["feature_id" , "feature_schema_id" ])
341
+ helpers . rename_cuid_key_recursive (exported_annotations )
387
342
assert exported_annotations == exports_v2_by_data_type [data_type_string ]
388
343
389
344
data_row = client .get_data_row (data_row .uid )
@@ -400,10 +355,11 @@ def test_import_label_annotations(
400
355
data_class ,
401
356
annotations ,
402
357
rand_gen ,
358
+ helpers ,
403
359
):
404
360
project = configured_project_with_one_data_row
405
361
dataset = initial_dataset
406
- set_project_media_type_from_data_type (project , data_class )
362
+ helpers . set_project_media_type_from_data_type (project , data_class )
407
363
408
364
data_row_json = data_row_json_by_data_type [data_type ]
409
365
data_row = create_data_row_for_project (project , dataset , data_row_json ,
@@ -471,10 +427,11 @@ def test_import_mal_annotations(
471
427
annotations ,
472
428
rand_gen ,
473
429
one_datarow ,
430
+ helpers ,
474
431
):
475
432
data_row = one_datarow
476
- set_project_media_type_from_data_type (configured_project_with_one_data_row ,
477
- data_class )
433
+ helpers . set_project_media_type_from_data_type (
434
+ configured_project_with_one_data_row , data_class )
478
435
479
436
configured_project_with_one_data_row .create_batch (
480
437
rand_gen (str ),
@@ -500,12 +457,13 @@ def test_import_mal_annotations(
500
457
501
458
def test_import_mal_annotations_global_key (client ,
502
459
configured_project_with_one_data_row ,
503
- rand_gen , one_datarow_global_key ):
460
+ rand_gen , one_datarow_global_key ,
461
+ helpers ):
504
462
data_class = lb_types .VideoData
505
463
data_row = one_datarow_global_key
506
464
annotations = [video_mask_annotation ]
507
- set_project_media_type_from_data_type (configured_project_with_one_data_row ,
508
- data_class )
465
+ helpers . set_project_media_type_from_data_type (
466
+ configured_project_with_one_data_row , data_class )
509
467
510
468
configured_project_with_one_data_row .create_batch (
511
469
rand_gen (str ),
0 commit comments