From 22796f812fc7eb8a7812929b95566f45146b7a37 Mon Sep 17 00:00:00 2001 From: Gabe <33893811+Gabefire@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:06:51 -0500 Subject: [PATCH 1/4] [PLT-1463] Removed ND deserialize from some unit test part 1 (#1804) --- .../classification_import_global_key.json | 54 -- ...conversation_entity_import_global_key.json | 25 - .../data/assets/ndjson/image_import.json | 779 +---------------- .../ndjson/image_import_global_key.json | 823 ------------------ .../assets/ndjson/image_import_name_only.json | 810 +---------------- .../ndjson/metric_import_global_key.json | 10 - .../assets/ndjson/pdf_import_global_key.json | 155 ---- .../ndjson/polyline_import_global_key.json | 36 - .../ndjson/text_entity_import_global_key.json | 26 - .../ndjson/video_import_global_key.json | 166 ---- .../serialization/ndjson/test_checklist.py | 26 - .../ndjson/test_classification.py | 108 ++- .../serialization/ndjson/test_conversation.py | 71 +- .../serialization/ndjson/test_data_gen.py | 80 +- .../data/serialization/ndjson/test_dicom.py | 26 - .../serialization/ndjson/test_document.py | 294 ++++++- .../ndjson/test_export_video_objects.py | 32 +- .../serialization/ndjson/test_free_text.py | 26 - .../serialization/ndjson/test_global_key.py | 125 +-- .../data/serialization/ndjson/test_image.py | 203 ++++- 20 files changed, 769 insertions(+), 3106 deletions(-) delete mode 100644 libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json delete mode 100644 libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json diff --git a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json deleted file mode 100644 index 4de15e217..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/classification_import_global_key.json +++ /dev/null @@ -1,54 +0,0 @@ -[ - { - "answer": { - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", - "confidence": 0.8, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - }, - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673" - }, - { - "answer": [ - { - "schemaId": "ckrb1sfl8099e0y919v260awv", - "confidence": 0.82, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } - ], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" - }, - { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "ee70fd88-9f88-48dd-b760-7469ff479b71" - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json deleted file mode 100644 index 83a95e5bf..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/conversation_entity_import_global_key.json +++ /dev/null @@ -1,25 +0,0 @@ -[{ - "location": { - "start": 67, - "end": 128 - }, - "messageId": "some-message-id", - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-text-entity", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] -}] diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import.json b/libs/labelbox/tests/data/assets/ndjson/image_import.json index 91563b8ae..75fe36e44 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import.json @@ -8,16 +8,17 @@ "confidence": 0.851, "customMetrics": [ { - "name": "customMetric1", - "value": 0.4 + "name": "customMetric1", + "value": 0.4 } ], "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - } + "top": 1352.0, + "left": 2275.0, + "height": 350.0, + "width": 139.0 + }, + "classifications": [] }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -28,20 +29,17 @@ "confidence": 0.834, "customMetrics": [ { - "name": "customMetric1", - "value": 0.3 + "name": "customMetric1", + "value": 0.3 } ], "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - } + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" + }, + "classifications": [] }, { + "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", "schemaId": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { @@ -50,762 +48,39 @@ "confidence": 0.986, "customMetrics": [ { - "name": "customMetric1", - "value": 0.9 + "name": "customMetric1", + "value": 0.9 } ], "polygon": [ { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 - }, - { - "x": 1099, - "y": 911 - }, - { - "x": 1100, - "y": 911 - }, - { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, - "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 + "x": 10.0, + "y": 20.0 }, { - "x": 1119, - "y": 934 + "x": 15.0, + "y": 20.0 }, { - "x": 1118, - "y": 935 + "x": 20.0, + "y": 25.0 }, { - "x": 1118, - "y": 935 + "x": 10.0, + "y": 20.0 } ] }, { + "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", "schemaId": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, "point": { - "x": 2122, - "y": 1457 + "x": 2122.0, + "y": 1457.0 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json deleted file mode 100644 index 591e40cf6..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_global_key.json +++ /dev/null @@ -1,823 +0,0 @@ -[ - { - "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "schemaId": "ckrazcueb16og0z6609jj7y3y", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.851, - "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - }, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - }, - { - "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "schemaId": "ckrazcuec16ok0z66f956apb7", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.834, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - } - }, - { - "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "schemaId": "ckrazcuec16oi0z66dzrd8pfl", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "confidence": 0.986, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "polygon": [ - { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 - }, - { - "x": 1099, - "y": 911 - }, - { - "x": 1100, - "y": 911 - }, - { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, - "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1118, - "y": 935 - }, - { - "x": 1118, - "y": 935 - } - ] - }, - { - "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "schemaId": "ckrazcuec16om0z66bhhh4tp7", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "point": { - "x": 2122, - "y": 1457 - } - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json index 82be4cdab..466a03594 100644 --- a/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json +++ b/libs/labelbox/tests/data/assets/ndjson/image_import_name_only.json @@ -1,826 +1,86 @@ [ { "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", - "name": "box a", + "name": "ckrazcueb16og0z6609jj7y3y", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "bbox": { - "top": 1352, - "left": 2275, - "height": 350, - "width": 139 - }, - "confidence": 0.854, + "classifications": [], + "confidence": 0.851, "customMetrics": [ { "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.7 + "value": 0.4 } - ] + ], + "bbox": { + "top": 1352.0, + "left": 2275.0, + "height": 350.0, + "width": 139.0 + } }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", - "name": "mask a", + "name": "ckrazcuec16ok0z66f956apb7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "mask": { - "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [ - 255, - 0, - 0 - ] - }, - "confidence": 0.685, + "classifications": [], + "confidence": 0.834, "customMetrics": [ { "name": "customMetric1", - "value": 0.4 - }, - { - "name": "customMetric2", - "value": 0.9 + "value": 0.3 } - ] + ], + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY" + } }, { + "classifications": [], "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", - "name": "polygon a", + "name": "ckrazcuec16oi0z66dzrd8pfl", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.71, + "confidence": 0.986, "customMetrics": [ { "name": "customMetric1", - "value": 0.1 + "value": 0.9 } ], "polygon": [ { - "x": 1118, - "y": 935 - }, - { - "x": 1117, - "y": 935 - }, - { - "x": 1116, - "y": 935 - }, - { - "x": 1115, - "y": 935 - }, - { - "x": 1114, - "y": 935 - }, - { - "x": 1113, - "y": 935 - }, - { - "x": 1112, - "y": 935 - }, - { - "x": 1111, - "y": 935 - }, - { - "x": 1110, - "y": 935 - }, - { - "x": 1109, - "y": 935 - }, - { - "x": 1108, - "y": 935 - }, - { - "x": 1108, - "y": 934 - }, - { - "x": 1107, - "y": 934 - }, - { - "x": 1106, - "y": 934 - }, - { - "x": 1105, - "y": 934 - }, - { - "x": 1105, - "y": 933 - }, - { - "x": 1104, - "y": 933 - }, - { - "x": 1103, - "y": 933 - }, - { - "x": 1103, - "y": 932 - }, - { - "x": 1102, - "y": 932 - }, - { - "x": 1101, - "y": 932 - }, - { - "x": 1100, - "y": 932 - }, - { - "x": 1099, - "y": 932 - }, - { - "x": 1098, - "y": 932 - }, - { - "x": 1097, - "y": 932 - }, - { - "x": 1097, - "y": 931 - }, - { - "x": 1096, - "y": 931 - }, - { - "x": 1095, - "y": 931 - }, - { - "x": 1094, - "y": 931 - }, - { - "x": 1093, - "y": 931 - }, - { - "x": 1092, - "y": 931 - }, - { - "x": 1091, - "y": 931 - }, - { - "x": 1090, - "y": 931 - }, - { - "x": 1090, - "y": 930 - }, - { - "x": 1089, - "y": 930 - }, - { - "x": 1088, - "y": 930 - }, - { - "x": 1087, - "y": 930 - }, - { - "x": 1087, - "y": 929 - }, - { - "x": 1086, - "y": 929 - }, - { - "x": 1085, - "y": 929 - }, - { - "x": 1084, - "y": 929 - }, - { - "x": 1084, - "y": 928 - }, - { - "x": 1083, - "y": 928 - }, - { - "x": 1083, - "y": 927 - }, - { - "x": 1082, - "y": 927 - }, - { - "x": 1081, - "y": 927 - }, - { - "x": 1081, - "y": 926 - }, - { - "x": 1080, - "y": 926 - }, - { - "x": 1080, - "y": 925 - }, - { - "x": 1079, - "y": 925 - }, - { - "x": 1078, - "y": 925 - }, - { - "x": 1078, - "y": 924 - }, - { - "x": 1077, - "y": 924 - }, - { - "x": 1076, - "y": 924 - }, - { - "x": 1076, - "y": 923 - }, - { - "x": 1075, - "y": 923 - }, - { - "x": 1074, - "y": 923 - }, - { - "x": 1073, - "y": 923 - }, - { - "x": 1073, - "y": 922 - }, - { - "x": 1072, - "y": 922 - }, - { - "x": 1071, - "y": 922 - }, - { - "x": 1070, - "y": 922 - }, - { - "x": 1070, - "y": 921 - }, - { - "x": 1069, - "y": 921 - }, - { - "x": 1068, - "y": 921 - }, - { - "x": 1067, - "y": 921 - }, - { - "x": 1066, - "y": 921 - }, - { - "x": 1065, - "y": 921 - }, - { - "x": 1064, - "y": 921 - }, - { - "x": 1063, - "y": 921 - }, - { - "x": 1062, - "y": 921 - }, - { - "x": 1061, - "y": 921 - }, - { - "x": 1060, - "y": 921 - }, - { - "x": 1059, - "y": 921 - }, - { - "x": 1058, - "y": 921 - }, - { - "x": 1058, - "y": 920 - }, - { - "x": 1057, - "y": 920 - }, - { - "x": 1057, - "y": 919 - }, - { - "x": 1056, - "y": 919 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 918 - }, - { - "x": 1057, - "y": 917 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1058, - "y": 916 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1059, - "y": 915 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1060, - "y": 914 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1061, - "y": 913 - }, - { - "x": 1062, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1063, - "y": 912 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1064, - "y": 911 - }, - { - "x": 1065, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1066, - "y": 910 - }, - { - "x": 1067, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1068, - "y": 909 - }, - { - "x": 1069, - "y": 908 - }, - { - "x": 1070, - "y": 908 - }, - { - "x": 1071, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1072, - "y": 908 - }, - { - "x": 1073, - "y": 907 - }, - { - "x": 1074, - "y": 907 - }, - { - "x": 1075, - "y": 907 - }, - { - "x": 1076, - "y": 907 - }, - { - "x": 1077, - "y": 907 - }, - { - "x": 1078, - "y": 907 - }, - { - "x": 1079, - "y": 907 - }, - { - "x": 1080, - "y": 907 - }, - { - "x": 1081, - "y": 907 - }, - { - "x": 1082, - "y": 907 - }, - { - "x": 1083, - "y": 907 - }, - { - "x": 1084, - "y": 907 - }, - { - "x": 1085, - "y": 907 - }, - { - "x": 1086, - "y": 907 - }, - { - "x": 1087, - "y": 907 - }, - { - "x": 1088, - "y": 907 - }, - { - "x": 1089, - "y": 907 - }, - { - "x": 1090, - "y": 907 - }, - { - "x": 1091, - "y": 907 - }, - { - "x": 1091, - "y": 908 - }, - { - "x": 1092, - "y": 908 - }, - { - "x": 1093, - "y": 908 - }, - { - "x": 1094, - "y": 908 - }, - { - "x": 1095, - "y": 908 - }, - { - "x": 1095, - "y": 909 - }, - { - "x": 1096, - "y": 909 - }, - { - "x": 1097, - "y": 909 - }, - { - "x": 1097, - "y": 910 - }, - { - "x": 1098, - "y": 910 - }, - { - "x": 1099, - "y": 910 + "x": 10.0, + "y": 20.0 }, { - "x": 1099, - "y": 911 + "x": 15.0, + "y": 20.0 }, { - "x": 1100, - "y": 911 + "x": 20.0, + "y": 25.0 }, { - "x": 1101, - "y": 911 - }, - { - "x": 1101, - "y": 912 - }, - { - "x": 1102, - "y": 912 - }, - { - "x": 1103, - "y": 912 - }, - { - "x": 1103, - "y": 913 - }, - { - "x": 1104, - "y": 913 - }, - { - "x": 1104, - "y": 914 - }, - { - "x": 1105, - "y": 914 - }, - { - "x": 1105, - "y": 915 - }, - { - "x": 1106, - "y": 915 - }, - { - "x": 1107, - "y": 915 - }, - { - "x": 1107, - "y": 916 - }, - { - "x": 1108, - "y": 916 - }, - { - "x": 1108, - "y": 917 - }, - { - "x": 1109, - "y": 917 - }, - { - "x": 1109, - "y": 918 - }, - { - "x": 1110, - "y": 918 - }, - { - "x": 1110, - "y": 919 - }, - { - "x": 1111, - "y": 919 - }, - { - "x": 1111, - "y": 920 - }, - { - "x": 1112, - "y": 920 - }, - { - "x": 1112, - "y": 921 - }, - { - "x": 1113, - "y": 921 - }, - { - "x": 1113, - "y": 922 - }, - { - "x": 1114, - "y": 922 - }, - { - "x": 1114, - "y": 923 - }, - { - "x": 1115, - "y": 923 - }, - { - "x": 1115, - "y": 924 - }, - { - "x": 1115, - "y": 925 - }, - { - "x": 1116, - "y": 925 - }, - { - "x": 1116, - "y": 926 - }, - { - "x": 1117, - "y": 926 - }, - { - "x": 1117, - "y": 927 - }, - { - "x": 1117, - "y": 928 - }, - { - "x": 1118, - "y": 928 - }, - { - "x": 1118, - "y": 929 - }, - { - "x": 1119, - "y": 929 - }, - { - "x": 1119, - "y": 930 - }, - { - "x": 1120, - "y": 930 - }, - { - "x": 1120, - "y": 931 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1120, - "y": 932 - }, - { - "x": 1119, - "y": 933 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1119, - "y": 934 - }, - { - "x": 1118, - "y": 935 - }, - { - "x": 1118, - "y": 935 + "x": 10.0, + "y": 20.0 } ] }, { + "classifications": [], "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", - "name": "point a", + "name": "ckrazcuec16om0z66bhhh4tp7", "dataRow": { "id": "ckrazctum0z8a0ybc0b0o0g0v" }, - "confidence": 0.77, - "customMetrics": [ - { - "name": "customMetric2", - "value": 1.2 - } - ], "point": { - "x": 2122, - "y": 1457 + "x": 2122.0, + "y": 1457.0 } } ] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json deleted file mode 100644 index 31be5a4c7..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/metric_import_global_key.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - { - "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", - "aggregation": "ARITHMETIC_MEAN", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "metricValue": 0.1 - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json deleted file mode 100644 index f4b4894f6..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/pdf_import_global_key.json +++ /dev/null @@ -1,155 +0,0 @@ -[{ - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 4, - "unit": "POINTS", - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 162.73, - "left": 32.45, - "height": 388.16999999999996, - "width": 101.66000000000001 - } -}, { - "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 7, - "unit": "POINTS", - "bbox": { - "top": 223.26, - "left": 251.42, - "height": 457.03999999999996, - "width": 186.78 - } -}, { - "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 6, - "unit": "POINTS", - "confidence": 0.99, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 32.52, - "left": 218.17, - "height": 231.73, - "width": 110.56000000000003 - } -}, { - "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 7, - "unit": "POINTS", - "confidence": 0.89, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ], - "bbox": { - "top": 117.39, - "left": 4.25, - "height": 456.9200000000001, - "width": 164.83 - } -}, { - "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 8, - "unit": "POINTS", - "bbox": { - "top": 82.13, - "left": 217.28, - "height": 279.76, - "width": 82.43000000000004 - } -}, { - "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "boxy", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "page": 3, - "unit": "POINTS", - "bbox": { - "top": 298.12, - "left": 83.34, - "height": 203.83000000000004, - "width": 0.37999999999999545 - } -}, -{ - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "named_entity", - "classifications": [], - "textSelections": [ - { - "groupId": "2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - "tokenIds": [ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c" - ], - "page": 1 - } - ] -} -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json deleted file mode 100644 index d6a9eecbd..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/polyline_import_global_key.json +++ /dev/null @@ -1,36 +0,0 @@ -[ - { - "line": [ - { - "x": 2534.353, - "y": 249.471 - }, - { - "x": 2429.492, - "y": 182.092 - }, - { - "x": 2294.322, - "y": 221.962 - } - ], - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-line", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.58, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json deleted file mode 100644 index 1f26d8dc8..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/text_entity_import_global_key.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "location": { - "start": 67, - "end": 128 - }, - "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "name": "some-text-entity", - "schemaId": "cl6xnuwt95lqq07330tbb3mfd", - "classifications": [], - "confidence": 0.53, - "customMetrics": [ - { - "name": "customMetric1", - "value": 0.5 - }, - { - "name": "customMetric2", - "value": 0.3 - } - ] - } -] \ No newline at end of file diff --git a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json b/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json deleted file mode 100644 index 11e0753d9..000000000 --- a/libs/labelbox/tests/data/assets/ndjson/video_import_global_key.json +++ /dev/null @@ -1,166 +0,0 @@ -[{ - "answer": { - "schemaId": "ckrb1sfl8099g0y91cxbd5ftb" - }, - "schemaId": "ckrb1sfjx099a0y914hl319ie", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "frames": [{ - "start": 30, - "end": 35 - }, { - "start": 50, - "end": 51 - }] -}, { - "answer": [{ - "schemaId": "ckrb1sfl8099e0y919v260awv" - }], - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", - "frames": [{ - "start": 0, - "end": 5 - }] -}, { - "answer": "a value", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": "3b302706-37ec-4f72-ab2e-757d8bd302b9" -}, { - "classifications": [], - "schemaId": - "cl5islwg200gfci6g0oitaypu", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "6f7c835a-0139-4896-b73f-66a6baa89e94", - "segments": [{ - "keyframes": [{ - "frame": 1, - "line": [{ - "x": 10.0, - "y": 10.0 - }, { - "x": 100.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }, { - "frame": 5, - "line": [{ - "x": 15.0, - "y": 10.0 - }, { - "x": 50.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 8, - "line": [{ - "x": 100.0, - "y": 10.0 - }, { - "x": 50.0, - "y": 100.0 - }, { - "x": 50.0, - "y": 30.0 - }], - "classifications": [] - }] - }] -}, { - "classifications": [], - "schemaId": - "cl5it7ktp00i5ci6gf80b1ysd", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "f963be22-227b-4efe-9be4-2738ed822216", - "segments": [{ - "keyframes": [{ - "frame": 1, - "point": { - "x": 10.0, - "y": 10.0 - }, - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 5, - "point": { - "x": 50.0, - "y": 50.0 - }, - "classifications": [] - }, { - "frame": 10, - "point": { - "x": 10.0, - "y": 50.0 - }, - "classifications": [] - }] - }] -}, { - "classifications": [], - "schemaId": - "cl5iw0roz00lwci6g5jni62vs", - "dataRow": { - "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" - }, - "uuid": - "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", - "segments": [{ - "keyframes": [{ - "frame": 1, - "bbox": { - "top": 10.0, - "left": 5.0, - "height": 100.0, - "width": 150.0 - }, - "classifications": [] - }, { - "frame": 5, - "bbox": { - "top": 30.0, - "left": 5.0, - "height": 50.0, - "width": 150.0 - }, - "classifications": [] - }] - }, { - "keyframes": [{ - "frame": 10, - "bbox": { - "top": 300.0, - "left": 200.0, - "height": 400.0, - "width": 150.0 - }, - "classifications": [] - }] - }] -}] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index 0bc3c8924..59f568c75 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py @@ -37,13 +37,6 @@ def test_serialization_min(): res.pop("uuid") assert res == expected - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - for i, annotation in enumerate(res.annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - assert annotation.name == label.annotations[i].name - def test_serialization_with_classification(): label = Label( @@ -134,12 +127,6 @@ def test_serialization_with_classification(): res.pop("uuid") assert res == expected - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - assert label.model_dump(exclude_none=True) == label.model_dump( - exclude_none=True - ) - def test_serialization_with_classification_double_nested(): label = Label( @@ -233,13 +220,6 @@ def test_serialization_with_classification_double_nested(): res.pop("uuid") assert res == expected - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - res.annotations[0].extra.pop("uuid") - assert label.model_dump(exclude_none=True) == label.model_dump( - exclude_none=True - ) - def test_serialization_with_classification_double_nested_2(): label = Label( @@ -330,9 +310,3 @@ def test_serialization_with_classification_double_nested_2(): res = next(serialized) res.pop("uuid") assert res == expected - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - assert label.model_dump(exclude_none=True) == label.model_dump( - exclude_none=True - ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py index 8dcb17f0b..82adce99c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_classification.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_classification.py @@ -1,15 +1,73 @@ import json +from labelbox.data.annotation_types.classification.classification import ( + Checklist, + Radio, + Text, +) +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ClassificationAnnotation, + ClassificationAnswer, +) +from labelbox.data.mixins import CustomMetric + def test_classification(): with open( "tests/data/assets/ndjson/classification_import.json", "r" ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + + label = Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.8, + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ), + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.82, + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + ), + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "78ff6a23-bebe-475c-8f67-4c456909648f"}, + value=Text(answer="a value"), + ), + ], + ) + + res = list(NDJsonConverter.serialize([label])) assert res == data @@ -18,6 +76,48 @@ def test_classification_with_name(): "tests/data/assets/ndjson/classification_import_name_only.json", "r" ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + label = Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ClassificationAnnotation( + name="classification a", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="choice 1", + ), + ), + ), + ClassificationAnnotation( + name="classification b", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.945, + name="choice 2", + ) + ], + ), + ), + ClassificationAnnotation( + name="classification c", + extra={"uuid": "150d60de-30af-44e4-be20-55201c533312"}, + value=Text(answer="a value"), + ), + ], + ) + + res = list(NDJsonConverter.serialize([label])) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index f7da9181b..561f9ce86 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -1,8 +1,12 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import pytest import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.data.mixins import CustomMetric radio_ndjson = [ { @@ -99,25 +103,62 @@ def test_message_based_radio_classification(label, ndjson): serialized_label[0].pop("uuid") assert serialized_label == ndjson - deserialized_label = list(NDJsonConverter().deserialize(ndjson)) - deserialized_label[0].annotations[0].extra.pop("uuid") - assert deserialized_label[0].model_dump(exclude_none=True) == label[ - 0 - ].model_dump(exclude_none=True) +def test_conversation_entity_import(): + with open( + "tests/data/assets/ndjson/conversation_entity_import.json", "r" + ) as file: + data = json.load(file) -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/conversation_entity_import.json", + label = lb_types.Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + lb_types.ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, + value=lb_types.ConversationEntity( + start=67, end=128, message_id="some-message-id" + ), + ) + ], + ) + + res = list(NDJsonConverter.serialize([label])) + assert res == data + + +def test_conversation_entity_import_without_confidence(): + with open( "tests/data/assets/ndjson/conversation_entity_without_confidence_import.json", - ], -) -def test_conversation_entity_import(filename: str): - with open(filename, "r") as file: + "r", + ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + label = lb_types.Label( + uid=None, + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + lb_types.ObjectAnnotation( + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={"uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4"}, + value=lb_types.ConversationEntity( + start=67, end=128, extra={}, message_id="some-message-id" + ), + ) + ], + ) + + res = list(NDJsonConverter.serialize([label])) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py index 333c00250..999e1bda5 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_data_gen.py @@ -1,67 +1,29 @@ -from copy import copy -import pytest import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter -from labelbox.data.serialization.ndjson.objects import ( - NDDicomSegments, - NDDicomSegment, - NDDicomLine, -) - -""" -Data gen prompt test data -""" - -prompt_text_annotation = lb_types.PromptClassificationAnnotation( - feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", - name="test", - value=lb_types.PromptText( - answer="the answer to the text questions right here" - ), -) - -prompt_text_ndjson = { - "answer": "the answer to the text questions right here", - "name": "test", - "schemaId": "ckrb1sfkn099c0y910wbo0p1a", - "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, -} - -data_gen_label = lb_types.Label( - data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, - annotations=[prompt_text_annotation], -) - -""" -Prompt annotation test -""" def test_serialize_label(): - serialized_label = next(NDJsonConverter().serialize([data_gen_label])) - # Remove uuid field since this is a random value that can not be specified also meant for relationships - del serialized_label["uuid"] - assert serialized_label == prompt_text_ndjson - - -def test_deserialize_label(): - deserialized_label = next( - NDJsonConverter().deserialize([prompt_text_ndjson]) + prompt_text_annotation = lb_types.PromptClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + name="test", + extra={"uuid": "test"}, + value=lb_types.PromptText( + answer="the answer to the text questions right here" + ), ) - if hasattr(deserialized_label.annotations[0], "extra"): - # Extra fields are added to deserialized label by default need removed to match - deserialized_label.annotations[0].extra = {} - assert deserialized_label.model_dump( - exclude_none=True - ) == data_gen_label.model_dump(exclude_none=True) + prompt_text_ndjson = { + "answer": "the answer to the text questions right here", + "name": "test", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "test", + } + + data_gen_label = lb_types.Label( + data={"uid": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + annotations=[prompt_text_annotation], + ) + serialized_label = next(NDJsonConverter().serialize([data_gen_label])) -def test_serialize_deserialize_label(): - serialized = list(NDJsonConverter.serialize([data_gen_label])) - deserialized = next(NDJsonConverter.deserialize(serialized)) - if hasattr(deserialized.annotations[0], "extra"): - # Extra fields are added to deserialized label by default need removed to match - deserialized.annotations[0].extra = {} - assert deserialized.model_dump( - exclude_none=True - ) == data_gen_label.model_dump(exclude_none=True) + assert serialized_label == prompt_text_ndjson diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 633214367..762891aa2 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -1,6 +1,5 @@ from copy import copy import pytest -import base64 import labelbox.types as lb_types from labelbox.data.serialization import NDJsonConverter from labelbox.data.serialization.ndjson.objects import ( @@ -181,28 +180,3 @@ def test_serialize_label(label, ndjson): if "uuid" in serialized_label: serialized_label.pop("uuid") assert serialized_label == ndjson - - -@pytest.mark.parametrize("label, ndjson", labels_ndjsons) -def test_deserialize_label(label, ndjson): - deserialized_label = next(NDJsonConverter().deserialize([ndjson])) - if hasattr(deserialized_label.annotations[0], "extra"): - deserialized_label.annotations[0].extra = {} - for i, annotation in enumerate(deserialized_label.annotations): - if hasattr(annotation, "frames"): - assert annotation.frames == label.annotations[i].frames - if hasattr(annotation, "value"): - assert annotation.value == label.annotations[i].value - - -@pytest.mark.parametrize("label", labels) -def test_serialize_deserialize_label(label): - serialized = list(NDJsonConverter.serialize([label])) - deserialized = list(NDJsonConverter.deserialize(serialized)) - if hasattr(deserialized[0].annotations[0], "extra"): - deserialized[0].annotations[0].extra = {} - for i, annotation in enumerate(deserialized[0].annotations): - if hasattr(annotation, "frames"): - assert annotation.frames == label.annotations[i].frames - if hasattr(annotation, "value"): - assert annotation.value == label.annotations[i].value diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index 5fe6a9789..a0897ad9f 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -1,6 +1,19 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + RectangleUnit, + Point, + DocumentRectangle, + DocumentEntity, + DocumentTextSelection, +) bbox_annotation = lb_types.ObjectAnnotation( name="bounding_box", # must match your ontology feature's name @@ -53,10 +66,144 @@ def test_pdf(): """ with open("tests/data/assets/ndjson/pdf_import.json", "r") as f: data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + uid=None, + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=DocumentRectangle( + start=Point(x=32.45, y=162.73), + end=Point(x=134.11, y=550.9), + page=4, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + }, + value=DocumentRectangle( + start=Point(x=251.42, y=223.26), + end=Point(x=438.2, y=680.3), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + }, + value=DocumentRectangle( + start=Point(x=218.17, y=32.52), + end=Point(x=328.73, y=264.25), + page=6, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.89, + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + }, + value=DocumentRectangle( + start=Point(x=4.25, y=117.39), + end=Point(x=169.08, y=574.3100000000001), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + }, + value=DocumentRectangle( + start=Point(x=217.28, y=82.13), + end=Point(x=299.71000000000004, y=361.89), + page=8, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + }, + value=DocumentRectangle( + start=Point(x=83.34, y=298.12), + end=Point(x=83.72, y=501.95000000000005), + page=3, + unit=RectangleUnit.POINTS, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="named_entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=DocumentEntity( + text_selections=[ + DocumentTextSelection( + token_ids=[ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c", + ], + group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + page=1, + ) + ] + ), + ) + ], + ), + ] + + res = list(NDJsonConverter.serialize(labels)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() def test_pdf_with_name_only(): @@ -65,26 +212,135 @@ def test_pdf_with_name_only(): """ with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.99, + name="boxy", + feature_schema_id=None, + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=DocumentRectangle( + start=Point(x=32.45, y=162.73), + end=Point(x=134.11, y=550.9), + page=4, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + }, + value=DocumentRectangle( + start=Point(x=251.42, y=223.26), + end=Point(x=438.2, y=680.3), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + }, + value=DocumentRectangle( + start=Point(x=218.17, y=32.52), + end=Point(x=328.73, y=264.25), + page=6, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.74, + name="boxy", + extra={ + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + }, + value=DocumentRectangle( + start=Point(x=4.25, y=117.39), + end=Point(x=169.08, y=574.3100000000001), + page=7, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + }, + value=DocumentRectangle( + start=Point(x=217.28, y=82.13), + end=Point(x=299.71000000000004, y=361.89), + page=8, + unit=RectangleUnit.POINTS, + ), + ), + ObjectAnnotation( + name="boxy", + extra={ + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + }, + value=DocumentRectangle( + start=Point(x=83.34, y=298.12), + end=Point(x=83.72, y=501.95000000000005), + page=3, + unit=RectangleUnit.POINTS, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="named_entity", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=DocumentEntity( + text_selections=[ + DocumentTextSelection( + token_ids=[ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c", + ], + group_id="2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + page=1, + ) + ] + ), + ) + ], + ), + ] + res = list(NDJsonConverter.serialize(labels)) assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() def test_pdf_bbox_serialize(): serialized = list(NDJsonConverter.serialize(bbox_labels)) serialized[0].pop("uuid") assert serialized == bbox_ndjson - - -def test_pdf_bbox_deserialize(): - deserialized = list(NDJsonConverter.deserialize(bbox_ndjson)) - deserialized[0].annotations[0].extra = {} - assert ( - deserialized[0].annotations[0].value - == bbox_labels[0].annotations[0].value - ) - assert ( - deserialized[0].annotations[0].name - == bbox_labels[0].annotations[0].name - ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py index 4adcd9935..1ab678cde 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py @@ -9,8 +9,6 @@ def video_bbox_label(): uid="cl1z52xwh00050fhcmfgczqvn", data=VideoData( uid="cklr9mr4m5iao0rb6cvxu4qbn", - file_path=None, - frames=None, url="https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F26403a22-604a-a38c-eeff-c2ed481fb40a-cat.mp4?Expires=1651677421050&KeyName=labelbox-assets-key-3&Signature=vF7gMyfHzgZdfbB8BHgd88Ws-Ms", ), annotations=[ @@ -22,6 +20,7 @@ def video_bbox_label(): "instanceURI": None, "color": "#1CE6FF", "feature_id": "cl1z52xw700000fhcayaqy0ev", + "uuid": "b24e672b-8f79-4d96-bf5e-b552ca0820d5", }, value=Rectangle( extra={}, @@ -588,31 +587,4 @@ def test_serialize_video_objects(): serialized_labels = NDJsonConverter.serialize([label]) label = next(serialized_labels) - manual_label = video_serialized_bbox_label() - - for key in label.keys(): - # ignore uuid because we randomize if there was none - if key != "uuid": - assert label[key] == manual_label[key] - - assert len(label["segments"]) == 2 - assert len(label["segments"][0]["keyframes"]) == 2 - assert len(label["segments"][1]["keyframes"]) == 4 - - # #converts back only the keyframes. should be the sum of all prev segments - deserialized_labels = NDJsonConverter.deserialize([label]) - label = next(deserialized_labels) - assert len(label.annotations) == 6 - - -def test_confidence_is_ignored(): - label = video_bbox_label() - serialized_labels = NDJsonConverter.serialize([label]) - label = next(serialized_labels) - label["confidence"] = 0.453 - label["segments"][0]["confidence"] = 0.453 - - deserialized_labels = NDJsonConverter.deserialize([label]) - label = next(deserialized_labels) - for annotation in label.annotations: - assert annotation.confidence is None + assert label == video_serialized_bbox_label() diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py index 84c017497..349be13a8 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py @@ -34,16 +34,6 @@ def test_serialization(): assert res["answer"] == "text_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - - annotation = res.annotations[0] - - annotation_value = annotation.value - assert type(annotation_value) is Text - assert annotation_value.answer == "text_answer" - assert annotation_value.confidence == 0.5 - def test_nested_serialization(): label = Label( @@ -102,19 +92,3 @@ def test_nested_serialization(): assert sub_classification["name"] == "nested answer" assert sub_classification["answer"] == "nested answer" assert sub_classification["confidence"] == 0.7 - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - annotation = res.annotations[0] - answer = annotation.value.answer[0] - assert answer.confidence == 0.9 - assert answer.name == "first_answer" - - classification_answer = answer.classifications[0].value.answer - assert classification_answer.confidence == 0.8 - assert classification_answer.name == "first_sub_radio_answer" - - sub_classification_answer = classification_answer.classifications[0].value - assert type(sub_classification_answer) is Text - assert sub_classification_answer.answer == "nested answer" - assert sub_classification_answer.confidence == 0.7 diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py index 2b3fa7f8c..d104a691e 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_global_key.py @@ -1,73 +1,74 @@ -import json -import pytest - -from labelbox.data.serialization.ndjson.classification import NDRadio - +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.data.serialization.ndjson.objects import NDLine - - -def round_dict(data): - if isinstance(data, dict): - for key in data: - if isinstance(data[key], float): - data[key] = int(data[key]) - elif isinstance(data[key], dict): - data[key] = round_dict(data[key]) - elif isinstance(data[key], (list, tuple)): - data[key] = [round_dict(r) for r in data[key]] +from labelbox.types import ( + Label, + ClassificationAnnotation, + Radio, + ClassificationAnswer, +) - return data +def test_generic_data_row_global_key_included(): + expected = [ + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + } + ] -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/classification_import_global_key.json", - "tests/data/assets/ndjson/metric_import_global_key.json", - "tests/data/assets/ndjson/polyline_import_global_key.json", - "tests/data/assets/ndjson/text_entity_import_global_key.json", - "tests/data/assets/ndjson/conversation_entity_import_global_key.json", - ], -) -def test_many_types(filename: str): - with open(filename, "r") as f: - data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert res == data - f.close() + label = Label( + data=GenericDataRowData( + global_key="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ) + ], + ) + res = list(NDJsonConverter.serialize([label])) -def test_image(): - with open( - "tests/data/assets/ndjson/image_import_global_key.json", "r" - ) as f: - data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() + assert res == expected -def test_pdf(): - with open("tests/data/assets/ndjson/pdf_import_global_key.json", "r") as f: - data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - f.close() +def test_dict_data_row_global_key_included(): + expected = [ + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "dataRow": {"globalKey": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + } + ] + label = Label( + data={ + "global_key": "ckrb1sf1i1g7i0ybcdc6oc8ct", + }, + annotations=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ) + ], + ) -def test_video(): - with open( - "tests/data/assets/ndjson/video_import_global_key.json", "r" - ) as f: - data = json.load(f) + res = list(NDJsonConverter.serialize([label])) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert res == [data[2], data[0], data[1], data[3], data[4], data[5]] - f.close() + assert res == expected diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py index 1729e1f46..d67acb9c3 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -1,4 +1,8 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric import numpy as np import cv2 @@ -10,6 +14,7 @@ ImageData, MaskData, ) +from labelbox.types import Rectangle, Polygon, Point def round_dict(data): @@ -29,12 +34,74 @@ def test_image(): with open("tests/data/assets/ndjson/image_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.851, + feature_schema_id="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Rectangle( + start=Point(extra={}, x=2275.0, y=1352.0), + end=Point(extra={}, x=2414.0, y=1702.0), + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.3) + ], + confidence=0.834, + feature_schema_id="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + mask=MaskData( + url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=[255, 0, 0], + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.9) + ], + confidence=0.986, + feature_schema_id="ckrazcuec16oi0z66dzrd8pfl", + extra={ + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + }, + value=Polygon( + points=[ + Point(x=10.0, y=20.0), + Point(x=15.0, y=20.0), + Point(x=20.0, y=25.0), + Point(x=10.0, y=20.0), + ], + ), + ), + ObjectAnnotation( + feature_schema_id="ckrazcuec16om0z66bhhh4tp7", + extra={ + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + }, + value=Point(x=2122.0, y=1457.0), + ), + ], + ) + ] - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + res = list(NDJsonConverter.serialize(labels)) + del res[1]["mask"]["colorRGB"] # JSON does not support tuples + assert res == data def test_image_with_name_only(): @@ -43,11 +110,74 @@ def test_image_with_name_only(): ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.851, + name="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Rectangle( + start=Point(extra={}, x=2275.0, y=1352.0), + end=Point(extra={}, x=2414.0, y=1702.0), + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.3) + ], + confidence=0.834, + name="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + mask=MaskData( + url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=[255, 0, 0], + ), + ), + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.9) + ], + confidence=0.986, + name="ckrazcuec16oi0z66dzrd8pfl", + extra={ + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + }, + value=Polygon( + points=[ + Point(x=10.0, y=20.0), + Point(x=15.0, y=20.0), + Point(x=20.0, y=25.0), + Point(x=10.0, y=20.0), + ], + ), + ), + ObjectAnnotation( + name="ckrazcuec16om0z66bhhh4tp7", + extra={ + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + }, + value=Point(x=2122.0, y=1457.0), + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) + del res[1]["mask"]["colorRGB"] # JSON does not support tuples + assert res == data def test_mask(): @@ -57,10 +187,11 @@ def test_mask(): "schemaId": "ckrazcueb16og0z6609jj7y3y", "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { - "png": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAAAAACoWZBhAAAAMklEQVR4nD3MuQ3AQADDMOqQ/Vd2ijytaSiZLAcYuyLEYYYl9cvrlGftTHvsYl+u/3EDv0QLI8Z7FlwAAAAASUVORK5CYII=" + "png": "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAAAAABX3VL4AAAADklEQVR4nGNgYGBkZAAAAAsAA+RRQXwAAAAASUVORK5CYII=" }, "confidence": 0.8, "customMetrics": [{"name": "customMetric1", "value": 0.4}], + "classifications": [], }, { "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", @@ -68,16 +199,54 @@ def test_mask(): "dataRow": {"id": "ckrazctum0z8a0ybc0b0o0g0v"}, "mask": { "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", - "colorRGB": [255, 0, 0], + "colorRGB": (255, 0, 0), }, + "classifications": [], }, ] - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - for r in res: - r.pop("classifications", None) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + mask_numpy = np.array([[[1, 1, 0], [1, 0, 1]], [[1, 1, 1], [1, 1, 1]]]) + mask_numpy = mask_numpy.astype(np.uint8) + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrazctum0z8a0ybc0b0o0g0v", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.4) + ], + confidence=0.8, + feature_schema_id="ckrazcueb16og0z6609jj7y3y", + extra={ + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + }, + value=Mask( + mask=MaskData(arr=mask_numpy), + color=(1, 1, 1), + ), + ), + ObjectAnnotation( + feature_schema_id="ckrazcuec16ok0z66f956apb7", + extra={ + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + }, + value=Mask( + extra={}, + mask=MaskData( + url="https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + ), + color=(255, 0, 0), + ), + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + + assert res == data def test_mask_from_arr(): From 94a309679e3412f8e2114621f3c9f39466867d03 Mon Sep 17 00:00:00 2001 From: Gabe <33893811+Gabefire@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:08:20 -0500 Subject: [PATCH 2/4] [PLT-1463] Removed ND deserialize from some unit test part 2 (#1815) --- .../data/serialization/ndjson/test_metric.py | 170 +++- .../data/serialization/ndjson/test_mmc.py | 125 ++- .../ndjson/test_ndlabel_subclass_matching.py | 19 - .../data/serialization/ndjson/test_nested.py | 236 ++++- .../serialization/ndjson/test_polyline.py | 82 +- .../data/serialization/ndjson/test_radio.py | 16 - .../serialization/ndjson/test_rectangle.py | 43 +- .../serialization/ndjson/test_relationship.py | 151 ++- .../data/serialization/ndjson/test_text.py | 10 - .../serialization/ndjson/test_text_entity.py | 69 +- .../data/serialization/ndjson/test_video.py | 868 +++++++++++++++++- 11 files changed, 1593 insertions(+), 196 deletions(-) delete mode 100644 libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py index 45c5c67bf..40e098405 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_metric.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_metric.py @@ -1,38 +1,166 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.annotation_types.metrics.confusion_matrix import ( + ConfusionMatrixMetric, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ScalarMetric, + ScalarMetricAggregation, + ConfusionMatrixAggregation, +) def test_metric(): with open("tests/data/assets/ndjson/metric_import.json", "r") as file: data = json.load(file) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert reserialized == data + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ScalarMetric( + value=0.1, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + aggregation=ScalarMetricAggregation.ARITHMETIC_MEAN, + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) + assert res == data def test_custom_scalar_metric(): - with open( - "tests/data/assets/ndjson/custom_scalar_import.json", "r" - ) as file: - data = json.load(file) + data = [ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": 0.1, + "metricName": "custom_iou", + "featureName": "sample_class", + "subclassName": "sample_subclass", + "aggregation": "SUM", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": 0.1, + "metricName": "custom_iou", + "featureName": "sample_class", + "aggregation": "SUM", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": {0.1: 0.1, 0.2: 0.5}, + "metricName": "custom_iou", + "aggregation": "SUM", + }, + ] + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ScalarMetric( + value=0.1, + feature_name="sample_class", + subclass_name="sample_subclass", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ScalarMetric( + value=0.1, + feature_name="sample_class", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ScalarMetric( + value={"0.1": 0.1, "0.2": 0.5}, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, + metric_name="custom_iou", + aggregation=ScalarMetricAggregation.SUM, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert json.dumps(reserialized, sort_keys=True) == json.dumps( - data, sort_keys=True - ) + assert res == data def test_custom_confusion_matrix_metric(): - with open( - "tests/data/assets/ndjson/custom_confusion_matrix_import.json", "r" - ) as file: - data = json.load(file) + data = [ + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": (1, 1, 2, 3), + "metricName": "50%_iou", + "featureName": "sample_class", + "subclassName": "sample_subclass", + "aggregation": "CONFUSION_MATRIX", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": (0, 1, 2, 5), + "metricName": "50%_iou", + "featureName": "sample_class", + "aggregation": "CONFUSION_MATRIX", + }, + { + "uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674", + "dataRow": {"id": "ckrmdnqj4000007msh9p2a27r"}, + "metricValue": {0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, + "metricName": "50%_iou", + "aggregation": "CONFUSION_MATRIX", + }, + ] + + labels = [ + Label( + data=GenericDataRowData( + uid="ckrmdnqj4000007msh9p2a27r", + ), + annotations=[ + ConfusionMatrixMetric( + value=(1, 1, 2, 3), + feature_name="sample_class", + subclass_name="sample_subclass", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7672"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ConfusionMatrixMetric( + value=(0, 1, 2, 5), + feature_name="sample_class", + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7673"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ConfusionMatrixMetric( + value={0.1: (0, 1, 2, 3), 0.2: (5, 3, 4, 3)}, + extra={"uuid": "a22bbf6e-b2da-4abe-9a11-df84759f7674"}, + metric_name="50%_iou", + aggregation=ConfusionMatrixAggregation.CONFUSION_MATRIX, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) - label_list = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(label_list)) - assert json.dumps(reserialized, sort_keys=True) == json.dumps( - data, sort_keys=True - ) + assert data == res diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py index 69594ff73..202f793fe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_mmc.py @@ -1,32 +1,125 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import pytest from labelbox.data.serialization import NDJsonConverter +from labelbox.types import ( + Label, + MessageEvaluationTaskAnnotation, + MessageSingleSelectionTask, + MessageMultiSelectionTask, + MessageInfo, + OrderedMessageInfo, + MessageRankingTask, +) def test_message_task_annotation_serialization(): with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: data = json.load(file) - deserialized = list(NDJsonConverter.deserialize(data)) - reserialized = list(NDJsonConverter.serialize(deserialized)) + labels = [ + Label( + data=GenericDataRowData( + uid="cnjencjencjfencvj", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="single-selection", + extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, + value=MessageSingleSelectionTask( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 5", + parent_message_id="clxfznjb800073b6v43ppx9ca", + ), + ) + ], + ), + Label( + data=GenericDataRowData( + uid="cfcerfvergerfefj", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="multi-selection", + extra={"uuid": "gferf3a57-597e-48cb-8d8d-a8526fefe72"}, + value=MessageMultiSelectionTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + selected_messages=[ + MessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 5", + ) + ], + ), + ) + ], + ), + Label( + data=GenericDataRowData( + uid="cwefgtrgrthveferfferffr", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="ranking", + extra={"uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72"}, + value=MessageRankingTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + ranked_messages=[ + OrderedMessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 4 with temperature 0.7", + order=1, + ), + OrderedMessageInfo( + message_id="clxfzocbm00093b6vx4ndisub", + model_config_name="GPT 5", + order=2, + ), + ], + ), + ) + ], + ), + ] - assert data == reserialized + res = list(NDJsonConverter.serialize(labels)) + assert res == data -def test_mesage_ranking_task_wrong_order_serialization(): - with open("tests/data/assets/ndjson/mmc_import.json", "r") as file: - data = json.load(file) - - some_ranking_task = next( - task - for task in data - if task["messageEvaluationTask"]["format"] == "message-ranking" - ) - some_ranking_task["messageEvaluationTask"]["data"]["rankedMessages"][0][ - "order" - ] = 3 +def test_mesage_ranking_task_wrong_order_serialization(): with pytest.raises(ValueError): - list(NDJsonConverter.deserialize([some_ranking_task])) + ( + Label( + data=GenericDataRowData( + uid="cwefgtrgrthveferfferffr", + ), + annotations=[ + MessageEvaluationTaskAnnotation( + name="ranking", + extra={ + "uuid": "hybe3a57-5gt7e-48tgrb-8d8d-a852dswqde72" + }, + value=MessageRankingTask( + parent_message_id="clxfznjb800073b6v43ppx9ca", + ranked_messages=[ + OrderedMessageInfo( + message_id="clxfzocbm00093b6vx4ndisub", + model_config_name="GPT 5", + order=1, + ), + OrderedMessageInfo( + message_id="clxfzocbm00083b6v8vczsept", + model_config_name="GPT 4 with temperature 0.7", + order=1, + ), + ], + ), + ) + ], + ), + ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py b/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py deleted file mode 100644 index 790bd87b3..000000000 --- a/libs/labelbox/tests/data/serialization/ndjson/test_ndlabel_subclass_matching.py +++ /dev/null @@ -1,19 +0,0 @@ -import json -from labelbox.data.serialization.ndjson.label import NDLabel -from labelbox.data.serialization.ndjson.objects import NDDocumentRectangle -import pytest - - -def test_bad_annotation_input(): - data = [{"test": 3}] - with pytest.raises(ValueError): - NDLabel(**{"annotations": data}) - - -def test_correct_annotation_input(): - with open("tests/data/assets/ndjson/pdf_import_name_only.json", "r") as f: - data = json.load(f) - assert isinstance( - NDLabel(**{"annotations": [data[0]]}).annotations[0], - NDDocumentRectangle, - ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py index e0f0df0e6..3633c9cbe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_nested.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_nested.py @@ -1,13 +1,135 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + Rectangle, + Point, + ClassificationAnnotation, + Radio, + ClassificationAnswer, + Text, + Checklist, +) def test_nested(): with open("tests/data/assets/ndjson/nested_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=Rectangle( + start=Point(x=2275.0, y=1352.0), + end=Point(x=2414.0, y=1702.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.34, + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ), + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "5d03213e-4408-456c-9eca-cf0723202961", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + value=Checklist( + answer=[ + ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.894, + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + ) + ], + ), + ObjectAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={ + "uuid": "d50812f6-34eb-4f12-b3cb-bbde51a31d83", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={}, + value=Text( + answer="a string", + ), + ) + ], + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data @@ -16,6 +138,112 @@ def test_nested_name_only(): "tests/data/assets/ndjson/nested_import_name_only.json", "r" ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="box a", + extra={ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + value=Rectangle( + start=Point(x=2275.0, y=1352.0), + end=Point(x=2414.0, y=1702.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification a", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.811, + name="first answer", + ), + ), + ) + ], + ), + ObjectAnnotation( + name="box b", + extra={ + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification b", + value=Radio( + answer=ClassificationAnswer( + custom_metrics=[ + CustomMetric( + name="customMetric1", value=0.5 + ), + CustomMetric( + name="customMetric2", value=0.3 + ), + ], + confidence=0.815, + name="second answer", + ), + ), + ) + ], + ), + ObjectAnnotation( + name="box c", + extra={ + "uuid": "8a2b2c43-f0a1-4763-ba96-e322d986ced6", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="classification c", + value=Checklist( + answer=[ + ClassificationAnswer( + name="third answer", + ) + ], + ), + ) + ], + ), + ObjectAnnotation( + name="box c", + extra={ + "uuid": "456dd2c6-9fa0-42f9-9809-acc27b9886a7", + }, + value=Rectangle( + start=Point(x=2089.0, y=1251.0), + end=Point(x=2247.0, y=1679.0), + ), + classifications=[ + ClassificationAnnotation( + name="a string", + value=Text( + answer="a string", + ), + ) + ], + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py index 97d48a14e..cd11d97fe 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_polyline.py @@ -1,18 +1,76 @@ import json -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ObjectAnnotation, Point, Line, Label -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/polyline_without_confidence_import.json", - "tests/data/assets/ndjson/polyline_import.json", - ], -) -def test_polyline_import(filename: str): - with open(filename, "r") as file: +def test_polyline_import_with_confidence(): + with open( + "tests/data/assets/ndjson/polyline_without_confidence_import.json", "r" + ) as file: + data = json.load(file) + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + name="some-line", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=Line( + points=[ + Point(x=2534.353, y=249.471), + Point(x=2429.492, y=182.092), + Point(x=2294.322, y=221.962), + ], + ), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + assert res == data + + +def test_polyline_import_without_confidence(): + with open("tests/data/assets/ndjson/polyline_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.58, + name="some-line", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=Line( + points=[ + Point(x=2534.353, y=249.471), + Point(x=2429.492, y=182.092), + Point(x=2294.322, y=221.962), + ], + ), + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index bd80f9267..4458e335c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -1,4 +1,3 @@ -import json from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( ClassificationAnswer, @@ -40,14 +39,6 @@ def test_serialization_with_radio_min(): res.pop("uuid") assert res == expected - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - - for i, annotation in enumerate(res.annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - assert annotation.name == label.annotations[i].name - def test_serialization_with_radio_classification(): label = Label( @@ -101,10 +92,3 @@ def test_serialization_with_radio_classification(): res = next(serialized) res.pop("uuid") assert res == expected - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - res.annotations[0].extra.pop("uuid") - assert res.annotations[0].model_dump( - exclude_none=True - ) == label.annotations[0].model_dump(exclude_none=True) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py index 66630dbb5..0e42ab152 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_rectangle.py @@ -1,6 +1,10 @@ import json +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) import labelbox.types as lb_types from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import Label, ObjectAnnotation, Rectangle, Point DATAROW_ID = "ckrb1sf1i1g7i0ybcdc6oc8ct" @@ -8,8 +12,26 @@ def test_rectangle(): with open("tests/data/assets/ndjson/rectangle_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="ckrb1sf1i1g7i0ybcdc6oc8ct", + ), + annotations=[ + ObjectAnnotation( + name="bbox", + extra={ + "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", + }, + value=Rectangle( + start=Point(x=38.0, y=28.0), + end=Point(x=81.0, y=69.0), + ), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) assert res == data @@ -39,8 +61,6 @@ def test_rectangle_inverted_start_end_points(): ), extra={ "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - "page": None, - "unit": None, }, ) @@ -48,8 +68,9 @@ def test_rectangle_inverted_start_end_points(): data={"uid": DATAROW_ID}, annotations=[expected_bbox] ) - res = list(NDJsonConverter.deserialize(res)) - assert res == [label] + data = list(NDJsonConverter.serialize([label])) + + assert res == data def test_rectangle_mixed_start_end_points(): @@ -76,17 +97,13 @@ def test_rectangle_mixed_start_end_points(): start=lb_types.Point(x=38, y=28), end=lb_types.Point(x=81, y=69), ), - extra={ - "uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72", - "page": None, - "unit": None, - }, + extra={"uuid": "c1be3a57-597e-48cb-8d8d-a852665f9e72"}, ) label = lb_types.Label(data={"uid": DATAROW_ID}, annotations=[bbox]) - res = list(NDJsonConverter.deserialize(res)) - assert res == [label] + data = list(NDJsonConverter.serialize([label])) + assert res == data def test_benchmark_reference_label_flag_enabled(): diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py index f33719035..235b66957 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_relationship.py @@ -1,16 +1,135 @@ import json -from uuid import uuid4 -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import ( + Label, + ObjectAnnotation, + Point, + Rectangle, + RelationshipAnnotation, + Relationship, +) def test_relationship(): with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) + res = [ + Label( + data=GenericDataRowData( + uid="clf98gj90000qp38ka34yhptl", + ), + annotations=[ + ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + RelationshipAnnotation( + name="is chasing", + extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, + value=Relationship( + source=ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + target=ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + extra={}, + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + type=Relationship.Type.UNIDIRECTIONAL, + ), + ), + ], + ), + Label( + data=GenericDataRowData( + uid="clf98gj90000qp38ka34yhptl-DIFFERENT", + ), + annotations=[ + ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + RelationshipAnnotation( + name="is chasing", + extra={"uuid": "0e6354eb-9adb-47e5-8e52-217ed016d948"}, + value=Relationship( + source=ObjectAnnotation( + name="dog", + extra={ + "uuid": "9b1e1249-36b4-4665-b60a-9060e0d18660", + }, + value=Rectangle( + start=Point(x=400.0, y=500.0), + end=Point(x=600.0, y=700.0), + ), + ), + target=ObjectAnnotation( + name="cat", + extra={ + "uuid": "d8813907-b15d-4374-bbe6-b9877fb42ccd", + }, + value=Rectangle( + start=Point(x=100.0, y=200.0), + end=Point(x=200.0, y=300.0), + ), + ), + type=Relationship.Type.UNIDIRECTIONAL, + ), + ), + ], + ), + ] res = list(NDJsonConverter.serialize(res)) assert len(res) == len(data) @@ -44,29 +163,3 @@ def test_relationship(): assert res_relationship_second_annotation["relationship"]["target"] in [ annot["uuid"] for annot in res_source_and_target ] - - -def test_relationship_nonexistent_object(): - with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: - data = json.load(file) - - relationship_annotation = data[2] - source_uuid = relationship_annotation["relationship"]["source"] - target_uuid = str(uuid4()) - relationship_annotation["relationship"]["target"] = target_uuid - error_msg = f"Relationship object refers to nonexistent object with UUID '{source_uuid}' and/or '{target_uuid}'" - - with pytest.raises(ValueError, match=error_msg): - list(NDJsonConverter.deserialize(data)) - - -def test_relationship_duplicate_uuids(): - with open("tests/data/assets/ndjson/relationship_import.json", "r") as file: - data = json.load(file) - - source, target = data[0], data[1] - target["uuid"] = source["uuid"] - error_msg = f"UUID '{source['uuid']}' is not unique" - - with pytest.raises(AssertionError, match=error_msg): - list(NDJsonConverter.deserialize(data)) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index d5e81c51a..21db389cb 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -1,7 +1,5 @@ from labelbox.data.annotation_types.annotation import ClassificationAnnotation from labelbox.data.annotation_types.classification.classification import ( - ClassificationAnswer, - Radio, Text, ) from labelbox.data.annotation_types.data.text import TextData @@ -34,11 +32,3 @@ def test_serialization(): assert res["name"] == "radio_question_geo" assert res["answer"] == "first_radio_answer" assert res["dataRow"]["id"] == "bkj7z2q0b0000jx6x0q2q7q0d" - - deserialized = NDJsonConverter.deserialize([res]) - res = next(deserialized) - annotation = res.annotations[0] - - annotation_value = annotation.value - assert type(annotation_value) is Text - assert annotation_value.answer == "first_radio_answer" diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py index 3e856f001..fb93f15d4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text_entity.py @@ -1,21 +1,68 @@ import json -import pytest +from labelbox.data.annotation_types.data.generic_data_row_data import ( + GenericDataRowData, +) +from labelbox.data.mixins import CustomMetric from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.types import Label, ObjectAnnotation, TextEntity + + +def test_text_entity_import(): + with open("tests/data/assets/ndjson/text_entity_import.json", "r") as file: + data = json.load(file) + + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + custom_metrics=[ + CustomMetric(name="customMetric1", value=0.5), + CustomMetric(name="customMetric2", value=0.3), + ], + confidence=0.53, + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=TextEntity(start=67, end=128, extra={}), + ) + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) + assert res == data -@pytest.mark.parametrize( - "filename", - [ - "tests/data/assets/ndjson/text_entity_import.json", +def test_text_entity_import_without_confidence(): + with open( "tests/data/assets/ndjson/text_entity_without_confidence_import.json", - ], -) -def test_text_entity_import(filename: str): - with open(filename, "r") as file: + "r", + ) as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=GenericDataRowData( + uid="cl6xnv9h61fv0085yhtoq06ht", + ), + annotations=[ + ObjectAnnotation( + name="some-text-entity", + feature_schema_id="cl6xnuwt95lqq07330tbb3mfd", + extra={ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + }, + value=TextEntity(start=67, end=128, extra={}), + ) + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) assert res == data diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index c7a6535c4..4fba5c2ca 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -1,10 +1,10 @@ import json -from labelbox.client import Client from labelbox.data.annotation_types.classification.classification import ( Checklist, ClassificationAnnotation, ClassificationAnswer, Radio, + Text, ) from labelbox.data.annotation_types.data.video import VideoData from labelbox.data.annotation_types.geometry.line import Line @@ -13,8 +13,10 @@ from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.video import VideoObjectAnnotation -from labelbox import parser +from labelbox.data.annotation_types.video import ( + VideoClassificationAnnotation, + VideoObjectAnnotation, +) from labelbox.data.serialization.ndjson.converter import NDJsonConverter from operator import itemgetter @@ -24,15 +26,275 @@ def test_video(): with open("tests/data/assets/ndjson/video_import.json", "r") as file: data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) + labels = [ + Label( + data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + annotations=[ + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=30, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=31, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=32, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=33, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=34, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=35, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=50, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfjx099a0y914hl319ie", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + feature_schema_id="ckrb1sfl8099g0y91cxbd5ftb", + ), + ), + frame=51, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=0, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=1, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=2, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=3, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=4, + ), + VideoClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + feature_schema_id="ckrb1sfl8099e0y919v260awv", + ) + ], + ), + frame=5, + ), + ClassificationAnnotation( + feature_schema_id="ckrb1sfkn099c0y910wbo0p1a", + extra={"uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c"}, + value=Text(answer="a value"), + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=10.0, y=10.0), + Point(x=100.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=15.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5islwg200gfci6g0oitaypu", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=100.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=8, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=10.0), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=50.0, y=50.0), + frame=5, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5it7ktp00i5ci6gf80b1ysd", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=50.0), + frame=10, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=10.0), + end=Point(x=155.0, y=110.0), + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=30.0), + end=Point(x=155.0, y=80.0), + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + feature_schema_id="cl5iw0roz00lwci6g5jni62vs", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=200.0, y=300.0), + end=Point(x=350.0, y=700.0), + ), + frame=10, + keyframe=True, + segment_index=1, + ), + ], + ) + ] + + res = list(NDJsonConverter.serialize(labels)) data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - - pairs = zip(data, res) - for data, res in pairs: - assert data == res + assert data == res def test_video_name_only(): @@ -40,16 +302,274 @@ def test_video_name_only(): "tests/data/assets/ndjson/video_import_name_only.json", "r" ) as file: data = json.load(file) - - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - + labels = [ + Label( + data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + annotations=[ + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=30, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=31, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=32, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=33, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=34, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=35, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=50, + ), + VideoClassificationAnnotation( + name="question 1", + extra={"uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"}, + value=Radio( + answer=ClassificationAnswer( + name="answer 1", + ), + ), + frame=51, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=0, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=1, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=2, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=3, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=4, + ), + VideoClassificationAnnotation( + name="question 2", + extra={"uuid": "d009925d-91a3-4f67-abd9-753453f5a584"}, + value=Checklist( + answer=[ + ClassificationAnswer( + name="answer 2", + ) + ], + ), + frame=5, + ), + ClassificationAnnotation( + name="question 3", + extra={"uuid": "e5f32456-bd67-4520-8d3b-cbeb2204bad3"}, + value=Text(answer="a value"), + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=10.0, y=10.0), + Point(x=100.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=15.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 1", + extra={"uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94"}, + value=Line( + points=[ + Point(x=100.0, y=10.0), + Point(x=50.0, y=100.0), + Point(x=50.0, y=30.0), + ], + ), + frame=8, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=10.0), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=50.0, y=50.0), + frame=5, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 2", + extra={"uuid": "f963be22-227b-4efe-9be4-2738ed822216"}, + value=Point(x=10.0, y=50.0), + frame=10, + keyframe=True, + segment_index=1, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=10.0), + end=Point(x=155.0, y=110.0), + ), + frame=1, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=5.0, y=30.0), + end=Point(x=155.0, y=80.0), + ), + frame=5, + keyframe=True, + segment_index=0, + ), + VideoObjectAnnotation( + name="segment 3", + extra={"uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7"}, + value=Rectangle( + start=Point(x=200.0, y=300.0), + end=Point(x=350.0, y=700.0), + ), + frame=10, + keyframe=True, + segment_index=1, + ), + ], + ) + ] + res = list(NDJsonConverter.serialize(labels)) data = sorted(data, key=itemgetter("uuid")) res = sorted(res, key=itemgetter("uuid")) - pairs = zip(data, res) - for data, res in pairs: - assert data == res + assert data == res def test_video_classification_global_subclassifications(): @@ -67,7 +587,6 @@ def test_video_classification_global_subclassifications(): ClassificationAnnotation( name="nested_checklist_question", value=Checklist( - name="checklist", answer=[ ClassificationAnswer( name="first_checklist_answer", @@ -94,7 +613,7 @@ def test_video_classification_global_subclassifications(): "dataRow": {"globalKey": "sample-video-4.mp4"}, } - expected_second_annotation = nested_checklist_annotation_ndjson = { + expected_second_annotation = { "name": "nested_checklist_question", "answer": [ { @@ -116,12 +635,6 @@ def test_video_classification_global_subclassifications(): annotations.pop("uuid") assert res == [expected_first_annotation, expected_second_annotation] - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - assert annotation.name == label.annotations[i].name - def test_video_classification_nesting_bbox(): bbox_annotation = [ @@ -287,14 +800,6 @@ def test_video_classification_nesting_bbox(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - assert annotation.name == label.annotations[i].name - def test_video_classification_point(): bbox_annotation = [ @@ -445,13 +950,6 @@ def test_video_classification_point(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value - def test_video_classification_frameline(): bbox_annotation = [ @@ -619,9 +1117,289 @@ def test_video_classification_frameline(): res = [x for x in serialized] assert res == expected - deserialized = NDJsonConverter.deserialize(res) - res = next(deserialized) - annotations = res.annotations - for i, annotation in enumerate(annotations): - annotation.extra.pop("uuid") - assert annotation.value == label.annotations[i].value + +[ + { + "answer": "a value", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", + }, + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "frames": [{"end": 35, "start": 30}, {"end": 51, "start": 50}], + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + }, + { + "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "frames": [{"end": 5, "start": 0}], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5islwg200gfci6g0oitaypu", + "segments": [ + { + "keyframes": [ + { + "classifications": [], + "frame": 1, + "line": [ + {"x": 10.0, "y": 10.0}, + {"x": 100.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + }, + { + "classifications": [], + "frame": 5, + "line": [ + {"x": 15.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + }, + ] + }, + { + "keyframes": [ + { + "classifications": [], + "frame": 8, + "line": [ + {"x": 100.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + } + ] + }, + ], + "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", + "segments": [ + { + "keyframes": [ + { + "classifications": [], + "frame": 1, + "point": {"x": 10.0, "y": 10.0}, + } + ] + }, + { + "keyframes": [ + { + "classifications": [], + "frame": 5, + "point": {"x": 50.0, "y": 50.0}, + }, + { + "classifications": [], + "frame": 10, + "point": {"x": 10.0, "y": 50.0}, + }, + ] + }, + ], + "uuid": "f963be22-227b-4efe-9be4-2738ed822216", + }, + { + "classifications": [], + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "schemaId": "cl5iw0roz00lwci6g5jni62vs", + "segments": [ + { + "keyframes": [ + { + "bbox": { + "height": 100.0, + "left": 5.0, + "top": 10.0, + "width": 150.0, + }, + "classifications": [], + "frame": 1, + }, + { + "bbox": { + "height": 50.0, + "left": 5.0, + "top": 30.0, + "width": 150.0, + }, + "classifications": [], + "frame": 5, + }, + ] + }, + { + "keyframes": [ + { + "bbox": { + "height": 400.0, + "left": 200.0, + "top": 300.0, + "width": 150.0, + }, + "classifications": [], + "frame": 10, + } + ] + }, + ], + "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + }, +] + +[ + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}], + }, + { + "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + "frames": [{"start": 0, "end": 5}], + }, + { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "90e2ecf7-c19c-47e6-8cdb-8867e1b9d88c", + }, + { + "classifications": [], + "schemaId": "cl5islwg200gfci6g0oitaypu", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "line": [ + {"x": 10.0, "y": 10.0}, + {"x": 100.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + }, + { + "frame": 5, + "line": [ + {"x": 15.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + }, + ] + }, + { + "keyframes": [ + { + "frame": 8, + "line": [ + {"x": 100.0, "y": 10.0}, + {"x": 50.0, "y": 100.0}, + {"x": 50.0, "y": 30.0}, + ], + "classifications": [], + } + ] + }, + ], + }, + { + "classifications": [], + "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "f963be22-227b-4efe-9be4-2738ed822216", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "point": {"x": 10.0, "y": 10.0}, + "classifications": [], + } + ] + }, + { + "keyframes": [ + { + "frame": 5, + "point": {"x": 50.0, "y": 50.0}, + "classifications": [], + }, + { + "frame": 10, + "point": {"x": 10.0, "y": 50.0}, + "classifications": [], + }, + ] + }, + ], + }, + { + "classifications": [], + "schemaId": "cl5iw0roz00lwci6g5jni62vs", + "dataRow": {"id": "ckrb1sf1i1g7i0ybcdc6oc8ct"}, + "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "bbox": { + "top": 10.0, + "left": 5.0, + "height": 100.0, + "width": 150.0, + }, + "classifications": [], + }, + { + "frame": 5, + "bbox": { + "top": 30.0, + "left": 5.0, + "height": 50.0, + "width": 150.0, + }, + "classifications": [], + }, + ] + }, + { + "keyframes": [ + { + "frame": 10, + "bbox": { + "top": 300.0, + "left": 200.0, + "height": 400.0, + "width": 150.0, + }, + "classifications": [], + } + ] + }, + ], + }, +] From 8dc0c5cc0ede03fd2221b3f0c3038c25e83bb163 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 17 Sep 2024 12:10:48 -0700 Subject: [PATCH 3/4] [PLT-1274] Vb/deprecate bulkimportrequest plt 1274 (#1821) --- libs/labelbox/src/labelbox/__init__.py | 1 - libs/labelbox/src/labelbox/orm/model.py | 1 - .../labelbox/schema/bulk_import_request.py | 1004 ----------------- libs/labelbox/src/labelbox/schema/enums.py | 25 - libs/labelbox/src/labelbox/schema/project.py | 119 +- .../test_bulk_import_request.py | 258 ----- .../test_ndjson_validation.py | 36 - 7 files changed, 6 insertions(+), 1438 deletions(-) delete mode 100644 libs/labelbox/src/labelbox/schema/bulk_import_request.py delete mode 100644 libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 5b5ac1f67..f9b82b422 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -6,7 +6,6 @@ from labelbox.schema.project import Project from labelbox.schema.model import Model from labelbox.schema.model_config import ModelConfig -from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.schema.annotation_import import ( MALPredictionImport, MEAPredictionImport, diff --git a/libs/labelbox/src/labelbox/orm/model.py b/libs/labelbox/src/labelbox/orm/model.py index 84dcac774..1f3ee1d86 100644 --- a/libs/labelbox/src/labelbox/orm/model.py +++ b/libs/labelbox/src/labelbox/orm/model.py @@ -386,7 +386,6 @@ class Entity(metaclass=EntityMeta): Review: Type[labelbox.Review] User: Type[labelbox.User] LabelingFrontend: Type[labelbox.LabelingFrontend] - BulkImportRequest: Type[labelbox.BulkImportRequest] Benchmark: Type[labelbox.Benchmark] IAMIntegration: Type[labelbox.IAMIntegration] LabelingFrontendOptions: Type[labelbox.LabelingFrontendOptions] diff --git a/libs/labelbox/src/labelbox/schema/bulk_import_request.py b/libs/labelbox/src/labelbox/schema/bulk_import_request.py deleted file mode 100644 index 8e11f3261..000000000 --- a/libs/labelbox/src/labelbox/schema/bulk_import_request.py +++ /dev/null @@ -1,1004 +0,0 @@ -import json -import time -from uuid import UUID, uuid4 -import functools - -import logging -from pathlib import Path -from google.api_core import retry -from labelbox import parser -import requests -from pydantic import ( - ValidationError, - BaseModel, - Field, - field_validator, - model_validator, - ConfigDict, - StringConstraints, -) -from typing_extensions import Literal, Annotated -from typing import ( - Any, - List, - Optional, - BinaryIO, - Dict, - Iterable, - Tuple, - Union, - Type, - Set, - TYPE_CHECKING, -) - -from labelbox import exceptions as lb_exceptions -from labelbox import utils -from labelbox.orm import query -from labelbox.orm.db_object import DbObject -from labelbox.orm.model import Relationship -from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.serialization import serialize_labels -from labelbox.orm.model import Field as lb_Field - -if TYPE_CHECKING: - from labelbox import Project - from labelbox.types import Label - -NDJSON_MIME_TYPE = "application/x-ndjson" -logger = logging.getLogger(__name__) - -# TODO: Deprecate this library in place of labelimport and malprediction import library. - - -def _determinants(parent_cls: Any) -> List[str]: - return [ - k - for k, v in parent_cls.model_fields.items() - if v.json_schema_extra and "determinant" in v.json_schema_extra - ] - - -def _make_file_name(project_id: str, name: str) -> str: - return f"{project_id}__{name}.ndjson" - - -# TODO(gszpak): move it to client.py -def _make_request_data( - project_id: str, name: str, content_length: int, file_name: str -) -> dict: - query_str = """mutation createBulkImportRequestFromFilePyApi( - $projectId: ID!, $name: String!, $file: Upload!, $contentLength: Int!) { - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - filePayload: { - file: $file, - contentLength: $contentLength - } - }) { - %s - } - } - """ % query.results_query_part(BulkImportRequest) - variables = { - "projectId": project_id, - "name": name, - "file": None, - "contentLength": content_length, - } - operations = json.dumps({"variables": variables, "query": query_str}) - - return { - "operations": operations, - "map": (None, json.dumps({file_name: ["variables.file"]})), - } - - -def _send_create_file_command( - client, - request_data: dict, - file_name: str, - file_data: Tuple[str, Union[bytes, BinaryIO], str], -) -> dict: - response = client.execute(data=request_data, files={file_name: file_data}) - - if not response.get("createBulkImportRequest", None): - raise lb_exceptions.LabelboxError( - "Failed to create BulkImportRequest, message: %s" - % response.get("errors", None) - or response.get("error", None) - ) - - return response - - -class BulkImportRequest(DbObject): - """Represents the import job when importing annotations. - - Attributes: - name (str) - state (Enum): FAILED, RUNNING, or FINISHED (Refers to the whole import job) - input_file_url (str): URL to your web-hosted NDJSON file - error_file_url (str): NDJSON that contains error messages for failed annotations - status_file_url (str): NDJSON that contains status for each annotation - created_at (datetime): UTC timestamp for date BulkImportRequest was created - - project (Relationship): `ToOne` relationship to Project - created_by (Relationship): `ToOne` relationship to User - """ - - name = lb_Field.String("name") - state = lb_Field.Enum(BulkImportRequestState, "state") - input_file_url = lb_Field.String("input_file_url") - error_file_url = lb_Field.String("error_file_url") - status_file_url = lb_Field.String("status_file_url") - created_at = lb_Field.DateTime("created_at") - - project = Relationship.ToOne("Project") - created_by = Relationship.ToOne("User", False, "created_by") - - @property - def inputs(self) -> List[Dict[str, Any]]: - """ - Inputs for each individual annotation uploaded. - This should match the ndjson annotations that you have uploaded. - - Returns: - Uploaded ndjson. - - * This information will expire after 24 hours. - """ - return self._fetch_remote_ndjson(self.input_file_url) - - @property - def errors(self) -> List[Dict[str, Any]]: - """ - Errors for each individual annotation uploaded. This is a subset of statuses - - Returns: - List of dicts containing error messages. Empty list means there were no errors - See `BulkImportRequest.statuses` for more details. - - * This information will expire after 24 hours. - """ - self.wait_until_done() - return self._fetch_remote_ndjson(self.error_file_url) - - @property - def statuses(self) -> List[Dict[str, Any]]: - """ - Status for each individual annotation uploaded. - - Returns: - A status for each annotation if the upload is done running. - See below table for more details - - .. list-table:: - :widths: 15 150 - :header-rows: 1 - - * - Field - - Description - * - uuid - - Specifies the annotation for the status row. - * - dataRow - - JSON object containing the Labelbox data row ID for the annotation. - * - status - - Indicates SUCCESS or FAILURE. - * - errors - - An array of error messages included when status is FAILURE. Each error has a name, message and optional (key might not exist) additional_info. - - * This information will expire after 24 hours. - """ - self.wait_until_done() - return self._fetch_remote_ndjson(self.status_file_url) - - @functools.lru_cache() - def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]: - """ - Fetches the remote ndjson file and caches the results. - - Args: - url (str): Can be any url pointing to an ndjson file. - Returns: - ndjson as a list of dicts. - """ - response = requests.get(url) - response.raise_for_status() - return parser.loads(response.text) - - def refresh(self) -> None: - """Synchronizes values of all fields with the database.""" - query_str, params = query.get_single(BulkImportRequest, self.uid) - res = self.client.execute(query_str, params) - res = res[utils.camel_case(BulkImportRequest.type_name())] - self._set_field_values(res) - - def wait_till_done(self, sleep_time_seconds: int = 5) -> None: - self.wait_until_done(sleep_time_seconds) - - def wait_until_done(self, sleep_time_seconds: int = 5) -> None: - """Blocks import job until certain conditions are met. - - Blocks until the BulkImportRequest.state changes either to - `BulkImportRequestState.FINISHED` or `BulkImportRequestState.FAILED`, - periodically refreshing object's state. - - Args: - sleep_time_seconds (str): a time to block between subsequent API calls - """ - while self.state == BulkImportRequestState.RUNNING: - logger.info(f"Sleeping for {sleep_time_seconds} seconds...") - time.sleep(sleep_time_seconds) - self.__exponential_backoff_refresh() - - @retry.Retry( - predicate=retry.if_exception_type( - lb_exceptions.ApiLimitError, - lb_exceptions.TimeoutError, - lb_exceptions.NetworkError, - ) - ) - def __exponential_backoff_refresh(self) -> None: - self.refresh() - - @classmethod - def from_name( - cls, client, project_id: str, name: str - ) -> "BulkImportRequest": - """Fetches existing BulkImportRequest. - - Args: - client (Client): a Labelbox client - project_id (str): BulkImportRequest's project id - name (str): name of BulkImportRequest - Returns: - BulkImportRequest object - - """ - query_str = """query getBulkImportRequestPyApi( - $projectId: ID!, $name: String!) { - bulkImportRequest(where: { - projectId: $projectId, - name: $name - }) { - %s - } - } - """ % query.results_query_part(cls) - params = {"projectId": project_id, "name": name} - response = client.execute(query_str, params=params) - return cls(client, response["bulkImportRequest"]) - - @classmethod - def create_from_url( - cls, client, project_id: str, name: str, url: str, validate=True - ) -> "BulkImportRequest": - """ - Creates a BulkImportRequest from a publicly accessible URL - to an ndjson file with predictions. - - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - url (str): publicly accessible URL pointing to ndjson file containing predictions - validate (bool): a flag indicating if there should be a validation - if `url` is valid ndjson - Returns: - BulkImportRequest object - """ - if validate: - logger.warn( - "Validation is turned on. The file will be downloaded locally and processed before uploading." - ) - res = requests.get(url) - data = parser.loads(res.text) - _validate_ndjson(data, client.get_project(project_id)) - - query_str = """mutation createBulkImportRequestPyApi( - $projectId: ID!, $name: String!, $fileUrl: String!) { - createBulkImportRequest(data: { - projectId: $projectId, - name: $name, - fileUrl: $fileUrl - }) { - %s - } - } - """ % query.results_query_part(cls) - params = {"projectId": project_id, "name": name, "fileUrl": url} - bulk_import_request_response = client.execute(query_str, params=params) - return cls( - client, bulk_import_request_response["createBulkImportRequest"] - ) - - @classmethod - def create_from_objects( - cls, - client, - project_id: str, - name: str, - predictions: Union[Iterable[Dict], Iterable["Label"]], - validate=True, - ) -> "BulkImportRequest": - """ - Creates a `BulkImportRequest` from an iterable of dictionaries. - - Conforms to JSON predictions format, e.g.: - ``{ - "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092", - "schemaId": "ckappz7d700gn0zbocmqkwd9i", - "dataRow": { - "id": "ck1s02fqxm8fi0757f0e6qtdc" - }, - "bbox": { - "top": 48, - "left": 58, - "height": 865, - "width": 1512 - } - }`` - - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - predictions (Iterable[dict]): iterable of dictionaries representing predictions - validate (bool): a flag indicating if there should be a validation - if `predictions` is valid ndjson - Returns: - BulkImportRequest object - """ - if not isinstance(predictions, list): - raise TypeError( - f"annotations must be in a form of Iterable. Found {type(predictions)}" - ) - ndjson_predictions = serialize_labels(predictions) - - if validate: - _validate_ndjson(ndjson_predictions, client.get_project(project_id)) - - data_str = parser.dumps(ndjson_predictions) - if not data_str: - raise ValueError("annotations cannot be empty") - - data = data_str.encode("utf-8") - file_name = _make_file_name(project_id, name) - request_data = _make_request_data( - project_id, name, len(data_str), file_name - ) - file_data = (file_name, data, NDJSON_MIME_TYPE) - response_data = _send_create_file_command( - client, - request_data=request_data, - file_name=file_name, - file_data=file_data, - ) - - return cls(client, response_data["createBulkImportRequest"]) - - @classmethod - def create_from_local_file( - cls, client, project_id: str, name: str, file: Path, validate_file=True - ) -> "BulkImportRequest": - """ - Creates a BulkImportRequest from a local ndjson file with predictions. - - Args: - client (Client): a Labelbox client - project_id (str): id of project for which predictions will be imported - name (str): name of BulkImportRequest - file (Path): local ndjson file with predictions - validate_file (bool): a flag indicating if there should be a validation - if `file` is a valid ndjson file - Returns: - BulkImportRequest object - - """ - file_name = _make_file_name(project_id, name) - content_length = file.stat().st_size - request_data = _make_request_data( - project_id, name, content_length, file_name - ) - - with file.open("rb") as f: - if validate_file: - reader = parser.reader(f) - # ensure that the underlying json load call is valid - # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53 - # by iterating through the file so we only store - # each line in memory rather than the entire file - try: - _validate_ndjson(reader, client.get_project(project_id)) - except ValueError: - raise ValueError(f"{file} is not a valid ndjson file") - else: - f.seek(0) - file_data = (file.name, f, NDJSON_MIME_TYPE) - response_data = _send_create_file_command( - client, request_data, file_name, file_data - ) - return cls(client, response_data["createBulkImportRequest"]) - - def delete(self) -> None: - """Deletes the import job and also any annotations created by this import. - - Returns: - None - """ - id_param = "bulk_request_id" - query_str = """mutation deleteBulkImportRequestPyApi($%s: ID!) { - deleteBulkImportRequest(where: {id: $%s}) { - id - name - } - }""" % (id_param, id_param) - self.client.execute(query_str, {id_param: self.uid}) - - -def _validate_ndjson( - lines: Iterable[Dict[str, Any]], project: "Project" -) -> None: - """ - Client side validation of an ndjson object. - - Does not guarentee that an upload will succeed for the following reasons: - * We are not checking the data row types which will cause the following errors to slip through - * Missing frame indices will not causes an error for videos - * Uploaded annotations for the wrong data type will pass (Eg. entity on images) - * We are not checking bounds of an asset (Eg. frame index, image height, text location) - - Args: - lines (Iterable[Dict[str,Any]]): An iterable of ndjson lines - project (Project): id of project for which predictions will be imported - - Raises: - MALValidationError: Raise for invalid NDJson - UuidError: Duplicate UUID in upload - """ - feature_schemas_by_id, feature_schemas_by_name = get_mal_schemas( - project.ontology() - ) - uids: Set[str] = set() - for idx, line in enumerate(lines): - try: - annotation = NDAnnotation(**line) - annotation.validate_instance( - feature_schemas_by_id, feature_schemas_by_name - ) - uuid = str(annotation.uuid) - if uuid in uids: - raise lb_exceptions.UuidError( - f"{uuid} already used in this import job, " - "must be unique for the project." - ) - uids.add(uuid) - except (ValidationError, ValueError, TypeError, KeyError) as e: - raise lb_exceptions.MALValidationError( - f"Invalid NDJson on line {idx}" - ) from e - - -# The rest of this file contains objects for MAL validation -def parse_classification(tool): - """ - Parses a classification from an ontology. Only radio, checklist, and text are supported for mal - - Args: - tool (dict) - - Returns: - dict - """ - if tool["type"] in ["radio", "checklist"]: - option_schema_ids = [r["featureSchemaId"] for r in tool["options"]] - option_names = [r["value"] for r in tool["options"]] - return { - "tool": tool["type"], - "featureSchemaId": tool["featureSchemaId"], - "name": tool["name"], - "options": [*option_schema_ids, *option_names], - } - elif tool["type"] == "text": - return { - "tool": tool["type"], - "name": tool["name"], - "featureSchemaId": tool["featureSchemaId"], - } - - -def get_mal_schemas(ontology): - """ - Converts a project ontology to a dict for easier lookup during ndjson validation - - Args: - ontology (Ontology) - Returns: - Dict, Dict : Useful for looking up a tool from a given feature schema id or name - """ - - valid_feature_schemas_by_schema_id = {} - valid_feature_schemas_by_name = {} - for tool in ontology.normalized["tools"]: - classifications = [ - parse_classification(classification_tool) - for classification_tool in tool["classifications"] - ] - classifications_by_schema_id = { - v["featureSchemaId"]: v for v in classifications - } - classifications_by_name = {v["name"]: v for v in classifications} - valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = { - "tool": tool["tool"], - "classificationsBySchemaId": classifications_by_schema_id, - "classificationsByName": classifications_by_name, - "name": tool["name"], - } - valid_feature_schemas_by_name[tool["name"]] = { - "tool": tool["tool"], - "classificationsBySchemaId": classifications_by_schema_id, - "classificationsByName": classifications_by_name, - "name": tool["name"], - } - for tool in ontology.normalized["classifications"]: - valid_feature_schemas_by_schema_id[tool["featureSchemaId"]] = ( - parse_classification(tool) - ) - valid_feature_schemas_by_name[tool["name"]] = parse_classification(tool) - return valid_feature_schemas_by_schema_id, valid_feature_schemas_by_name - - -class Bbox(BaseModel): - top: float - left: float - height: float - width: float - - -class Point(BaseModel): - x: float - y: float - - -class FrameLocation(BaseModel): - end: int - start: int - - -class VideoSupported(BaseModel): - # Note that frames are only allowed as top level inferences for video - frames: Optional[List[FrameLocation]] = None - - -# Base class for a special kind of union. -class SpecialUnion: - def __new__(cls, **kwargs): - return cls.build(kwargs) - - @classmethod - def __get_validators__(cls): - yield cls.build - - @classmethod - def get_union_types(cls): - if not issubclass(cls, SpecialUnion): - raise TypeError("{} must be a subclass of SpecialUnion") - - union_types = [x for x in cls.__orig_bases__ if hasattr(x, "__args__")] - if len(union_types) < 1: - raise TypeError( - "Class {cls} should inherit from a union of objects to build" - ) - if len(union_types) > 1: - raise TypeError( - f"Class {cls} should inherit from exactly one union of objects to build. Found {union_types}" - ) - return union_types[0].__args__[0].__args__ - - @classmethod - def build(cls: Any, data: Union[dict, BaseModel]) -> "NDBase": - """ - Checks through all objects in the union to see which matches the input data. - Args: - data (Union[dict, BaseModel]) : The data for constructing one of the objects in the union - raises: - KeyError: data does not contain the determinant fields for any of the types supported by this SpecialUnion - ValidationError: Error while trying to construct a specific object in the union - - """ - if isinstance(data, BaseModel): - data = data.model_dump() - - top_level_fields = [] - max_match = 0 - matched = None - - for type_ in cls.get_union_types(): - determinate_fields = _determinants(type_) - top_level_fields.append(determinate_fields) - matches = sum([val in determinate_fields for val in data]) - if matches == len(determinate_fields) and matches > max_match: - max_match = matches - matched = type_ - - if matched is not None: - # These two have the exact same top level keys - if matched in [NDRadio, NDText]: - if isinstance(data["answer"], dict): - matched = NDRadio - elif isinstance(data["answer"], str): - matched = NDText - else: - raise TypeError( - f"Unexpected type for answer field. Found {data['answer']}. Expected a string or a dict" - ) - return matched(**data) - else: - raise KeyError( - f"Invalid annotation. Must have one of the following keys : {top_level_fields}. Found {data}." - ) - - @classmethod - def schema(cls): - results = {"definitions": {}} - for cl in cls.get_union_types(): - schema = cl.schema() - results["definitions"].update(schema.pop("definitions")) - results[cl.__name__] = schema - return results - - -class DataRow(BaseModel): - id: str - - -class NDFeatureSchema(BaseModel): - schemaId: Optional[str] = None - name: Optional[str] = None - - @model_validator(mode="after") - def most_set_one(self): - if self.schemaId is None and self.name is None: - raise ValueError( - "Must set either schemaId or name for all feature schemas" - ) - return self - - -class NDBase(NDFeatureSchema): - ontology_type: str - uuid: UUID - dataRow: DataRow - model_config = ConfigDict(extra="forbid") - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - if self.name: - if self.name not in valid_feature_schemas_by_name: - raise ValueError( - f"Name {self.name} is not valid for the provided project's ontology." - ) - - if ( - self.ontology_type - != valid_feature_schemas_by_name[self.name]["tool"] - ): - raise ValueError( - f"Name {self.name} does not map to the assigned tool {valid_feature_schemas_by_name[self.name]['tool']}" - ) - - if self.schemaId: - if self.schemaId not in valid_feature_schemas_by_id: - raise ValueError( - f"Schema id {self.schemaId} is not valid for the provided project's ontology." - ) - - if ( - self.ontology_type - != valid_feature_schemas_by_id[self.schemaId]["tool"] - ): - raise ValueError( - f"Schema id {self.schemaId} does not map to the assigned tool {valid_feature_schemas_by_id[self.schemaId]['tool']}" - ) - - def validate_instance( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - self.validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - - -###### Classifications ###### - - -class NDText(NDBase): - ontology_type: Literal["text"] = "text" - answer: str = Field(json_schema_extra={"determinant": True}) - # No feature schema to check - - -class NDChecklist(VideoSupported, NDBase): - ontology_type: Literal["checklist"] = "checklist" - answers: List[NDFeatureSchema] = Field( - json_schema_extra={"determinant": True} - ) - - @field_validator("answers", mode="before") - def validate_answers(cls, value, field): - # constr not working with mypy. - if not len(value): - raise ValueError("Checklist answers should not be empty") - return value - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - # Test top level feature schema for this tool - super(NDChecklist, self).validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - # Test the feature schemas provided to the answer field - if len( - set([answer.name or answer.schemaId for answer in self.answers]) - ) != len(self.answers): - raise ValueError( - f"Duplicated featureSchema found for checklist {self.uuid}" - ) - for answer in self.answers: - options = ( - valid_feature_schemas_by_name[self.name]["options"] - if self.name - else valid_feature_schemas_by_id[self.schemaId]["options"] - ) - if answer.name not in options and answer.schemaId not in options: - raise ValueError( - f"Feature schema provided to {self.ontology_type} invalid. Expected on of {options}. Found {answer}" - ) - - -class NDRadio(VideoSupported, NDBase): - ontology_type: Literal["radio"] = "radio" - answer: NDFeatureSchema = Field(json_schema_extra={"determinant": True}) - - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - super(NDRadio, self).validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - options = ( - valid_feature_schemas_by_name[self.name]["options"] - if self.name - else valid_feature_schemas_by_id[self.schemaId]["options"] - ) - if ( - self.answer.name not in options - and self.answer.schemaId not in options - ): - raise ValueError( - f"Feature schema provided to {self.ontology_type} invalid. Expected on of {options}. Found {self.answer.name or self.answer.schemaId}" - ) - - -# A union with custom construction logic to improve error messages -class NDClassification( - SpecialUnion, - Type[Union[NDText, NDRadio, NDChecklist]], # type: ignore -): ... - - -###### Tools ###### - - -class NDBaseTool(NDBase): - classifications: List[NDClassification] = [] - - # This is indepdent of our problem - def validate_feature_schemas( - self, valid_feature_schemas_by_id, valid_feature_schemas_by_name - ): - super(NDBaseTool, self).validate_feature_schemas( - valid_feature_schemas_by_id, valid_feature_schemas_by_name - ) - for classification in self.classifications: - classification.validate_feature_schemas( - valid_feature_schemas_by_name[self.name][ - "classificationsBySchemaId" - ] - if self.name - else valid_feature_schemas_by_id[self.schemaId][ - "classificationsBySchemaId" - ], - valid_feature_schemas_by_name[self.name][ - "classificationsByName" - ] - if self.name - else valid_feature_schemas_by_id[self.schemaId][ - "classificationsByName" - ], - ) - - @field_validator("classifications", mode="before") - def validate_subclasses(cls, value, field): - # Create uuid and datarow id so we don't have to define classification objects twice - # This is caused by the fact that we require these ids for top level classifications but not for subclasses - results = [] - dummy_id = "child".center(25, "_") - for row in value: - results.append( - {**row, "dataRow": {"id": dummy_id}, "uuid": str(uuid4())} - ) - return results - - -class NDPolygon(NDBaseTool): - ontology_type: Literal["polygon"] = "polygon" - polygon: List[Point] = Field(json_schema_extra={"determinant": True}) - - @field_validator("polygon") - def is_geom_valid(cls, v): - if len(v) < 3: - raise ValueError( - f"A polygon must have at least 3 points to be valid. Found {v}" - ) - return v - - -class NDPolyline(NDBaseTool): - ontology_type: Literal["line"] = "line" - line: List[Point] = Field(json_schema_extra={"determinant": True}) - - @field_validator("line") - def is_geom_valid(cls, v): - if len(v) < 2: - raise ValueError( - f"A line must have at least 2 points to be valid. Found {v}" - ) - return v - - -class NDRectangle(NDBaseTool): - ontology_type: Literal["rectangle"] = "rectangle" - bbox: Bbox = Field(json_schema_extra={"determinant": True}) - # Could check if points are positive - - -class NDPoint(NDBaseTool): - ontology_type: Literal["point"] = "point" - point: Point = Field(json_schema_extra={"determinant": True}) - # Could check if points are positive - - -class EntityLocation(BaseModel): - start: int - end: int - - -class NDTextEntity(NDBaseTool): - ontology_type: Literal["named-entity"] = "named-entity" - location: EntityLocation = Field(json_schema_extra={"determinant": True}) - - @field_validator("location") - def is_valid_location(cls, v): - if isinstance(v, BaseModel): - v = v.model_dump() - - if len(v) < 2: - raise ValueError( - f"A line must have at least 2 points to be valid. Found {v}" - ) - if v["start"] < 0: - raise ValueError(f"Text location must be positive. Found {v}") - if v["start"] > v["end"]: - raise ValueError( - f"Text start location must be less or equal than end. Found {v}" - ) - return v - - -class RLEMaskFeatures(BaseModel): - counts: List[int] - size: List[int] - - @field_validator("counts") - def validate_counts(cls, counts): - if not all([count >= 0 for count in counts]): - raise ValueError( - "Found negative value for counts. They should all be zero or positive" - ) - return counts - - @field_validator("size") - def validate_size(cls, size): - if len(size) != 2: - raise ValueError( - f"Mask `size` should have two ints representing height and with. Found : {size}" - ) - if not all([count > 0 for count in size]): - raise ValueError( - f"Mask `size` should be a postitive int. Found : {size}" - ) - return size - - -class PNGMaskFeatures(BaseModel): - # base64 encoded png bytes - png: str - - -class URIMaskFeatures(BaseModel): - instanceURI: str - colorRGB: Union[List[int], Tuple[int, int, int]] - - @field_validator("colorRGB") - def validate_color(cls, colorRGB): - # Does the dtype matter? Can it be a float? - if not isinstance(colorRGB, (tuple, list)): - raise ValueError( - f"Received color that is not a list or tuple. Found : {colorRGB}" - ) - elif len(colorRGB) != 3: - raise ValueError( - f"Must provide RGB values for segmentation colors. Found : {colorRGB}" - ) - elif not all([0 <= color <= 255 for color in colorRGB]): - raise ValueError( - f"All rgb colors must be between 0 and 255. Found : {colorRGB}" - ) - return colorRGB - - -class NDMask(NDBaseTool): - ontology_type: Literal["superpixel"] = "superpixel" - mask: Union[URIMaskFeatures, PNGMaskFeatures, RLEMaskFeatures] = Field( - json_schema_extra={"determinant": True} - ) - - -# A union with custom construction logic to improve error messages -class NDTool( - SpecialUnion, - Type[ # type: ignore - Union[ - NDMask, - NDTextEntity, - NDPoint, - NDRectangle, - NDPolyline, - NDPolygon, - ] - ], -): ... - - -class NDAnnotation( - SpecialUnion, - Type[Union[NDTool, NDClassification]], # type: ignore -): - @classmethod - def build(cls: Any, data) -> "NDBase": - if not isinstance(data, dict): - raise ValueError("value must be dict") - errors = [] - for cl in cls.get_union_types(): - try: - return cl(**data) - except KeyError as e: - errors.append(f"{cl.__name__}: {e}") - - raise ValueError( - "Unable to construct any annotation.\n{}".format("\n".join(errors)) - ) - - @classmethod - def schema(cls): - data = {"definitions": {}} - for type_ in cls.get_union_types(): - schema_ = type_.schema() - data["definitions"].update(schema_.pop("definitions")) - data[type_.__name__] = schema_ - return data diff --git a/libs/labelbox/src/labelbox/schema/enums.py b/libs/labelbox/src/labelbox/schema/enums.py index 6f8aebc58..dfc87c8a4 100644 --- a/libs/labelbox/src/labelbox/schema/enums.py +++ b/libs/labelbox/src/labelbox/schema/enums.py @@ -1,31 +1,6 @@ from enum import Enum -class BulkImportRequestState(Enum): - """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED). - - If you are not usinig MEA continue using BulkImportRequest. - AnnotationImports are in beta and will change soon. - - .. list-table:: - :widths: 15 150 - :header-rows: 1 - - * - State - - Description - * - RUNNING - - Indicates that the import job is not done yet. - * - FAILED - - Indicates the import job failed. Check `BulkImportRequest.errors` for more information - * - FINISHED - - Indicates the import job is no longer running. Check `BulkImportRequest.statuses` for more information - """ - - RUNNING = "RUNNING" - FAILED = "FAILED" - FINISHED = "FINISHED" - - class AnnotationImportState(Enum): """State of the import job when importing annotations (RUNNING, FAILED, or FINISHED). diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index f8876f7c4..88153e48f 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -5,36 +5,29 @@ import warnings from collections import namedtuple from datetime import datetime, timezone -from pathlib import Path from typing import ( TYPE_CHECKING, Any, Dict, - Iterable, List, Optional, Tuple, - TypeVar, Union, overload, ) -from urllib.parse import urlparse from labelbox.schema.labeling_service import ( LabelingService, LabelingServiceStatus, ) from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard -import requests -from labelbox import parser from labelbox import utils from labelbox.exceptions import error_message_for_unparsed_graphql_error from labelbox.exceptions import ( InvalidQueryError, LabelboxError, ProcessingWaitTimeout, - ResourceConflict, ResourceNotFoundError, ) from labelbox.orm import query @@ -46,7 +39,6 @@ from labelbox.schema.data_row import DataRow from labelbox.schema.export_filters import ( ProjectExportFilters, - validate_datetime, build_filters, ) from labelbox.schema.export_params import ProjectExportParams @@ -63,7 +55,6 @@ from labelbox.schema.task_queue import TaskQueue from labelbox.schema.ontology_kind import ( EditorTaskType, - OntologyKind, UploadType, ) from labelbox.schema.project_overview import ( @@ -72,7 +63,7 @@ ) if TYPE_CHECKING: - from labelbox import BulkImportRequest + pass DataRowPriority = int @@ -579,7 +570,7 @@ def upsert_instructions(self, instructions_file: str) -> None: if frontend.name != "Editor": logger.warning( - f"This function has only been tested to work with the Editor front end. Found %s", + "This function has only been tested to work with the Editor front end. Found %s", frontend.name, ) @@ -814,7 +805,7 @@ def create_batch( if row_count > 100_000: raise ValueError( - f"Batch exceeds max size, break into smaller batches" + "Batch exceeds max size, break into smaller batches" ) if not row_count: raise ValueError("You need at least one data row in a batch") @@ -1088,7 +1079,7 @@ def _create_batch_async( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Batch was not created successfully: " + "Batch was not created successfully: " + json.dumps(task.errors) ) @@ -1436,7 +1427,7 @@ def update_data_row_labeling_priority( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Priority was not updated successfully: " + "Priority was not updated successfully: " + json.dumps(task.errors) ) return True @@ -1488,33 +1479,6 @@ def enable_model_assisted_labeling(self, toggle: bool = True) -> bool: "showingPredictionsToLabelers" ] - def bulk_import_requests(self) -> PaginatedCollection: - """Returns bulk import request objects which are used in model-assisted labeling. - These are returned with the oldest first, and most recent last. - """ - - id_param = "project_id" - query_str = """query ListAllImportRequestsPyApi($%s: ID!) { - bulkImportRequests ( - where: { projectId: $%s } - skip: %%d - first: %%d - ) { - %s - } - }""" % ( - id_param, - id_param, - query.results_query_part(Entity.BulkImportRequest), - ) - return PaginatedCollection( - self.client, - query_str, - {id_param: str(self.uid)}, - ["bulkImportRequests"], - Entity.BulkImportRequest, - ) - def batches(self) -> PaginatedCollection: """Fetch all batches that belong to this project @@ -1629,7 +1593,7 @@ def move_data_rows_to_task_queue(self, data_row_ids, task_queue_id: str): task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - f"Data rows were not moved successfully: " + "Data rows were not moved successfully: " + json.dumps(task.errors) ) @@ -1639,77 +1603,6 @@ def _wait_for_task(self, task_id: str) -> Task: return task - def upload_annotations( - self, - name: str, - annotations: Union[str, Path, Iterable[Dict]], - validate: bool = False, - ) -> "BulkImportRequest": # type: ignore - """Uploads annotations to a new Editor project. - - Args: - name (str): name of the BulkImportRequest job - annotations (str or Path or Iterable): - url that is publicly accessible by Labelbox containing an - ndjson file - OR local path to an ndjson file - OR iterable of annotation rows - validate (bool): - Whether or not to validate the payload before uploading. - Returns: - BulkImportRequest - """ - - if isinstance(annotations, str) or isinstance(annotations, Path): - - def _is_url_valid(url: Union[str, Path]) -> bool: - """Verifies that the given string is a valid url. - - Args: - url: string to be checked - Returns: - True if the given url is valid otherwise False - - """ - if isinstance(url, Path): - return False - parsed = urlparse(url) - return bool(parsed.scheme) and bool(parsed.netloc) - - if _is_url_valid(annotations): - return Entity.BulkImportRequest.create_from_url( - client=self.client, - project_id=self.uid, - name=name, - url=str(annotations), - validate=validate, - ) - else: - path = Path(annotations) - if not path.exists(): - raise FileNotFoundError( - f"{annotations} is not a valid url nor existing local file" - ) - return Entity.BulkImportRequest.create_from_local_file( - client=self.client, - project_id=self.uid, - name=name, - file=path, - validate_file=validate, - ) - elif isinstance(annotations, Iterable): - return Entity.BulkImportRequest.create_from_objects( - client=self.client, - project_id=self.uid, - name=name, - predictions=annotations, # type: ignore - validate=validate, - ) - else: - raise ValueError( - f"Invalid annotations given of type: {type(annotations)}" - ) - def _wait_until_data_rows_are_processed( self, data_row_ids: Optional[List[str]] = None, diff --git a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py b/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py deleted file mode 100644 index 9abae1422..000000000 --- a/libs/labelbox/tests/data/annotation_import/test_bulk_import_request.py +++ /dev/null @@ -1,258 +0,0 @@ -from unittest.mock import patch -import uuid -from labelbox import parser, Project -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -import pytest -import random -from labelbox.data.annotation_types.annotation import ObjectAnnotation -from labelbox.data.annotation_types.classification.classification import ( - Checklist, - ClassificationAnnotation, - ClassificationAnswer, - Radio, -) -from labelbox.data.annotation_types.data.video import VideoData -from labelbox.data.annotation_types.geometry.point import Point -from labelbox.data.annotation_types.geometry.rectangle import ( - Rectangle, - RectangleUnit, -) -from labelbox.data.annotation_types.label import Label -from labelbox.data.annotation_types.data.text import TextData -from labelbox.data.annotation_types.ner import ( - DocumentEntity, - DocumentTextSelection, -) -from labelbox.data.annotation_types.video import VideoObjectAnnotation - -from labelbox.data.serialization import NDJsonConverter -from labelbox.exceptions import MALValidationError, UuidError -from labelbox.schema.bulk_import_request import BulkImportRequest -from labelbox.schema.enums import BulkImportRequestState -from labelbox.schema.annotation_import import LabelImport, MALPredictionImport -from labelbox.schema.media_type import MediaType - -""" -- Here we only want to check that the uploads are calling the validation -- Then with unit tests we can check the types of errors raised -""" -# TODO: remove library once bulk import requests are removed - - -@pytest.mark.order(1) -def test_create_from_url(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_file(module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - with pytest.raises(MALValidationError): - module_project.upload_annotations( - name=name, annotations=url, validate=True - ) - # Schema ids shouldn't match - - -def test_create_from_objects( - module_project: Project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_create_from_label_objects( - module_project, predictions, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - - labels = list(NDJsonConverter.deserialize(predictions)) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=labels - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - normalized_predictions = list(NDJsonConverter.serialize(labels)) - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, normalized_predictions - ) - - -def test_create_from_local_file( - tmp_path, predictions, module_project, annotation_import_test_helpers -): - name = str(uuid.uuid4()) - file_name = f"{name}.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - parser.dump(predictions, f) - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=str(file_path), validate=False - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - annotation_import_test_helpers.assert_file_content( - bulk_import_request.input_file_url, predictions - ) - - -def test_get(client, module_project): - name = str(uuid.uuid4()) - url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - module_project.upload_annotations( - name=name, annotations=url, validate=False - ) - - bulk_import_request = BulkImportRequest.from_name( - client, project_id=module_project.uid, name=name - ) - - assert bulk_import_request.project() == module_project - assert bulk_import_request.name == name - assert bulk_import_request.input_file_url == url - assert bulk_import_request.error_file_url is None - assert bulk_import_request.status_file_url is None - assert bulk_import_request.state == BulkImportRequestState.RUNNING - - -def test_validate_ndjson(tmp_path, module_project): - file_name = f"broken.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - f.write("test") - - with pytest.raises(ValueError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - -def test_validate_ndjson_uuid(tmp_path, module_project, predictions): - file_name = f"repeat_uuid.ndjson" - file_path = tmp_path / file_name - repeat_uuid = predictions.copy() - uid = str(uuid.uuid4()) - repeat_uuid[0]["uuid"] = uid - repeat_uuid[1]["uuid"] = uid - - with file_path.open("w") as f: - parser.dump(repeat_uuid, f) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - with pytest.raises(UuidError): - module_project.upload_annotations( - name="name", validate=True, annotations=repeat_uuid - ) - - -@pytest.mark.skip( - "Slow test and uses a deprecated api endpoint for annotation imports" -) -def test_wait_till_done(rectangle_inference, project): - name = str(uuid.uuid4()) - url = project.client.upload_data( - content=parser.dumps(rectangle_inference), sign=True - ) - bulk_import_request = project.upload_annotations( - name=name, annotations=url, validate=False - ) - - assert len(bulk_import_request.inputs) == 1 - bulk_import_request.wait_until_done() - assert bulk_import_request.state == BulkImportRequestState.FINISHED - - # Check that the status files are being returned as expected - assert len(bulk_import_request.errors) == 0 - assert len(bulk_import_request.inputs) == 1 - assert bulk_import_request.inputs[0]["uuid"] == rectangle_inference["uuid"] - assert len(bulk_import_request.statuses) == 1 - assert bulk_import_request.statuses[0]["status"] == "SUCCESS" - assert ( - bulk_import_request.statuses[0]["uuid"] == rectangle_inference["uuid"] - ) - - -def test_project_bulk_import_requests(module_project, predictions): - result = module_project.bulk_import_requests() - assert len(list(result)) == 0 - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - name = str(uuid.uuid4()) - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - - result = module_project.bulk_import_requests() - assert len(list(result)) == 3 - - -def test_delete(module_project, predictions): - name = str(uuid.uuid4()) - - bulk_import_requests = module_project.bulk_import_requests() - [ - bulk_import_request.delete() - for bulk_import_request in bulk_import_requests - ] - - bulk_import_request = module_project.upload_annotations( - name=name, annotations=predictions - ) - bulk_import_request.wait_until_done() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 1 - - bulk_import_request.delete() - all_import_requests = module_project.bulk_import_requests() - assert len(list(all_import_requests)) == 0 diff --git a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py index a0df559fc..0ec742333 100644 --- a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py +++ b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py @@ -1,8 +1,6 @@ from labelbox.schema.media_type import MediaType -from labelbox.schema.project import Project import pytest -from labelbox import parser from pytest_cases import parametrize, fixture_ref from labelbox.exceptions import MALValidationError @@ -12,7 +10,6 @@ NDMask, NDPolygon, NDPolyline, - NDRadio, NDRectangle, NDText, NDTextEntity, @@ -191,39 +188,6 @@ def test_missing_feature_schema(module_project, rectangle_inference): _validate_ndjson([pred], module_project) -def test_validate_ndjson(tmp_path, configured_project): - file_name = f"broken.ndjson" - file_path = tmp_path / file_name - with file_path.open("w") as f: - f.write("test") - - with pytest.raises(ValueError): - configured_project.upload_annotations( - name="name", annotations=str(file_path), validate=True - ) - - -def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): - file_name = f"repeat_uuid.ndjson" - file_path = tmp_path / file_name - repeat_uuid = predictions.copy() - repeat_uuid[0]["uuid"] = "test_uuid" - repeat_uuid[1]["uuid"] = "test_uuid" - - with file_path.open("w") as f: - parser.dump(repeat_uuid, f) - - with pytest.raises(MALValidationError): - configured_project.upload_annotations( - name="name", validate=True, annotations=str(file_path) - ) - - with pytest.raises(MALValidationError): - configured_project.upload_annotations( - name="name", validate=True, annotations=repeat_uuid - ) - - @pytest.mark.parametrize("configured_project", [MediaType.Video], indirect=True) def test_video_upload(video_checklist_inference, configured_project): pred = video_checklist_inference[0].copy() From aa0942e268a1ac7bcbbd7c7eb0ec1fcad9b544b6 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 17 Sep 2024 15:27:39 -0700 Subject: [PATCH 4/4] Fix merge issues --- libs/labelbox/src/labelbox/schema/__init__.py | 21 +++++++++---------- libs/labelbox/src/labelbox/schema/project.py | 3 +-- .../test_ndjson_validation.py | 17 ++------------- 3 files changed, 13 insertions(+), 28 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/__init__.py b/libs/labelbox/src/labelbox/schema/__init__.py index 03327e0d1..e57c04a29 100644 --- a/libs/labelbox/src/labelbox/schema/__init__.py +++ b/libs/labelbox/src/labelbox/schema/__init__.py @@ -1,29 +1,28 @@ -import labelbox.schema.asset_attachment -import labelbox.schema.bulk_import_request import labelbox.schema.annotation_import +import labelbox.schema.asset_attachment +import labelbox.schema.batch import labelbox.schema.benchmark +import labelbox.schema.catalog import labelbox.schema.data_row +import labelbox.schema.data_row_metadata import labelbox.schema.dataset +import labelbox.schema.iam_integration +import labelbox.schema.identifiable +import labelbox.schema.identifiables import labelbox.schema.invite import labelbox.schema.label import labelbox.schema.labeling_frontend import labelbox.schema.labeling_service +import labelbox.schema.media_type import labelbox.schema.model import labelbox.schema.model_run import labelbox.schema.ontology +import labelbox.schema.ontology_kind import labelbox.schema.organization import labelbox.schema.project +import labelbox.schema.project_overview import labelbox.schema.review import labelbox.schema.role import labelbox.schema.task import labelbox.schema.user import labelbox.schema.webhook -import labelbox.schema.data_row_metadata -import labelbox.schema.batch -import labelbox.schema.iam_integration -import labelbox.schema.media_type -import labelbox.schema.identifiables -import labelbox.schema.identifiable -import labelbox.schema.catalog -import labelbox.schema.ontology_kind -import labelbox.schema.project_overview diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index 88153e48f..f2de4db5e 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -1079,8 +1079,7 @@ def _create_batch_async( task = self._wait_for_task(task_id) if task.status != "COMPLETE": raise LabelboxError( - "Batch was not created successfully: " - + json.dumps(task.errors) + "Batch was not created successfully: " + json.dumps(task.errors) ) return self.client.get_batch(self.uid, batch_id) diff --git a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py index 0ec742333..9e8963a26 100644 --- a/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py +++ b/libs/labelbox/tests/data/annotation_import/test_ndjson_validation.py @@ -1,21 +1,8 @@ -from labelbox.schema.media_type import MediaType import pytest - -from pytest_cases import parametrize, fixture_ref +from pytest_cases import fixture_ref, parametrize from labelbox.exceptions import MALValidationError -from labelbox.schema.bulk_import_request import ( - NDChecklist, - NDClassification, - NDMask, - NDPolygon, - NDPolyline, - NDRectangle, - NDText, - NDTextEntity, - NDTool, - _validate_ndjson, -) +from labelbox.schema.media_type import MediaType """ - These NDlabels are apart of bulkImportReqeust and should be removed once bulk import request is removed