Skip to content

Commit 07f5a35

Browse files
authored
[DOCS-450] upsert and attachment new methods (#1590)
I removed redundancies I removed methods that are not efficient I added new methods to attachments and create data rows
1 parent d95be4a commit 07f5a35

File tree

1 file changed

+168
-95
lines changed

1 file changed

+168
-95
lines changed

examples/basics/data_rows.ipynb

Lines changed: 168 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -224,52 +224,36 @@
224224
{
225225
"metadata": {},
226226
"source": [
227-
"### Create\n",
228-
"* Create a single data row with and without metadata"
227+
"## Create\n",
228+
"We recommend the following methods to create data rows: `dataset.upsert_data_rows()` and `dataset.create_data_rows()`."
229229
],
230230
"cell_type": "markdown"
231231
},
232232
{
233233
"metadata": {},
234234
"source": [
235-
"dataset = client.create_dataset(name=\"data_rows_demo_dataset\")\n",
236-
"\n",
237-
"# It is recommended that you add global keys to your data rows.\n",
238-
"dataset.create_data_row(row_data=\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0002.jpeg\",\n",
239-
" global_key=str(uuid.uuid4()))\n",
240-
"\n",
241-
"# You can also upload metadata along with your data row\n",
242-
"mdo = client.get_data_row_metadata_ontology()\n",
243-
"dataset.create_data_row(row_data=\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0003.jpeg\",\n",
244-
" global_key=str(uuid.uuid4()),\n",
245-
" metadata_fields=[\n",
246-
" lb.DataRowMetadataField(\n",
247-
" schema_id=mdo.reserved_by_name[\"tag\"].uid, # specify the schema id\n",
248-
" value=\"tag_string\", # typed inputs\n",
249-
" ),\n",
250-
" ],\n",
251-
" )"
235+
"### Create data rows via `dataset.upsert_data_rows()`"
252236
],
253-
"cell_type": "code",
254-
"outputs": [],
255-
"execution_count": null
237+
"cell_type": "markdown"
256238
},
257239
{
258240
"metadata": {},
259241
"source": [
260-
"### [Recommended] Bulk create data rows (This is much faster than creating individual data rows)"
242+
"# Create a dataset\n",
243+
"dataset = client.create_dataset(name=\"data_rows_demo_dataset_6\")\n",
244+
"# You can also upload metadata along with your data row\n",
245+
"mdo = client.get_data_row_metadata_ontology()"
261246
],
262-
"cell_type": "markdown"
247+
"cell_type": "code",
248+
"outputs": [],
249+
"execution_count": null
263250
},
264251
{
265252
"metadata": {},
266253
"source": [
267-
"# Create a dataset\n",
268-
"dataset = client.create_dataset(name=\"data_rows_demo_dataset_2\")\n",
269-
"\n",
270254
"uploads = []\n",
271255
"# Generate data rows\n",
272-
"for i in range(1,9):\n",
256+
"for i in range(1,8):\n",
273257
" uploads.append({\n",
274258
" \"row_data\": f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n",
275259
" \"global_key\": \"TEST-ID-%id\" % uuid.uuid1(),\n",
@@ -279,10 +263,40 @@
279263
" schema_id=mdo.reserved_by_name[\"tag\"].uid, # specify the schema id\n",
280264
" value=\"tag_string\", # typed inputs\n",
281265
" ),\n",
266+
" ],\n",
267+
" \"attachments\": [\n",
268+
" {\n",
269+
" \"type\": \"IMAGE_OVERLAY\",\n",
270+
" \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\"\n",
271+
" },\n",
272+
" {\n",
273+
" \"type\": \"RAW_TEXT\",\n",
274+
" \"value\": \"IOWA, Zone 2232, June 2022 [Text string]\"\n",
275+
" },\n",
276+
" {\n",
277+
" \"type\": \"TEXT_URL\",\n",
278+
" \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\"\n",
279+
" },\n",
280+
" {\n",
281+
" \"type\": \"IMAGE\",\n",
282+
" \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\"\n",
283+
" },\n",
284+
" {\n",
285+
" \"type\": \"VIDEO\",\n",
286+
" \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/drone_video.mp4\"\n",
287+
" },\n",
288+
" {\n",
289+
" \"type\": \"HTML\",\n",
290+
" \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\"\n",
291+
" },\n",
292+
" {\n",
293+
" \"type\": \"PDF_URL\",\n",
294+
" \"value\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n",
295+
" }\n",
282296
" ]\n",
283297
" })\n",
284298
"\n",
285-
"task1 = dataset.create_data_rows(uploads)\n",
299+
"task1 = dataset.upsert_data_rows(uploads)\n",
286300
"task1.wait_till_done()\n",
287301
"print(\"ERRORS: \" , task1.errors)\n",
288302
"print(\"RESULTS:\" , task1.result)"
@@ -294,49 +308,54 @@
294308
{
295309
"metadata": {},
296310
"source": [
297-
"### Create data rows with attachments"
311+
"Create data rows from data in your local path "
298312
],
299313
"cell_type": "markdown"
300314
},
301315
{
302316
"metadata": {},
303317
"source": [
304-
"task2 = dataset.create_data_rows([{\n",
318+
"from PIL import Image\n",
319+
"\n",
320+
"# Create dummy empty jpeg file\n",
321+
"width = 400\n",
322+
"height = 300\n",
323+
"color = (255, 255, 255) # White color\n",
324+
"image = Image.new(\"RGB\", (width, height), color)\n",
325+
"\n",
326+
"# Save the image as a JPEG file\n",
327+
"image.save(\"dummy.jpg\")\n",
328+
"\n",
329+
"local_data_path = \"dummy.jpg\"\n",
330+
"\n",
331+
"data = {\n",
332+
" \"row_data\" : local_data_path,\n",
333+
" \"global_key\": str(uuid.uuid4())\n",
334+
"}\n",
335+
"\n",
336+
"task3 = dataset.upsert_data_rows([data])\n",
337+
"task3.wait_till_done()\n",
338+
"print(\"ERRORS: \" , task3.errors)\n",
339+
"print(\"RESULTS:\" , task3.result)"
340+
],
341+
"cell_type": "code",
342+
"outputs": [],
343+
"execution_count": null
344+
},
345+
{
346+
"metadata": {},
347+
"source": [
348+
"# You can mix local files with urls when creating data rows\n",
349+
"task4 = dataset.upsert_data_rows([{\n",
305350
" \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0009.jpeg\",\n",
306-
" \"global_key\": str(uuid.uuid4()),\n",
307-
" \"attachments\": [\n",
308-
" {\n",
309-
" \"type\": \"IMAGE_OVERLAY\",\n",
310-
" \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\"\n",
311-
" },\n",
312-
" {\n",
313-
" \"type\": \"RAW_TEXT\",\n",
314-
" \"value\": \"IOWA, Zone 2232, June 2022 [Text string]\"\n",
315-
" },\n",
316-
" {\n",
317-
" \"type\": \"TEXT_URL\",\n",
318-
" \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\"\n",
319-
" },\n",
320-
" {\n",
321-
" \"type\": \"IMAGE\",\n",
322-
" \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\"\n",
323-
" },\n",
324-
" {\n",
325-
" \"type\": \"VIDEO\",\n",
326-
" \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/drone_video.mp4\"\n",
327-
" },\n",
328-
" {\n",
329-
" \"type\": \"HTML\",\n",
330-
" \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\"\n",
331-
" },\n",
332-
" {\n",
333-
" \"type\": \"PDF_URL\",\n",
334-
" \"value\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n",
335-
" }\n",
336-
" ]\n",
351+
" \"global_key\": str(uuid.uuid4())\n",
352+
" }, {\n",
353+
" \"row_data\": local_data_path,\n",
354+
" \"global_key\": str(uuid.uuid4())\n",
337355
" }])\n",
338-
"print(\"ERRORS: \" , task2.errors)\n",
339-
"print(\"RESULTS:\" , task2.result)"
356+
"task4.wait_till_done()\n",
357+
"print(\"ERRORS: \" , task4.errors)\n",
358+
"print(\"RESULTS:\" , task4.result)"
340359
],
341360
"cell_type": "code",
342361
"outputs": [],
@@ -345,21 +364,14 @@
345364
{
346365
"metadata": {},
347366
"source": [
348-
"### Create data rows using data in your local path"
367+
"### Create data rows via `dataset.create_data_rows()`\n"
349368
],
350369
"cell_type": "markdown"
351370
},
352371
{
353372
"metadata": {},
354373
"source": [
355-
"# Local paths\n",
356-
"local_data_path = \"/tmp/test_data_row.txt\"\n",
357-
"with open(local_data_path, 'w') as file:\n",
358-
" file.write(\"sample data\")\n",
359-
"\n",
360-
"task3 = dataset.create_data_rows([local_data_path])\n",
361-
"print(\"ERRORS: \" , task3.errors)\n",
362-
"print(\"RESULTS:\" , task3.result)"
374+
"dataset_2 = client.create_dataset(name=\"data_rows_demo_dataset_3\")"
363375
],
364376
"cell_type": "code",
365377
"outputs": [],
@@ -368,16 +380,25 @@
368380
{
369381
"metadata": {},
370382
"source": [
371-
"# You can mix local files with urls when creating data rows\n",
372-
"task4 = dataset.create_data_rows([{\n",
373-
" \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0003.jpeg\",\n",
374-
" \"global_key\": str(uuid.uuid4())\n",
375-
" }, {\n",
376-
" \"row_data\": local_data_path,\n",
377-
" \"global_key\": str(uuid.uuid4())\n",
378-
" }])\n",
379-
"print(\"ERRORS: \" , task4.errors)\n",
380-
"print(\"RESULTS:\" , task4.result)"
383+
"uploads = []\n",
384+
"# Generate data rows\n",
385+
"for i in range(1,9):\n",
386+
" uploads.append({\n",
387+
" \"row_data\": f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n",
388+
" \"global_key\": \"TEST-ID-%id\" % uuid.uuid1(),\n",
389+
" ## add metadata (optional)\n",
390+
" \"metadata_fields\": [\n",
391+
" lb.DataRowMetadataField(\n",
392+
" schema_id=mdo.reserved_by_name[\"tag\"].uid, # specify the schema id\n",
393+
" value=\"tag_string\", # typed inputs\n",
394+
" ),\n",
395+
" ]\n",
396+
" })\n",
397+
"\n",
398+
"task1_2 = dataset_2.create_data_rows(uploads)\n",
399+
"task1_2.wait_till_done()\n",
400+
"print(\"ERRORS: \" , task1_2.errors)\n",
401+
"print(\"RESULTS:\" , task1_2.result)"
381402
],
382403
"cell_type": "code",
383404
"outputs": [],
@@ -387,19 +408,52 @@
387408
"metadata": {},
388409
"source": [
389410
"### Update\n",
390-
"Only two fields can be updated after a data row is created\n",
391-
"1. Global keys \n",
392-
"2. Row data\n"
411+
"`dataset.upsert_data_rows()` can also be used to update data rows.\n",
412+
"\n",
413+
"To update data rows using this method, you need to pass a `key`, which can reference either a global key or a data row ID. Additionally, include any fields that you wish to update along with their new values.\n"
393414
],
394415
"cell_type": "markdown"
395416
},
396417
{
397418
"metadata": {},
398419
"source": [
399-
"data_row = client.get_data_row(\"<data_row_id_to_update>\")\n",
400-
"new_id = str(uuid.uuid4())\n",
401-
"data_row.update(global_key=new_id, row_data=\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0005.jpeg\")\n",
402-
"print(data_row)"
420+
"# Fetch a data row from the first dataset example\n",
421+
"ts = dataset.export()\n",
422+
"ts.wait_till_done()\n",
423+
"DATA_ROW_ID = [json.loads(output.json_str) for output in ts.get_stream()][0]['data_row']['id']\n",
424+
"GLOBAL_KEY = [json.loads(output.json_str) for output in ts.get_stream()][0]['data_row']['global_key']\n",
425+
"\n",
426+
"print(f\"Pick either a data row id : {DATA_ROW_ID} or global key: {GLOBAL_KEY}\")\n"
427+
],
428+
"cell_type": "code",
429+
"outputs": [],
430+
"execution_count": null
431+
},
432+
{
433+
"metadata": {},
434+
"source": [
435+
"# Update the global key associated with the DATA_ROW_ID or GLOBAL_KEY, and include additional metadata\n",
436+
"data = {\n",
437+
" \"key\": lb.UniqueId(DATA_ROW_ID),\n",
438+
" \"global_key\": \"NEW-ID-%id\" % uuid.uuid1(),\n",
439+
" \"metadata_fields\": [\n",
440+
" # New metadata\n",
441+
" lb.DataRowMetadataField(\n",
442+
" schema_id=mdo.reserved_by_name['captureDateTime'].uid,\n",
443+
" value=\"2000-01-01 00:00:00\"\n",
444+
" ),\n",
445+
" # Include original metadata otherwise it will be removed\n",
446+
" lb.DataRowMetadataField(\n",
447+
" schema_id=mdo.reserved_by_name[\"tag\"].uid,\n",
448+
" value=\"tag_string\",\n",
449+
" ),\n",
450+
" ]\n",
451+
"}\n",
452+
"\n",
453+
"task5 = dataset_2.upsert_data_rows([data])\n",
454+
"task5.wait_till_done()\n",
455+
"print(\"ERRORS: \" , task5.errors)\n",
456+
"print(\"RESULTS:\" , task5.result)"
403457
],
404458
"cell_type": "code",
405459
"outputs": [],
@@ -408,16 +462,34 @@
408462
{
409463
"metadata": {},
410464
"source": [
411-
"### Create a single attachemt on an existing data row"
465+
"### Create a single attachment on an existing data row"
412466
],
413467
"cell_type": "markdown"
414468
},
415469
{
416470
"metadata": {},
417471
"source": [
418472
"# You can only create one attachment at a time.\n",
419-
"data_row.create_attachment(attachment_type=\"RAW_TEXT\",\n",
420-
" attachment_value=\"LABELERS WILL SEE THIS \")"
473+
"DATA_ROW_ID = \"<DATA-ROW-ID>\"\n",
474+
"data_row = client.get_data_row(DATA_ROW_ID)\n",
475+
"attachment = data_row.create_attachment(attachment_type=\"RAW_TEXT\",\n",
476+
" attachment_value=\"LABELERS WILL SEE THIS\")"
477+
],
478+
"cell_type": "code",
479+
"outputs": [],
480+
"execution_count": null
481+
},
482+
{
483+
"metadata": {},
484+
"source": [
485+
"Update a recently created attachment "
486+
],
487+
"cell_type": "markdown"
488+
},
489+
{
490+
"metadata": {},
491+
"source": [
492+
"attachment.update(type= \"RAW_TEXT\", value=\"NEW RAW TEXT\")"
421493
],
422494
"cell_type": "code",
423495
"outputs": [],
@@ -440,7 +512,8 @@
440512
{
441513
"metadata": {},
442514
"source": [
443-
"data_row = client.get_data_row(\"<data_row_id_to_delete>\")\n",
515+
"DATAROW_ID_TO_DELETE = \"<DATA-ROW-ID>\"\n",
516+
"data_row = client.get_data_row(DATAROW_ID_TO_DELETE)\n",
444517
"data_row.delete()"
445518
],
446519
"cell_type": "code",
@@ -465,4 +538,4 @@
465538
"execution_count": null
466539
}
467540
]
468-
}
541+
}

0 commit comments

Comments
 (0)