224
224
{
225
225
"metadata" : {},
226
226
"source" : [
227
- " ### Create\n " ,
228
- " * Create a single data row with and without metadata "
227
+ " ## Create\n " ,
228
+ " We recommend the following methods to create data rows: `dataset.upsert_data_rows()` and `dataset.create_data_rows()`. "
229
229
],
230
230
"cell_type" : " markdown"
231
231
},
232
232
{
233
233
"metadata" : {},
234
234
"source" : [
235
- " dataset = client.create_dataset(name=\" data_rows_demo_dataset\" )\n " ,
236
- " \n " ,
237
- " # It is recommended that you add global keys to your data rows.\n " ,
238
- " dataset.create_data_row(row_data=\" https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0002.jpeg\" ,\n " ,
239
- " global_key=str(uuid.uuid4()))\n " ,
240
- " \n " ,
241
- " # You can also upload metadata along with your data row\n " ,
242
- " mdo = client.get_data_row_metadata_ontology()\n " ,
243
- " dataset.create_data_row(row_data=\" https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0003.jpeg\" ,\n " ,
244
- " global_key=str(uuid.uuid4()),\n " ,
245
- " metadata_fields=[\n " ,
246
- " lb.DataRowMetadataField(\n " ,
247
- " schema_id=mdo.reserved_by_name[\" tag\" ].uid, # specify the schema id\n " ,
248
- " value=\" tag_string\" , # typed inputs\n " ,
249
- " ),\n " ,
250
- " ],\n " ,
251
- " )"
235
+ " ### Create data rows via `dataset.upsert_data_rows()`"
252
236
],
253
- "cell_type" : " code" ,
254
- "outputs" : [],
255
- "execution_count" : null
237
+ "cell_type" : " markdown"
256
238
},
257
239
{
258
240
"metadata" : {},
259
241
"source" : [
260
- " ### [Recommended] Bulk create data rows (This is much faster than creating individual data rows)"
242
+ " # Create a dataset\n " ,
243
+ " dataset = client.create_dataset(name=\" data_rows_demo_dataset_6\" )\n " ,
244
+ " # You can also upload metadata along with your data row\n " ,
245
+ " mdo = client.get_data_row_metadata_ontology()"
261
246
],
262
- "cell_type" : " markdown"
247
+ "cell_type" : " code" ,
248
+ "outputs" : [],
249
+ "execution_count" : null
263
250
},
264
251
{
265
252
"metadata" : {},
266
253
"source" : [
267
- " # Create a dataset\n " ,
268
- " dataset = client.create_dataset(name=\" data_rows_demo_dataset_2\" )\n " ,
269
- " \n " ,
270
254
" uploads = []\n " ,
271
255
" # Generate data rows\n " ,
272
- " for i in range(1,9 ):\n " ,
256
+ " for i in range(1,8 ):\n " ,
273
257
" uploads.append({\n " ,
274
258
" \" row_data\" : f\" https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\" ,\n " ,
275
259
" \" global_key\" : \" TEST-ID-%id\" % uuid.uuid1(),\n " ,
279
263
" schema_id=mdo.reserved_by_name[\" tag\" ].uid, # specify the schema id\n " ,
280
264
" value=\" tag_string\" , # typed inputs\n " ,
281
265
" ),\n " ,
266
+ " ],\n " ,
267
+ " \" attachments\" : [\n " ,
268
+ " {\n " ,
269
+ " \" type\" : \" IMAGE_OVERLAY\" ,\n " ,
270
+ " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\"\n " ,
271
+ " },\n " ,
272
+ " {\n " ,
273
+ " \" type\" : \" RAW_TEXT\" ,\n " ,
274
+ " \" value\" : \" IOWA, Zone 2232, June 2022 [Text string]\"\n " ,
275
+ " },\n " ,
276
+ " {\n " ,
277
+ " \" type\" : \" TEXT_URL\" ,\n " ,
278
+ " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\"\n " ,
279
+ " },\n " ,
280
+ " {\n " ,
281
+ " \" type\" : \" IMAGE\" ,\n " ,
282
+ " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\"\n " ,
283
+ " },\n " ,
284
+ " {\n " ,
285
+ " \" type\" : \" VIDEO\" ,\n " ,
286
+ " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/drone_video.mp4\"\n " ,
287
+ " },\n " ,
288
+ " {\n " ,
289
+ " \" type\" : \" HTML\" ,\n " ,
290
+ " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\"\n " ,
291
+ " },\n " ,
292
+ " {\n " ,
293
+ " \" type\" : \" PDF_URL\" ,\n " ,
294
+ " \" value\" : \" https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n " ,
295
+ " }\n " ,
282
296
" ]\n " ,
283
297
" })\n " ,
284
298
" \n " ,
285
- " task1 = dataset.create_data_rows (uploads)\n " ,
299
+ " task1 = dataset.upsert_data_rows (uploads)\n " ,
286
300
" task1.wait_till_done()\n " ,
287
301
" print(\" ERRORS: \" , task1.errors)\n " ,
288
302
" print(\" RESULTS:\" , task1.result)"
294
308
{
295
309
"metadata" : {},
296
310
"source" : [
297
- " ### Create data rows with attachments "
311
+ " Create data rows from data in your local path "
298
312
],
299
313
"cell_type" : " markdown"
300
314
},
301
315
{
302
316
"metadata" : {},
303
317
"source" : [
304
- " task2 = dataset.create_data_rows([{\n " ,
318
+ " from PIL import Image\n " ,
319
+ " \n " ,
320
+ " # Create dummy empty jpeg file\n " ,
321
+ " width = 400\n " ,
322
+ " height = 300\n " ,
323
+ " color = (255, 255, 255) # White color\n " ,
324
+ " image = Image.new(\" RGB\" , (width, height), color)\n " ,
325
+ " \n " ,
326
+ " # Save the image as a JPEG file\n " ,
327
+ " image.save(\" dummy.jpg\" )\n " ,
328
+ " \n " ,
329
+ " local_data_path = \" dummy.jpg\"\n " ,
330
+ " \n " ,
331
+ " data = {\n " ,
332
+ " \" row_data\" : local_data_path,\n " ,
333
+ " \" global_key\" : str(uuid.uuid4())\n " ,
334
+ " }\n " ,
335
+ " \n " ,
336
+ " task3 = dataset.upsert_data_rows([data])\n " ,
337
+ " task3.wait_till_done()\n " ,
338
+ " print(\" ERRORS: \" , task3.errors)\n " ,
339
+ " print(\" RESULTS:\" , task3.result)"
340
+ ],
341
+ "cell_type" : " code" ,
342
+ "outputs" : [],
343
+ "execution_count" : null
344
+ },
345
+ {
346
+ "metadata" : {},
347
+ "source" : [
348
+ " # You can mix local files with urls when creating data rows\n " ,
349
+ " task4 = dataset.upsert_data_rows([{\n " ,
305
350
" \" row_data\" : \" https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0009.jpeg\" ,\n " ,
306
- " \" global_key\" : str(uuid.uuid4()),\n " ,
307
- " \" attachments\" : [\n " ,
308
- " {\n " ,
309
- " \" type\" : \" IMAGE_OVERLAY\" ,\n " ,
310
- " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\"\n " ,
311
- " },\n " ,
312
- " {\n " ,
313
- " \" type\" : \" RAW_TEXT\" ,\n " ,
314
- " \" value\" : \" IOWA, Zone 2232, June 2022 [Text string]\"\n " ,
315
- " },\n " ,
316
- " {\n " ,
317
- " \" type\" : \" TEXT_URL\" ,\n " ,
318
- " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\"\n " ,
319
- " },\n " ,
320
- " {\n " ,
321
- " \" type\" : \" IMAGE\" ,\n " ,
322
- " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\"\n " ,
323
- " },\n " ,
324
- " {\n " ,
325
- " \" type\" : \" VIDEO\" ,\n " ,
326
- " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/drone_video.mp4\"\n " ,
327
- " },\n " ,
328
- " {\n " ,
329
- " \" type\" : \" HTML\" ,\n " ,
330
- " \" value\" : \" https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\"\n " ,
331
- " },\n " ,
332
- " {\n " ,
333
- " \" type\" : \" PDF_URL\" ,\n " ,
334
- " \" value\" : \" https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n " ,
335
- " }\n " ,
336
- " ]\n " ,
351
+ " \" global_key\" : str(uuid.uuid4())\n " ,
352
+ " }, {\n " ,
353
+ " \" row_data\" : local_data_path,\n " ,
354
+ " \" global_key\" : str(uuid.uuid4())\n " ,
337
355
" }])\n " ,
338
- " print(\" ERRORS: \" , task2.errors)\n " ,
339
- " print(\" RESULTS:\" , task2.result)"
356
+ " task4.wait_till_done()\n " ,
357
+ " print(\" ERRORS: \" , task4.errors)\n " ,
358
+ " print(\" RESULTS:\" , task4.result)"
340
359
],
341
360
"cell_type" : " code" ,
342
361
"outputs" : [],
345
364
{
346
365
"metadata" : {},
347
366
"source" : [
348
- " ### Create data rows using data in your local path "
367
+ " ### Create data rows via `dataset.create_data_rows()` \n "
349
368
],
350
369
"cell_type" : " markdown"
351
370
},
352
371
{
353
372
"metadata" : {},
354
373
"source" : [
355
- " # Local paths\n " ,
356
- " local_data_path = \" /tmp/test_data_row.txt\"\n " ,
357
- " with open(local_data_path, 'w') as file:\n " ,
358
- " file.write(\" sample data\" )\n " ,
359
- " \n " ,
360
- " task3 = dataset.create_data_rows([local_data_path])\n " ,
361
- " print(\" ERRORS: \" , task3.errors)\n " ,
362
- " print(\" RESULTS:\" , task3.result)"
374
+ " dataset_2 = client.create_dataset(name=\" data_rows_demo_dataset_3\" )"
363
375
],
364
376
"cell_type" : " code" ,
365
377
"outputs" : [],
368
380
{
369
381
"metadata" : {},
370
382
"source" : [
371
- " # You can mix local files with urls when creating data rows\n " ,
372
- " task4 = dataset.create_data_rows([{\n " ,
373
- " \" row_data\" : \" https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0003.jpeg\" ,\n " ,
374
- " \" global_key\" : str(uuid.uuid4())\n " ,
375
- " }, {\n " ,
376
- " \" row_data\" : local_data_path,\n " ,
377
- " \" global_key\" : str(uuid.uuid4())\n " ,
378
- " }])\n " ,
379
- " print(\" ERRORS: \" , task4.errors)\n " ,
380
- " print(\" RESULTS:\" , task4.result)"
383
+ " uploads = []\n " ,
384
+ " # Generate data rows\n " ,
385
+ " for i in range(1,9):\n " ,
386
+ " uploads.append({\n " ,
387
+ " \" row_data\" : f\" https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\" ,\n " ,
388
+ " \" global_key\" : \" TEST-ID-%id\" % uuid.uuid1(),\n " ,
389
+ " ## add metadata (optional)\n " ,
390
+ " \" metadata_fields\" : [\n " ,
391
+ " lb.DataRowMetadataField(\n " ,
392
+ " schema_id=mdo.reserved_by_name[\" tag\" ].uid, # specify the schema id\n " ,
393
+ " value=\" tag_string\" , # typed inputs\n " ,
394
+ " ),\n " ,
395
+ " ]\n " ,
396
+ " })\n " ,
397
+ " \n " ,
398
+ " task1_2 = dataset_2.create_data_rows(uploads)\n " ,
399
+ " task1_2.wait_till_done()\n " ,
400
+ " print(\" ERRORS: \" , task1_2.errors)\n " ,
401
+ " print(\" RESULTS:\" , task1_2.result)"
381
402
],
382
403
"cell_type" : " code" ,
383
404
"outputs" : [],
387
408
"metadata" : {},
388
409
"source" : [
389
410
" ### Update\n " ,
390
- " Only two fields can be updated after a data row is created \n " ,
391
- " 1. Global keys \n " ,
392
- " 2. Row data\n "
411
+ " `dataset.upsert_data_rows()` can also be used to update data rows \n " ,
412
+ " \n " ,
413
+ " To update data rows using this method, you need to pass a `key`, which can reference either a global key or a data row ID. Additionally, include any fields that you wish to update along with their new values. \n "
393
414
],
394
415
"cell_type" : " markdown"
395
416
},
396
417
{
397
418
"metadata" : {},
398
419
"source" : [
399
- " data_row = client.get_data_row(\" <data_row_id_to_update>\" )\n " ,
400
- " new_id = str(uuid.uuid4())\n " ,
401
- " data_row.update(global_key=new_id, row_data=\" https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0005.jpeg\" )\n " ,
402
- " print(data_row)"
420
+ " # Fetch a data row from the first dataset example\n " ,
421
+ " ts = dataset.export()\n " ,
422
+ " ts.wait_till_done()\n " ,
423
+ " DATA_ROW_ID = [json.loads(output.json_str) for output in ts.get_stream()][0]['data_row']['id']\n " ,
424
+ " GLOBAL_KEY = [json.loads(output.json_str) for output in ts.get_stream()][0]['data_row']['global_key']\n " ,
425
+ " \n " ,
426
+ " print(f\" Pick either a data row id : {DATA_ROW_ID} or global key: {GLOBAL_KEY}\" )\n "
427
+ ],
428
+ "cell_type" : " code" ,
429
+ "outputs" : [],
430
+ "execution_count" : null
431
+ },
432
+ {
433
+ "metadata" : {},
434
+ "source" : [
435
+ " # Update the global key associated with the DATA_ROW_ID or GLOBAL_KEY, and include additional metadata\n " ,
436
+ " data = {\n " ,
437
+ " \" key\" : lb.UniqueId(DATA_ROW_ID),\n " ,
438
+ " \" global_key\" : \" NEW-ID-%id\" % uuid.uuid1(),\n " ,
439
+ " \" metadata_fields\" : [\n " ,
440
+ " # New metadata\n " ,
441
+ " lb.DataRowMetadataField(\n " ,
442
+ " schema_id=mdo.reserved_by_name['captureDateTime'].uid,\n " ,
443
+ " value=\" 2000-01-01 00:00:00\"\n " ,
444
+ " ),\n " ,
445
+ " # Include original metadata otherwise it will be removed\n " ,
446
+ " lb.DataRowMetadataField(\n " ,
447
+ " schema_id=mdo.reserved_by_name[\" tag\" ].uid,\n " ,
448
+ " value=\" tag_string\" ,\n " ,
449
+ " ),\n " ,
450
+ " ]\n " ,
451
+ " }\n " ,
452
+ " \n " ,
453
+ " task5 = dataset_2.upsert_data_rows([data])\n " ,
454
+ " task5.wait_till_done()\n " ,
455
+ " print(\" ERRORS: \" , task5.errors)\n " ,
456
+ " print(\" RESULTS:\" , task5.result)"
403
457
],
404
458
"cell_type" : " code" ,
405
459
"outputs" : [],
408
462
{
409
463
"metadata" : {},
410
464
"source" : [
411
- " ### Create a single attachemt on an existing data row"
465
+ " ### Create a single attachment on an existing data row"
412
466
],
413
467
"cell_type" : " markdown"
414
468
},
415
469
{
416
470
"metadata" : {},
417
471
"source" : [
418
472
" # You can only create one attachment at a time.\n " ,
419
- " data_row.create_attachment(attachment_type=\" RAW_TEXT\" ,\n " ,
420
- " attachment_value=\" LABELERS WILL SEE THIS \" )"
473
+ " DATA_ROW_ID = \" <DATA-ROW-ID>\"\n " ,
474
+ " data_row = client.get_data_row(DATA_ROW_ID)\n " ,
475
+ " attachment = data_row.create_attachment(attachment_type=\" RAW_TEXT\" ,\n " ,
476
+ " attachment_value=\" LABELERS WILL SEE THIS\" )"
477
+ ],
478
+ "cell_type" : " code" ,
479
+ "outputs" : [],
480
+ "execution_count" : null
481
+ },
482
+ {
483
+ "metadata" : {},
484
+ "source" : [
485
+ " Update a recently created attachment "
486
+ ],
487
+ "cell_type" : " markdown"
488
+ },
489
+ {
490
+ "metadata" : {},
491
+ "source" : [
492
+ " attachment.update(type= \" RAW_TEXT\" , value=\" NEW RAW TEXT\" )"
421
493
],
422
494
"cell_type" : " code" ,
423
495
"outputs" : [],
440
512
{
441
513
"metadata" : {},
442
514
"source" : [
443
- " data_row = client.get_data_row(\" <data_row_id_to_delete>\" )\n " ,
515
+ " DATAROW_ID_TO_DELETE = \" <DATA-ROW-ID>\"\n " ,
516
+ " data_row = client.get_data_row(DATAROW_ID_TO_DELETE)\n " ,
444
517
" data_row.delete()"
445
518
],
446
519
"cell_type" : " code" ,
465
538
"execution_count" : null
466
539
}
467
540
]
468
- }
541
+ }
0 commit comments