Skip to content

[PLT-1611] Vb/placeholder datarows #1851

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 12 additions & 26 deletions libs/labelbox/src/labelbox/schema/dataset.py
Original file line number Diff line number Diff line change
@@ -1,57 +1,43 @@
from datetime import datetime
from typing import Dict, Generator, List, Optional, Any, Final, Tuple, Union
import os
import json
import logging
from collections.abc import Iterable
from string import Template
import time
import os
import warnings

from labelbox import parser
from itertools import islice

from concurrent.futures import ThreadPoolExecutor, as_completed
from io import StringIO
import requests
from itertools import islice
from string import Template
from typing import Any, Dict, List, Optional, Tuple, Union

import labelbox.schema.internal.data_row_uploader as data_row_uploader
from labelbox.exceptions import (
InvalidQueryError,
LabelboxError,
ResourceNotFoundError,
ResourceCreationError,
ResourceNotFoundError,
)
from labelbox.orm import query
from labelbox.orm.comparison import Comparison
from labelbox.orm.db_object import DbObject, Updateable, Deletable, experimental
from labelbox.orm.db_object import DbObject, Deletable, Updateable
from labelbox.orm.model import Entity, Field, Relationship
from labelbox.orm import query
from labelbox.exceptions import MalformedQueryException
from labelbox.pagination import PaginatedCollection
from labelbox.schema.data_row import DataRow
from labelbox.schema.embedding import EmbeddingVector
from labelbox.schema.export_filters import DatasetExportFilters, build_filters
from labelbox.schema.export_params import (
CatalogExportParams,
validate_catalog_export_params,
)
from labelbox.schema.export_task import ExportTask
from labelbox.schema.identifiable import UniqueId, GlobalKey
from labelbox.schema.task import Task, DataUpsertTask
from labelbox.schema.user import User
from labelbox.schema.iam_integration import IAMIntegration
from labelbox.schema.identifiable import GlobalKey, UniqueId
from labelbox.schema.internal.data_row_upsert_item import (
DataRowCreateItem,
DataRowItemBase,
DataRowUpsertItem,
DataRowCreateItem,
)
import labelbox.schema.internal.data_row_uploader as data_row_uploader
from labelbox.schema.internal.descriptor_file_creator import (
DescriptorFileCreator,
)
from labelbox.schema.internal.datarow_upload_constants import (
FILE_UPLOAD_THREAD_COUNT,
UPSERT_CHUNK_SIZE_BYTES,
)
from labelbox.schema.task import DataUpsertTask, Task

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -359,7 +345,7 @@ def data_row_for_external_id(self, external_id) -> "DataRow":
)
if len(data_rows) > 1:
logger.warning(
f"More than one data_row has the provided external_id : `%s`. Use function data_rows_for_external_id to fetch all",
"More than one data_row has the provided external_id : `%s`. Use function data_rows_for_external_id to fetch all",
external_id,
)
return data_rows[0]
Expand Down
79 changes: 54 additions & 25 deletions libs/labelbox/tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,53 @@
from collections import defaultdict
from itertools import islice
import json
import os
import sys
import re
import time
import uuid
import requests
from types import SimpleNamespace
from typing import Type, List
from enum import Enum
from typing import Tuple
from collections import defaultdict
from datetime import datetime, timezone
from itertools import islice
from typing import Type

import pytest
import requests

from labelbox import Dataset, DataRow
from labelbox import LabelingFrontend
from labelbox import (
OntologyBuilder,
Tool,
Option,
Classification,
Client,
Dataset,
LabelingFrontend,
MediaType,
OntologyBuilder,
Option,
PromptResponseClassification,
ResponseOption,
Tool,
)
from labelbox.orm import query
from labelbox.pagination import PaginatedCollection
from labelbox.schema.annotation_import import LabelImport
from labelbox.schema.catalog import Catalog
from labelbox.schema.enums import AnnotationImportState
from labelbox.schema.invite import Invite
from labelbox.schema.quality_mode import QualityMode
from labelbox.schema.data_row import DataRowMetadataField
from labelbox.schema.ontology_kind import OntologyKind
from labelbox.schema.queue_mode import QueueMode
from labelbox.schema.user import User
from labelbox import Client
from labelbox.schema.ontology_kind import OntologyKind


@pytest.fixture
def constants():
    """Return the hard-coded metadata identifiers shared by the tests.

    The returned dict contains four individual id strings, a list of
    those same four ids under ``EXPECTED_METADATA_SCHEMA_IDS`` (in the
    same order), and the custom text schema name.
    """
    # Insertion order matters: the expected-ids list below is derived
    # from it and must stay SPLIT, TEST_SPLIT, TEXT, CAPTURE_DT.
    schema_ids = {
        "SPLIT_SCHEMA_ID": "cko8sbczn0002h2dkdaxb5kal",
        "TEST_SPLIT_ID": "cko8scbz70005h2dkastwhgqt",
        "TEXT_SCHEMA_ID": "cko8s9r5v0001h2dk9elqdidh",
        "CAPTURE_DT_SCHEMA_ID": "cko8sdzv70006h2dk8jg64zvb",
    }

    return {
        **schema_ids,
        "EXPECTED_METADATA_SCHEMA_IDS": list(schema_ids.values()),
        "CUSTOM_TEXT_SCHEMA_NAME": "custom_text",
    }


@pytest.fixture
Expand Down Expand Up @@ -835,3 +846,21 @@ def print_perf_summary():
for aaa in islice(sorted_dict, num_of_entries)
]
print("\nTop slowest fixtures:\n", slowest_fixtures, file=sys.stderr)


@pytest.fixture
def make_metadata_fields(constants):
    """Build a list of three ``DataRowMetadataField`` objects for tests.

    Args:
        constants: The mapping produced by the ``constants`` fixture; the
            ``SPLIT_SCHEMA_ID``, ``TEST_SPLIT_ID``, ``CAPTURE_DT_SCHEMA_ID``
            and ``TEXT_SCHEMA_ID`` entries are read.

    Returns:
        A list with, in order: a split field whose value is the test split
        id, a capture-datetime field holding the current UTC time, and a
        text field holding a fixed message.
    """
    # Timezone-aware "now"; named so it does not shadow the ``time`` module.
    captured_at = datetime.now(timezone.utc)
    message = "A message"

    return [
        DataRowMetadataField(
            schema_id=constants["SPLIT_SCHEMA_ID"],
            value=constants["TEST_SPLIT_ID"],
        ),
        DataRowMetadataField(
            schema_id=constants["CAPTURE_DT_SCHEMA_ID"],
            value=captured_at,
        ),
        DataRowMetadataField(
            schema_id=constants["TEXT_SCHEMA_ID"],
            value=message,
        ),
    ]
Loading
Loading