Skip to content

[PLT-1392] [PLT-1350] Remove SDK methods for exports v1 #1800

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 0 additions & 53 deletions libs/labelbox/src/labelbox/schema/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,59 +87,6 @@ def remove_queued_data_rows(self) -> None:
},
experimental=True)

def export_data_rows(self,
                     timeout_seconds=120,
                     include_metadata: bool = False) -> Generator:
    """ Returns a generator that produces all data rows that are currently
    in this batch.

    Note: For efficiency, the data are cached for 30 minutes. Newly created data rows will not appear
    until the end of the cache period.

    Args:
        timeout_seconds (float): Max waiting time, in seconds.
        include_metadata (bool): True to return related DataRow metadata
    Returns:
        Generator that yields DataRow objects belonging to this batch.
    Raises:
        LabelboxError: if the export fails or is unable to download within the specified time.
    """
    warnings.warn(
        "You are currently utilizing exports v1 for this action, which will be deprecated after April 30th, 2024. We recommend transitioning to exports v2. To view export v2 details, visit our docs: https://docs.labelbox.com/reference/label-export",
        DeprecationWarning)

    id_param = "batchId"
    metadata_param = "includeMetadataInput"
    query_str = """mutation GetBatchDataRowsExportUrlPyApi($%s: ID!, $%s: Boolean!)
        {exportBatchDataRows(data:{batchId: $%s , includeMetadataInput: $%s}) {downloadUrl createdAt status}}
    """ % (id_param, metadata_param, id_param, metadata_param)
    sleep_time = 2
    # Bug fixes vs. previous revision:
    # 1. Use wall-clock elapsed time (like Project.export_queued_data_rows)
    #    so the time spent inside the network calls also counts toward the
    #    timeout; previously only the sleep intervals were subtracted.
    # 2. The timeout error message previously interpolated the decremented
    #    remainder (<= 0) instead of the configured timeout.
    start_time = time.time()
    while True:
        res = self.client.execute(query_str, {
            id_param: self.uid,
            metadata_param: include_metadata
        })
        res = res["exportBatchDataRows"]
        if res["status"] == "COMPLETE":
            download_url = res["downloadUrl"]
            response = requests.get(download_url)
            response.raise_for_status()
            # The export file is NDJSON; wrap each record in a DataRow entity.
            reader = parser.reader(StringIO(response.text))
            return (
                Entity.DataRow(self.client, result) for result in reader)
        elif res["status"] == "FAILED":
            raise LabelboxError("Data row export failed.")

        if time.time() - start_time > timeout_seconds:
            raise LabelboxError(
                f"Unable to export data rows within {timeout_seconds} seconds."
            )

        logger.debug("Batch '%s' data row export, waiting for server...",
                     self.uid)
        time.sleep(sleep_time)

def delete(self) -> None:
""" Deletes the given batch.

Expand Down
52 changes: 0 additions & 52 deletions libs/labelbox/src/labelbox/schema/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,58 +337,6 @@ def data_row_for_external_id(self, external_id) -> "DataRow":
external_id)
return data_rows[0]

def export_data_rows(self,
                     timeout_seconds=120,
                     include_metadata: bool = False) -> Generator:
    """ Returns a generator that produces all data rows that are currently
    attached to this dataset.

    Note: For efficiency, the data are cached for 30 minutes. Newly created data rows will not appear
    until the end of the cache period.

    Args:
        timeout_seconds (float): Max waiting time, in seconds.
        include_metadata (bool): True to return related DataRow metadata
    Returns:
        Generator that yields DataRow objects belonging to this dataset.
    Raises:
        LabelboxError: if the export fails or is unable to download within the specified time.
    """
    warnings.warn(
        "You are currently utilizing exports v1 for this action, which will be deprecated after April 30th, 2024. We recommend transitioning to exports v2. To view export v2 details, visit our docs: https://docs.labelbox.com/reference/label-export",
        DeprecationWarning)
    id_param = "datasetId"
    metadata_param = "includeMetadataInput"
    query_str = """mutation GetDatasetDataRowsExportUrlPyApi($%s: ID!, $%s: Boolean!)
        {exportDatasetDataRows(data:{datasetId: $%s , includeMetadataInput: $%s}) {downloadUrl createdAt status}}
    """ % (id_param, metadata_param, id_param, metadata_param)
    sleep_time = 2
    # Bug fixes vs. previous revision:
    # 1. Use wall-clock elapsed time (like Project.export_queued_data_rows)
    #    so time spent in the network round-trips counts toward the timeout;
    #    previously only the sleep intervals were subtracted.
    # 2. The timeout error message previously interpolated the decremented
    #    remainder (<= 0) instead of the configured timeout.
    start_time = time.time()
    while True:
        res = self.client.execute(query_str, {
            id_param: self.uid,
            metadata_param: include_metadata
        })
        res = res["exportDatasetDataRows"]
        if res["status"] == "COMPLETE":
            download_url = res["downloadUrl"]
            response = requests.get(download_url)
            response.raise_for_status()
            # The export file is NDJSON; wrap each record in a DataRow entity.
            reader = parser.reader(StringIO(response.text))
            return (
                Entity.DataRow(self.client, result) for result in reader)
        elif res["status"] == "FAILED":
            raise LabelboxError("Data row export failed.")

        if time.time() - start_time > timeout_seconds:
            raise LabelboxError(
                f"Unable to export data rows within {timeout_seconds} seconds."
            )

        logger.debug("Dataset '%s' data row export, waiting for server...",
                     self.uid)
        time.sleep(sleep_time)

def export(
self,
task_name: Optional[str] = None,
Expand Down
51 changes: 0 additions & 51 deletions libs/labelbox/src/labelbox/schema/model_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,57 +461,6 @@ def get_config(self) -> Dict[str, Any]:
experimental=True)
return res["modelRun"]["trainingMetadata"]

@experimental
def export_labels(
    self,
    download: bool = False,
    timeout_seconds: int = 600
) -> Optional[Union[str, List[Dict[Any, Any]]]]:
    """
    Experimental. To use, make sure client has enable_experimental=True.

    Fetches Labels from the ModelRun

    Args:
        download (bool): Returns the url if False
        timeout_seconds (int): Max waiting time, in seconds.
    Returns:
        URL of the data file with this ModelRun's labels.
        If download=True, this instead returns the contents as NDJSON format.
        If the server didn't generate during the `timeout_seconds` period,
        None is returned.
    """
    warnings.warn(
        "You are currently utilizing exports v1 for this action, which will be deprecated after April 30th, 2024. We recommend transitioning to exports v2. To view export v2 details, visit our docs: https://docs.labelbox.com/reference/label-export",
        DeprecationWarning)
    sleep_time = 2
    query_str = """mutation exportModelRunAnnotationsPyApi($modelRunId: ID!) {
            exportModelRunAnnotations(data: {modelRunId: $modelRunId}) {
                downloadUrl createdAt status
            }
        }
        """

    # Fix: measure wall-clock elapsed time (matching the other export
    # pollers) so that network latency also counts toward the timeout;
    # previously only the sleep intervals were subtracted, letting the real
    # wait exceed `timeout_seconds`.
    start_time = time.time()
    while True:
        # downloadUrl is falsy until the server has generated the export.
        url = self.client.execute(
            query_str, {'modelRunId': self.uid},
            experimental=True)['exportModelRunAnnotations']['downloadUrl']

        if url:
            if not download:
                return url
            else:
                response = requests.get(url)
                response.raise_for_status()
                return parser.loads(response.content)

        if time.time() - start_time > timeout_seconds:
            return None

        logger.debug("ModelRun '%s' label export, waiting for server...",
                     self.uid)
        time.sleep(sleep_time)

def export(self,
task_name: Optional[str] = None,
params: Optional[ModelRunExportParams] = None) -> ExportTask:
Expand Down
152 changes: 0 additions & 152 deletions libs/labelbox/src/labelbox/schema/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,157 +313,6 @@ def labels(self, datasets=None, order_by=None) -> PaginatedCollection:
return PaginatedCollection(self.client, query_str, {id_param: self.uid},
["project", "labels"], Label)

def export_queued_data_rows(
        self,
        timeout_seconds=120,
        include_metadata: bool = False) -> List[Dict[str, str]]:
    """ Returns all data rows that are currently enqueued for this project.

    Args:
        timeout_seconds (float): Max waiting time, in seconds.
        include_metadata (bool): True to return related DataRow metadata
    Returns:
        Data row fields for all data rows in the queue as json
    Raises:
        LabelboxError: if the export fails or is unable to download within the specified time.
    """
    warnings.warn(
        "You are currently utilizing exports v1 for this action, which will be deprecated after April 30th, 2024. We recommend transitioning to exports v2. To view export v2 details, visit our docs: https://docs.labelbox.com/reference/label-export",
        DeprecationWarning)
    id_param = "projectId"
    metadata_param = "includeMetadataInput"
    query_str = """mutation GetQueuedDataRowsExportUrlPyApi($%s: ID!, $%s: Boolean!)
        {exportQueuedDataRows(data:{projectId: $%s , includeMetadataInput: $%s}) {downloadUrl createdAt status} }
    """ % (id_param, metadata_param, id_param, metadata_param)
    poll_interval = 2
    deadline = time.time() + timeout_seconds
    while True:
        # Ask the server to (re)generate the export and report its status.
        export_job = self.client.execute(query_str, {
            id_param: self.uid,
            metadata_param: include_metadata
        })["exportQueuedDataRows"]

        status = export_job["status"]
        if status == "COMPLETE":
            # Download the finished export and parse it as NDJSON.
            file_response = requests.get(export_job["downloadUrl"])
            file_response.raise_for_status()
            return parser.loads(file_response.text)
        if status == "FAILED":
            raise LabelboxError("Data row export failed.")

        if time.time() > deadline:
            raise LabelboxError(
                f"Unable to export data rows within {timeout_seconds} seconds."
            )

        logger.debug(
            "Project '%s' queued data row export, waiting for server...",
            self.uid)
        time.sleep(poll_interval)

def export_labels(self,
                  download=False,
                  timeout_seconds=1800,
                  **kwargs) -> Optional[Union[str, List[Dict[Any, Any]]]]:
    """ Calls the server-side Label exporting that generates a JSON
    payload, and returns the URL to that payload.

    Will only generate a new URL at a max frequency of 30 min.

    Args:
        download (bool): Returns the url if False
        timeout_seconds (float): Max waiting time, in seconds.
        start (str): Earliest date for labels, formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"
        end (str): Latest date for labels, formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"
        last_activity_start (str): Will include all labels that have had any updates to
            data rows, issues, comments, metadata, or reviews since this timestamp.
            formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"
        last_activity_end (str): Will include all labels that do not have any updates to
            data rows, issues, comments, metadata, or reviews after this timestamp.
            formatted "YYYY-MM-DD" or "YYYY-MM-DD hh:mm:ss"

    Returns:
        URL of the data file with this Project's labels. If the server didn't
        generate during the `timeout_seconds` period, None is returned.
    """
    warnings.warn(
        "You are currently utilizing exports v1 for this action, which will be deprecated after April 30th, 2024. We recommend transitioning to exports v2. To view export v2 details, visit our docs: https://docs.labelbox.com/reference/label-export",
        DeprecationWarning)

    def _string_from_dict(dictionary: dict, value_with_quotes=False) -> str:
        """Returns a concatenated string of the dictionary's keys and values

        The string will be formatted as {key}: 'value' for each key. Value will be inclusive of
        quotations while key will not. This can be toggled with `value_with_quotes`"""

        quote = "\"" if value_with_quotes else ""
        # NOTE: entries whose value is falsy ("" or None) are silently
        # dropped from the rendered filter, so unset bounds never reach
        # the GraphQL query.
        return ",".join([
            f"""{c}: {quote}{dictionary.get(c)}{quote}"""
            for c in dictionary
            if dictionary.get(c)
        ])

    sleep_time = 2  # seconds between polls
    id_param = "projectId"
    filter_param = ""
    filter_param_dict = {}

    # Build the optional createdAt filter from the `start`/`end` kwargs.
    # validate_datetime presumably raises on a malformed date string —
    # TODO confirm against its definition.
    if "start" in kwargs or "end" in kwargs:
        created_at_dict = {
            "start": kwargs.get("start", ""),
            "end": kwargs.get("end", "")
        }
        [validate_datetime(date) for date in created_at_dict.values()]
        filter_param_dict["labelCreatedAt"] = "{%s}" % _string_from_dict(
            created_at_dict, value_with_quotes=True)

    # Build the optional lastActivityAt filter analogously.
    if "last_activity_start" in kwargs or "last_activity_end" in kwargs:
        last_activity_start = kwargs.get('last_activity_start')
        last_activity_end = kwargs.get('last_activity_end')

        if last_activity_start:
            validate_datetime(str(last_activity_start))
        if last_activity_end:
            validate_datetime(str(last_activity_end))

        filter_param_dict["lastActivityAt"] = "{%s}" % _string_from_dict(
            {
                "start": last_activity_start,
                "end": last_activity_end
            },
            value_with_quotes=True)

    # Render the filters as a GraphQL object literal spliced directly into
    # the mutation text (keys unquoted, values quoted above).
    if filter_param_dict:
        filter_param = """, filters: {%s }""" % (_string_from_dict(
            filter_param_dict, value_with_quotes=False))

    query_str = """mutation GetLabelExportUrlPyApi($%s: ID!)
        {exportLabels(data:{projectId: $%s%s}) {downloadUrl createdAt shouldPoll} }
    """ % (id_param, id_param, filter_param)

    start_time = time.time()

    while True:
        res = self.client.execute(query_str, {id_param: self.uid})
        res = res["exportLabels"]
        # shouldPoll=False with a non-null downloadUrl signals the export
        # (possibly a cached one, per the 30-min note above) is ready.
        if not res["shouldPoll"] and res["downloadUrl"] is not None:
            url = res['downloadUrl']
            if not download:
                return url
            else:
                response = requests.get(url)
                response.raise_for_status()
                return response.json()

        # Wall-clock timeout: give up and return None rather than raising.
        current_time = time.time()
        if current_time - start_time > timeout_seconds:
            return None

        logger.debug("Project '%s' label export, waiting for server...",
                     self.uid)
        time.sleep(sleep_time)

def export(
self,
task_name: Optional[str] = None,
Expand Down Expand Up @@ -1944,4 +1793,3 @@ class LabelingParameterOverride(DbObject):
"consensus average_benchmark_agreement last_activity_time")
LabelerPerformance.__doc__ = (
"Named tuple containing info about a labeler's performance.")

Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,6 @@ def create_data_rows(self, args):
def wait_till_done(self):
    """No-op: this mock task is treated as already complete."""
    pass

def export_data_rows(self):
    """Yield the pre-seeded export records one at a time."""
    yield from self.exports


def test_generator(list_of_labels):
generator = LabelGenerator([list_of_labels[0]])
Expand Down
19 changes: 0 additions & 19 deletions libs/labelbox/tests/data/export/legacy/test_export_catalog.py

This file was deleted.

Loading
Loading