From 7ba272c40490e25238a6323a70503fa5f9d4d18f Mon Sep 17 00:00:00 2001 From: paulnoirel <87332996+paulnoirel@users.noreply.github.com> Date: Fri, 24 May 2024 14:21:25 +0100 Subject: [PATCH 1/6] Modify get_overview to support duplicate names and provide a simplify output --- libs/labelbox/src/labelbox/schema/project.py | 127 +++++++++++------- .../src/labelbox/schema/project_overview.py | 77 ++++++++--- 2 files changed, 136 insertions(+), 68 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index 9ffed2816..423f5bb09 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -41,7 +41,7 @@ from labelbox.schema.task import Task from labelbox.schema.task_queue import TaskQueue from labelbox.schema.ontology_kind import (EditorTaskType, OntologyKind) -from labelbox.schema.project_overview import ProjectOverview +from labelbox.schema.project_overview import ProjectOverview, ProjectOverviewDetailed if TYPE_CHECKING: from labelbox import BulkImportRequest @@ -1753,62 +1753,97 @@ def __check_data_rows_have_been_processed( return response["queryAllDataRowsHaveBeenProcessed"][ "allDataRowsHaveBeenProcessed"] - def get_overview(self) -> ProjectOverview: - """ Return the number of data rows per task queue, and issues of a project - Equivalent of the Overview tab of a project + def get_overview(self, details=False) -> Union[ProjectOverview, ProjectOverviewDetailed]: + """Return the overview of a project. + + This method returns the number of data rows per task queue and issues of a project, + which is equivalent to the Overview tab of a project. + + Args: + details (bool, optional): Whether to include detailed queue information for review and rework queues. + Defaults to False. + + Returns: + Union[ProjectOverview, ProjectOverviewDetailed]: An object representing the project overview. + If `details` is False, returns a `ProjectOverview` object. + If `details` is True, returns a `ProjectOverviewDetailed` object. + + Raises: + Exception: If there is an error executing the query. + + """ + def _build_queue_details(overview_category: str, queue_type: str, total: int): + """ + Builds the queue details for a given overview category and queue type. Args: - with_issues: (optional) boolean to include issues in the overview + overview_category (str): The overview category. + queue_type (str): The queue type. + total (int): The total number of items in the queue. + Returns: - Object Project_Overview + dict: A dictionary containing the queue details. + - data (list): A list of dictionaries representing the queues. + Each dictionary contains the queue name and the number of data rows. + - total (int): The total number of data rows for the category """ + queues = [ + {tq["name"]: tq.get("dataRowCount")} + for tq in result.get("taskQueues") + if tq.get("queueType") == queue_type + ] + + return { + "data": queues, + "total": total + } - query = """query ProjectGetOverviewPyApi($projectId: ID!) { - project(where: { id: $projectId }) { - workstreamStateCounts { - state - count - } - taskQueues { - queueType - name - dataRowCount - } - issues { - totalCount - } - completedDataRowCount + query = """query ProjectGetOverviewPyApi($projectId: ID!) { + project(where: { id: $projectId }) { + workstreamStateCounts { + state + count + } + taskQueues { + queueType + name + dataRowCount + } + issues { + totalCount + } + completedDataRowCount + } } - } - """ + """ - # Must use experimental to access "issues" - result = self.client.execute(query, {"projectId": self.uid}, - experimental=True)["project"] + # Must use experimental to access "issues" + result = self.client.execute(query, {"projectId": self.uid}, + experimental=True)["project"] - overview = { - utils.snake_case(st["state"]): st["count"] - for st in result.get("workstreamStateCounts") - if st["state"] != "NotInTaskQueue" - } - - review_queues = { - tq["name"]: tq.get("dataRowCount") - for tq in result.get("taskQueues") - if tq.get("queueType") == "MANUAL_REVIEW_QUEUE" - } - - # Store the total number of data rows in review - review_queues["all"] = overview.get("in_review") - overview["in_review"] = review_queues + # Reformat category names + overview = { + utils.snake_case(st["state"]): st["count"] + for st in result.get("workstreamStateCounts") + if st["state"] != "NotInTaskQueue" + } - overview["issues"] = result.get("issues", {}).get("totalCount") + overview["issues"] = result.get("issues", {}).get("totalCount") - # Rename keys - overview["to_label"] = overview.pop("unlabeled") - overview["all_in_data_rows"] = overview.pop("all") + # Rename categories + overview["to_label"] = overview.pop("unlabeled") + overview["total_data_rows"] = overview.pop("all") - return ProjectOverview(**overview) + if not details: + return ProjectOverview(**overview) + else: + # Build queue details for review and rework queues + for category in ["rework", "review"]: + overview[f"in_{category}"] = _build_queue_details(f"in_{category}", + f"MANUAL_{category.upper()}_QUEUE", + overview[f"in_{category}"]) + + return ProjectOverviewDetailed(**overview) def clone(self) -> "Project": """ diff --git a/libs/labelbox/src/labelbox/schema/project_overview.py b/libs/labelbox/src/labelbox/schema/project_overview.py index 0b517615e..d4ad94b63 100644 --- a/libs/labelbox/src/labelbox/schema/project_overview.py +++ b/libs/labelbox/src/labelbox/schema/project_overview.py @@ -1,38 +1,71 @@ -from typing import Dict +from typing import Dict, List, TypedDict from labelbox.pydantic_compat import BaseModel -from typing import Dict - class ProjectOverview(BaseModel): """ Class that represents a project summary as displayed in the UI, in Annotate, under the "Overview" tab of a particular project. All attributes represent the number of data rows in the corresponding state. - The `in_review` attribute is a dictionary where the keys are the queue names - and the values are the number of data rows in that queue. + The `to_label` attribute represents the number of data rows that are yet to be labeled (To Label). + The `in_review` attribute is a dictionary where the keys are the queue names and the values are the number of data rows in that queue (In Review). + The `in_rework` attribute represents the number of data rows that are in the Rework queue (In Rework). + The `skipped` attribute represents the number of data rows that have been skipped (Skipped). + The `done` attribute represents the number of data rows that have been marked as Done (Done). + The `issues` attribute represents the number of data rows with associated issues (Issues). - Attributes: - Representing existing fields from the Overview tag (UI names in parentheses): - - to_label (int): The number of data rows that are yet to be labeled (To Label). - in_review (Dict[str, int]): A dictionary where the keys are the queue names . - and the values are the number of data rows in that queue. (In Review) - in_rework (int): The number of data rows that are in the Rework queue (In Rework). - skipped (int): The number of data rows that have been skipped (Skipped). - done (int): The number of data rows that have been marked as Done (Done). - issues (int): The number of data rows with associated issues (Issues). - - Additional values: - - labeled (int): The number of data rows that have been labeled. - all_in_data_rows (int): The total number of data rows in the project. + The following don't appear in the UI + The `labeled` attribute represents the number of data rows that have been labeled. + The `total_data_rows` attribute represents the total number of data rows in the project. """ to_label: int - in_review: Dict[str, int] + in_review: int in_rework: int skipped: int done: int issues: int labeled: int - all_in_data_rows: int \ No newline at end of file + total_data_rows: int + + +class _QueueDetail(TypedDict): + """ + Class that represents the detailed information of the queues in the project overview. + The `data` attribute is a list of dictionaries where the keys are the queue names + and the values are the number of data rows in that queue. + """ + data: List[Dict[str, int]] + total: int + + +class ProjectOverviewDetailed(BaseModel): + """ + Class that represents a project summary as displayed in the UI, in Annotate, + under the "Overview" tab of a particular project. + This class adds the list of task queues for the `in_review` and `in_rework` attributes. + + All attributes represent the number of data rows in the corresponding state. + The `to_label` attribute represents the number of data rows that are yet to be labeled (To Label). + The `in_review` attribute is a dictionary where the keys are (In Review): + data: a list of dictionaries with the queue name and the number of data rows + total: the total number of data rows in the In Review state + The `in_rework` attribute is a dictionary where the keys are (In Rework): + data: a list of dictionaries with the queue name and the number of data rows + total: the total number of data rows in the In Rework state + The `skipped` attribute represents the number of data rows that have been skipped (Skipped). + The `done` attribute represents the number of data rows that have been marked as Done (Done). + The `issues` attribute represents the number of data rows with associated issues (Issues). + + The following don't appear in the UI + The `labeled` attribute represents the number of data rows that have been labeled. + The `total_data_rows` attribute represents the total number of data rows in the project. + """ + + to_label: int + in_review: _QueueDetail + in_rework: _QueueDetail + skipped: int + done: int + issues: int + labeled: int + total_data_rows: int \ No newline at end of file From cfe7d0c2e0569896e87f17067d47bf43534c955a Mon Sep 17 00:00:00 2001 From: paulnoirel <87332996+paulnoirel@users.noreply.github.com> Date: Fri, 24 May 2024 14:48:30 +0100 Subject: [PATCH 2/6] Update test --- .../tests/integration/test_task_queue.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/libs/labelbox/tests/integration/test_task_queue.py b/libs/labelbox/tests/integration/test_task_queue.py index 1bd721b51..190e29455 100644 --- a/libs/labelbox/tests/integration/test_task_queue.py +++ b/libs/labelbox/tests/integration/test_task_queue.py @@ -11,11 +11,11 @@ def test_get_task_queue(project: Project): assert review_queue -def test_get_overview(project: Project): +def test_get_overview_no_details(project: Project): po = project.get_overview() assert isinstance(po.to_label, int) - assert isinstance(po.in_review, dict) + assert isinstance(po.in_review, int) assert isinstance(po.in_rework, int) assert isinstance(po.skipped, int) assert isinstance(po.done, int) @@ -23,6 +23,19 @@ def test_get_overview(project: Project): assert isinstance(po.labeled, int) assert isinstance(po.all_in_data_rows, int) +def test_get_overview_with_details(project: Project): + po = project.get_overview(details=True) + + assert isinstance(po.to_label, int) + assert isinstance(po.in_review["data"], list) + assert isinstance(po.in_review["total"], int) + assert isinstance(po.in_rework["data"], list) + assert isinstance(po.in_rework["total"], int) + assert isinstance(po.skipped, int) + assert isinstance(po.done, int) + assert isinstance(po.issues, int) + assert isinstance(po.labeled, int) + assert isinstance(po.all_in_data_rows, int) def _validate_moved(project, queue_name, data_row_count): timeout_seconds = 30 From 8ce9e994ef9b2bb1612286108ce5d75187aa7762 Mon Sep 17 00:00:00 2001 From: paulnoirel <87332996+paulnoirel@users.noreply.github.com> Date: Fri, 24 May 2024 14:56:40 +0100 Subject: [PATCH 3/6] fix TypedDict as per tests --- libs/labelbox/src/labelbox/schema/project_overview.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/labelbox/src/labelbox/schema/project_overview.py b/libs/labelbox/src/labelbox/schema/project_overview.py index d4ad94b63..3e75e7282 100644 --- a/libs/labelbox/src/labelbox/schema/project_overview.py +++ b/libs/labelbox/src/labelbox/schema/project_overview.py @@ -1,5 +1,6 @@ -from typing import Dict, List, TypedDict +from typing import Dict, List from labelbox.pydantic_compat import BaseModel +from typing_extensions import TypedDict class ProjectOverview(BaseModel): """ From 62b24a6bb4cb8865bae573e255d6984c5ac7ad9a Mon Sep 17 00:00:00 2001 From: paulnoirel <87332996+paulnoirel@users.noreply.github.com> Date: Fri, 24 May 2024 15:04:12 +0100 Subject: [PATCH 4/6] fix test --- libs/labelbox/tests/integration/test_task_queue.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/labelbox/tests/integration/test_task_queue.py b/libs/labelbox/tests/integration/test_task_queue.py index 190e29455..2a6ca45d8 100644 --- a/libs/labelbox/tests/integration/test_task_queue.py +++ b/libs/labelbox/tests/integration/test_task_queue.py @@ -21,7 +21,7 @@ def test_get_overview_no_details(project: Project): assert isinstance(po.done, int) assert isinstance(po.issues, int) assert isinstance(po.labeled, int) - assert isinstance(po.all_in_data_rows, int) + assert isinstance(po.total_data_rows, int) def test_get_overview_with_details(project: Project): po = project.get_overview(details=True) @@ -35,7 +35,7 @@ def test_get_overview_with_details(project: Project): assert isinstance(po.done, int) assert isinstance(po.issues, int) assert isinstance(po.labeled, int) - assert isinstance(po.all_in_data_rows, int) + assert isinstance(po.total_data_rows, int) def _validate_moved(project, queue_name, data_row_count): timeout_seconds = 30 From e811ad64a20ef673cab2ed7b4fdd251db7d102ac Mon Sep 17 00:00:00 2001 From: paulnoirel <87332996+paulnoirel@users.noreply.github.com> Date: Fri, 24 May 2024 22:54:50 +0100 Subject: [PATCH 5/6] Expose ProjectOverview and ProjectOverviewDetailed --- docs/labelbox/project-overview.rst | 2 +- libs/labelbox/src/labelbox/__init__.py | 1 + libs/labelbox/src/labelbox/schema/__init__.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/labelbox/project-overview.rst b/docs/labelbox/project-overview.rst index 3157eef13..536921955 100644 --- a/docs/labelbox/project-overview.rst +++ b/docs/labelbox/project-overview.rst @@ -1,4 +1,4 @@ -ProjectOverview +Project Overview =============================================================================================== .. automodule:: labelbox.schema.project-overview diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 85620f851..e638ea1b0 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -39,3 +39,4 @@ from labelbox.schema.identifiables import UniqueIds, GlobalKeys, DataRowIds from labelbox.schema.identifiable import UniqueId, GlobalKey from labelbox.schema.ontology_kind import OntologyKind +from labelbox.schema.project_overview import ProjectOverview, ProjectOverviewDetailed diff --git a/libs/labelbox/src/labelbox/schema/__init__.py b/libs/labelbox/src/labelbox/schema/__init__.py index 10a3e08ee..32f28e7b6 100644 --- a/libs/labelbox/src/labelbox/schema/__init__.py +++ b/libs/labelbox/src/labelbox/schema/__init__.py @@ -25,3 +25,4 @@ import labelbox.schema.identifiable import labelbox.schema.catalog import labelbox.schema.ontology_kind +import labelbox.schema.project_overview \ No newline at end of file From 700982c2470e89654ae4cec95d6be45290d43228 Mon Sep 17 00:00:00 2001 From: paulnoirel <87332996+paulnoirel@users.noreply.github.com> Date: Mon, 27 May 2024 10:35:09 +0100 Subject: [PATCH 6/6] Fixed format and remove nested function --- libs/labelbox/src/labelbox/schema/project.py | 133 ++++++++----------- 1 file changed, 57 insertions(+), 76 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index 423f5bb09..56c1592de 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -1754,96 +1754,77 @@ def __check_data_rows_have_been_processed( "allDataRowsHaveBeenProcessed"] def get_overview(self, details=False) -> Union[ProjectOverview, ProjectOverviewDetailed]: - """Return the overview of a project. + """Return the overview of a project. - This method returns the number of data rows per task queue and issues of a project, - which is equivalent to the Overview tab of a project. + This method returns the number of data rows per task queue and issues of a project, + which is equivalent to the Overview tab of a project. - Args: - details (bool, optional): Whether to include detailed queue information for review and rework queues. - Defaults to False. + Args: + details (bool, optional): Whether to include detailed queue information for review and rework queues. + Defaults to False. - Returns: - Union[ProjectOverview, ProjectOverviewDetailed]: An object representing the project overview. - If `details` is False, returns a `ProjectOverview` object. - If `details` is True, returns a `ProjectOverviewDetailed` object. + Returns: + Union[ProjectOverview, ProjectOverviewDetailed]: An object representing the project overview. + If `details` is False, returns a `ProjectOverview` object. + If `details` is True, returns a `ProjectOverviewDetailed` object. - Raises: - Exception: If there is an error executing the query. + Raises: + Exception: If there is an error executing the query. - """ - def _build_queue_details(overview_category: str, queue_type: str, total: int): - """ - Builds the queue details for a given overview category and queue type. + """ + query = """query ProjectGetOverviewPyApi($projectId: ID!) { + project(where: { id: $projectId }) { + workstreamStateCounts { + state + count + } + taskQueues { + queueType + name + dataRowCount + } + issues { + totalCount + } + completedDataRowCount + } + } + """ - Args: - overview_category (str): The overview category. - queue_type (str): The queue type. - total (int): The total number of items in the queue. + # Must use experimental to access "issues" + result = self.client.execute(query, {"projectId": self.uid}, + experimental=True)["project"] - Returns: - dict: A dictionary containing the queue details. - - data (list): A list of dictionaries representing the queues. - Each dictionary contains the queue name and the number of data rows. - - total (int): The total number of data rows for the category - """ + # Reformat category names + overview = { + utils.snake_case(st["state"]): st["count"] + for st in result.get("workstreamStateCounts") + if st["state"] != "NotInTaskQueue" + } + + overview["issues"] = result.get("issues", {}).get("totalCount") + + # Rename categories + overview["to_label"] = overview.pop("unlabeled") + overview["total_data_rows"] = overview.pop("all") + + if not details: + return ProjectOverview(**overview) + else: + # Build dictionary for queue details for review and rework queues + for category in ["rework", "review"]: queues = [ {tq["name"]: tq.get("dataRowCount")} for tq in result.get("taskQueues") - if tq.get("queueType") == queue_type + if tq.get("queueType") == f"MANUAL_{category.upper()}_QUEUE" ] - return { + overview[f"in_{category}"] = { "data": queues, - "total": total - } - - query = """query ProjectGetOverviewPyApi($projectId: ID!) { - project(where: { id: $projectId }) { - workstreamStateCounts { - state - count - } - taskQueues { - queueType - name - dataRowCount - } - issues { - totalCount - } - completedDataRowCount + "total": overview[f"in_{category}"] } - } - """ - - # Must use experimental to access "issues" - result = self.client.execute(query, {"projectId": self.uid}, - experimental=True)["project"] - - # Reformat category names - overview = { - utils.snake_case(st["state"]): st["count"] - for st in result.get("workstreamStateCounts") - if st["state"] != "NotInTaskQueue" - } - - overview["issues"] = result.get("issues", {}).get("totalCount") - - # Rename categories - overview["to_label"] = overview.pop("unlabeled") - overview["total_data_rows"] = overview.pop("all") - - if not details: - return ProjectOverview(**overview) - else: - # Build queue details for review and rework queues - for category in ["rework", "review"]: - overview[f"in_{category}"] = _build_queue_details(f"in_{category}", - f"MANUAL_{category.upper()}_QUEUE", - overview[f"in_{category}"]) - - return ProjectOverviewDetailed(**overview) + + return ProjectOverviewDetailed(**overview) def clone(self) -> "Project": """