diff --git a/docs/labelbox/project-overview.rst b/docs/labelbox/project-overview.rst index 3157eef13..536921955 100644 --- a/docs/labelbox/project-overview.rst +++ b/docs/labelbox/project-overview.rst @@ -1,4 +1,4 @@ -ProjectOverview +Project Overview =============================================================================================== -.. automodule:: labelbox.schema.project-overview +.. automodule:: labelbox.schema.project_overview diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 85620f851..e638ea1b0 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -39,3 +39,4 @@ from labelbox.schema.identifiables import UniqueIds, GlobalKeys, DataRowIds from labelbox.schema.identifiable import UniqueId, GlobalKey from labelbox.schema.ontology_kind import OntologyKind +from labelbox.schema.project_overview import ProjectOverview, ProjectOverviewDetailed diff --git a/libs/labelbox/src/labelbox/schema/__init__.py b/libs/labelbox/src/labelbox/schema/__init__.py index 10a3e08ee..32f28e7b6 100644 --- a/libs/labelbox/src/labelbox/schema/__init__.py +++ b/libs/labelbox/src/labelbox/schema/__init__.py @@ -25,3 +25,4 @@ import labelbox.schema.identifiable import labelbox.schema.catalog import labelbox.schema.ontology_kind +import labelbox.schema.project_overview \ No newline at end of file diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index 8f4b26aee..aa51cdc22 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -41,7 +41,7 @@ from labelbox.schema.task import Task from labelbox.schema.task_queue import TaskQueue from labelbox.schema.ontology_kind import (EditorTaskType, OntologyKind) -from labelbox.schema.project_overview import ProjectOverview +from labelbox.schema.project_overview import ProjectOverview, ProjectOverviewDetailed if TYPE_CHECKING: from labelbox import BulkImportRequest @@ -1751,16 +1751,25 @@ def 
__check_data_rows_have_been_processed( return response["queryAllDataRowsHaveBeenProcessed"][ "allDataRowsHaveBeenProcessed"] - def get_overview(self) -> ProjectOverview: - """ Return the number of data rows per task queue, and issues of a project - Equivalent of the Overview tab of a project + def get_overview(self, details=False) -> Union[ProjectOverview, ProjectOverviewDetailed]: + """Return the overview of a project. - Args: - with_issues: (optional) boolean to include issues in the overview - Returns: - Object Project_Overview - """ + This method returns the number of data rows per task queue and issues of a project, + which is equivalent to the Overview tab of a project. + + Args: + details (bool, optional): Whether to include detailed queue information for review and rework queues. + Defaults to False. + + Returns: + Union[ProjectOverview, ProjectOverviewDetailed]: An object representing the project overview. + If `details` is False, returns a `ProjectOverview` object. + If `details` is True, returns a `ProjectOverviewDetailed` object. + Raises: + Exception: If there is an error executing the query. + + """ query = """query ProjectGetOverviewPyApi($projectId: ID!) 
{ project(where: { id: $projectId }) { workstreamStateCounts { @@ -1782,31 +1791,38 @@ def get_overview(self) -> ProjectOverview: # Must use experimental to access "issues" result = self.client.execute(query, {"projectId": self.uid}, - experimental=True)["project"] + experimental=True)["project"] + # Reformat category names overview = { utils.snake_case(st["state"]): st["count"] for st in result.get("workstreamStateCounts") if st["state"] != "NotInTaskQueue" } - review_queues = { - tq["name"]: tq.get("dataRowCount") - for tq in result.get("taskQueues") - if tq.get("queueType") == "MANUAL_REVIEW_QUEUE" - } - - # Store the total number of data rows in review - review_queues["all"] = overview.get("in_review") - overview["in_review"] = review_queues - overview["issues"] = result.get("issues", {}).get("totalCount") - # Rename keys + # Rename categories overview["to_label"] = overview.pop("unlabeled") - overview["all_in_data_rows"] = overview.pop("all") + overview["total_data_rows"] = overview.pop("all") - return ProjectOverview(**overview) + if not details: + return ProjectOverview(**overview) + else: + # Build dictionary for queue details for review and rework queues + for category in ["rework", "review"]: + queues = [ + {tq["name"]: tq.get("dataRowCount")} + for tq in result.get("taskQueues") + if tq.get("queueType") == f"MANUAL_{category.upper()}_QUEUE" + ] + + overview[f"in_{category}"] = { + "data": queues, + "total": overview[f"in_{category}"] + } + + return ProjectOverviewDetailed(**overview) def clone(self) -> "Project": """ diff --git a/libs/labelbox/src/labelbox/schema/project_overview.py b/libs/labelbox/src/labelbox/schema/project_overview.py index 0b517615e..3e75e7282 100644 --- a/libs/labelbox/src/labelbox/schema/project_overview.py +++ b/libs/labelbox/src/labelbox/schema/project_overview.py @@ -1,7 +1,6 @@ -from typing import Dict +from typing import Dict, List from labelbox.pydantic_compat import BaseModel - -from typing import Dict +from 
typing_extensions import TypedDict class ProjectOverview(BaseModel): """ @@ -9,30 +8,65 @@ class ProjectOverview(BaseModel): under the "Overview" tab of a particular project. All attributes represent the number of data rows in the corresponding state. - The `in_review` attribute is a dictionary where the keys are the queue names - and the values are the number of data rows in that queue. + The `to_label` attribute represents the number of data rows that are yet to be labeled (To Label). + The `in_review` attribute represents the number of data rows that are in the Review queues (In Review). + The `in_rework` attribute represents the number of data rows that are in the Rework queue (In Rework). + The `skipped` attribute represents the number of data rows that have been skipped (Skipped). + The `done` attribute represents the number of data rows that have been marked as Done (Done). + The `issues` attribute represents the number of data rows with associated issues (Issues). - Attributes: - Representing existing fields from the Overview tag (UI names in parentheses): - - to_label (int): The number of data rows that are yet to be labeled (To Label). - in_review (Dict[str, int]): A dictionary where the keys are the queue names . - and the values are the number of data rows in that queue. (In Review) - in_rework (int): The number of data rows that are in the Rework queue (In Rework). - skipped (int): The number of data rows that have been skipped (Skipped). - done (int): The number of data rows that have been marked as Done (Done). - issues (int): The number of data rows with associated issues (Issues). - - Additional values: - - labeled (int): The number of data rows that have been labeled. - all_in_data_rows (int): The total number of data rows in the project. + The following don't appear in the UI + The `labeled` attribute represents the number of data rows that have been labeled. 
+ The `total_data_rows` attribute represents the total number of data rows in the project. """ to_label: int - in_review: Dict[str, int] + in_review: int in_rework: int skipped: int done: int issues: int labeled: int - all_in_data_rows: int \ No newline at end of file + total_data_rows: int + + +class _QueueDetail(TypedDict): + """ + Class that represents the detailed information of the queues in the project overview. + The `data` attribute is a list of dictionaries where the keys are the queue names + and the values are the number of data rows in that queue. + """ + data: List[Dict[str, int]] + total: int + + +class ProjectOverviewDetailed(BaseModel): + """ + Class that represents a project summary as displayed in the UI, in Annotate, + under the "Overview" tab of a particular project. + This class adds the list of task queues for the `in_review` and `in_rework` attributes. + + All attributes represent the number of data rows in the corresponding state. + The `to_label` attribute represents the number of data rows that are yet to be labeled (To Label). + The `in_review` attribute is a dictionary where the keys are (In Review): + data: a list of dictionaries with the queue name and the number of data rows + total: the total number of data rows in the In Review state + The `in_rework` attribute is a dictionary where the keys are (In Rework): + data: a list of dictionaries with the queue name and the number of data rows + total: the total number of data rows in the In Rework state + The `skipped` attribute represents the number of data rows that have been skipped (Skipped). + The `done` attribute represents the number of data rows that have been marked as Done (Done). + The `issues` attribute represents the number of data rows with associated issues (Issues). + + The following don't appear in the UI + The `labeled` attribute represents the number of data rows that have been labeled. 
+ The `total_data_rows` attribute represents the total number of data rows in the project. + """ + + to_label: int + in_review: _QueueDetail + in_rework: _QueueDetail + skipped: int + done: int + issues: int + labeled: int + total_data_rows: int \ No newline at end of file diff --git a/libs/labelbox/tests/integration/test_task_queue.py b/libs/labelbox/tests/integration/test_task_queue.py index 1bd721b51..2a6ca45d8 100644 --- a/libs/labelbox/tests/integration/test_task_queue.py +++ b/libs/labelbox/tests/integration/test_task_queue.py @@ -11,18 +11,31 @@ def test_get_task_queue(project: Project): assert review_queue -def test_get_overview(project: Project): +def test_get_overview_no_details(project: Project): po = project.get_overview() assert isinstance(po.to_label, int) - assert isinstance(po.in_review, dict) + assert isinstance(po.in_review, int) assert isinstance(po.in_rework, int) assert isinstance(po.skipped, int) assert isinstance(po.done, int) assert isinstance(po.issues, int) assert isinstance(po.labeled, int) - assert isinstance(po.all_in_data_rows, int) + assert isinstance(po.total_data_rows, int) +def test_get_overview_with_details(project: Project): + po = project.get_overview(details=True) + + assert isinstance(po.to_label, int) + assert isinstance(po.in_review["data"], list) + assert isinstance(po.in_review["total"], int) + assert isinstance(po.in_rework["data"], list) + assert isinstance(po.in_rework["total"], int) + assert isinstance(po.skipped, int) + assert isinstance(po.done, int) + assert isinstance(po.issues, int) + assert isinstance(po.labeled, int) + assert isinstance(po.total_data_rows, int) def _validate_moved(project, queue_name, data_row_count): timeout_seconds = 30