Skip to content

Modify get_overview to support duplicate names and provide a simplify… #1629

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/labelbox/project-overview.rst
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ProjectOverview
Project Overview
===============================================================================================

.. automodule:: labelbox.schema.project_overview
Expand Down
1 change: 1 addition & 0 deletions libs/labelbox/src/labelbox/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,4 @@
from labelbox.schema.identifiables import UniqueIds, GlobalKeys, DataRowIds
from labelbox.schema.identifiable import UniqueId, GlobalKey
from labelbox.schema.ontology_kind import OntologyKind
from labelbox.schema.project_overview import ProjectOverview, ProjectOverviewDetailed
1 change: 1 addition & 0 deletions libs/labelbox/src/labelbox/schema/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@
import labelbox.schema.identifiable
import labelbox.schema.catalog
import labelbox.schema.ontology_kind
import labelbox.schema.project_overview
62 changes: 39 additions & 23 deletions libs/labelbox/src/labelbox/schema/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from labelbox.schema.task import Task
from labelbox.schema.task_queue import TaskQueue
from labelbox.schema.ontology_kind import (EditorTaskType, OntologyKind)
from labelbox.schema.project_overview import ProjectOverview
from labelbox.schema.project_overview import ProjectOverview, ProjectOverviewDetailed

if TYPE_CHECKING:
from labelbox import BulkImportRequest
Expand Down Expand Up @@ -1751,16 +1751,25 @@ def __check_data_rows_have_been_processed(
return response["queryAllDataRowsHaveBeenProcessed"][
"allDataRowsHaveBeenProcessed"]

def get_overview(self) -> ProjectOverview:
""" Return the number of data rows per task queue, and issues of a project
Equivalent of the Overview tab of a project
def get_overview(self, details=False) -> Union[ProjectOverview, ProjectOverviewDetailed]:
"""Return the overview of a project.

Args:
with_issues: (optional) boolean to include issues in the overview
Returns:
Object Project_Overview
"""
This method returns the number of data rows per task queue and issues of a project,
which is equivalent to the Overview tab of a project.

Args:
details (bool, optional): Whether to include detailed queue information for review and rework queues.
Defaults to False.

Returns:
Union[ProjectOverview, ProjectOverviewDetailed]: An object representing the project overview.
If `details` is False, returns a `ProjectOverview` object.
If `details` is True, returns a `ProjectOverviewDetailed` object.

Raises:
Exception: If there is an error executing the query.

"""
query = """query ProjectGetOverviewPyApi($projectId: ID!) {
project(where: { id: $projectId }) {
workstreamStateCounts {
Expand All @@ -1782,31 +1791,38 @@ def get_overview(self) -> ProjectOverview:

# Must use experimental to access "issues"
result = self.client.execute(query, {"projectId": self.uid},
experimental=True)["project"]
experimental=True)["project"]

# Reformat category names
overview = {
utils.snake_case(st["state"]): st["count"]
for st in result.get("workstreamStateCounts")
if st["state"] != "NotInTaskQueue"
}

review_queues = {
tq["name"]: tq.get("dataRowCount")
for tq in result.get("taskQueues")
if tq.get("queueType") == "MANUAL_REVIEW_QUEUE"
}

# Store the total number of data rows in review
review_queues["all"] = overview.get("in_review")
overview["in_review"] = review_queues

overview["issues"] = result.get("issues", {}).get("totalCount")

# Rename keys
# Rename categories
overview["to_label"] = overview.pop("unlabeled")
overview["all_in_data_rows"] = overview.pop("all")
overview["total_data_rows"] = overview.pop("all")

return ProjectOverview(**overview)
if not details:
return ProjectOverview(**overview)
else:
# Build dictionary for queue details for review and rework queues
for category in ["rework", "review"]:
queues = [
{tq["name"]: tq.get("dataRowCount")}
for tq in result.get("taskQueues")
if tq.get("queueType") == f"MANUAL_{category.upper()}_QUEUE"
]

overview[f"in_{category}"] = {
"data": queues,
"total": overview[f"in_{category}"]
}

return ProjectOverviewDetailed(**overview)

def clone(self) -> "Project":
"""
Expand Down
78 changes: 56 additions & 22 deletions libs/labelbox/src/labelbox/schema/project_overview.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,72 @@
from typing import Dict
from typing import Dict, List
from labelbox.pydantic_compat import BaseModel

from typing import Dict
from typing_extensions import TypedDict

class ProjectOverview(BaseModel):
"""
Class that represents a project summary as displayed in the UI, in Annotate,
under the "Overview" tab of a particular project.

All attributes represent the number of data rows in the corresponding state.
The `in_review` attribute is a dictionary where the keys are the queue names
and the values are the number of data rows in that queue.
The `to_label` attribute represents the number of data rows that are yet to be labeled (To Label).
The `in_review` attribute represents the total number of data rows that are in the Review queues (In Review).
The `in_rework` attribute represents the number of data rows that are in the Rework queue (In Rework).
The `skipped` attribute represents the number of data rows that have been skipped (Skipped).
The `done` attribute represents the number of data rows that have been marked as Done (Done).
The `issues` attribute represents the number of data rows with associated issues (Issues).

Attributes:
Representing existing fields from the Overview tag (UI names in parentheses):

to_label (int): The number of data rows that are yet to be labeled (To Label).
in_review (Dict[str, int]): A dictionary where the keys are the queue names .
and the values are the number of data rows in that queue. (In Review)
in_rework (int): The number of data rows that are in the Rework queue (In Rework).
skipped (int): The number of data rows that have been skipped (Skipped).
done (int): The number of data rows that have been marked as Done (Done).
issues (int): The number of data rows with associated issues (Issues).

Additional values:

labeled (int): The number of data rows that have been labeled.
all_in_data_rows (int): The total number of data rows in the project.
The following don't appear in the UI
The `labeled` attribute represents the number of data rows that have been labeled.
The `total_data_rows` attribute represents the total number of data rows in the project.
"""
to_label: int
in_review: Dict[str, int]
in_review: int
in_rework: int
skipped: int
done: int
issues: int
labeled: int
all_in_data_rows: int
total_data_rows: int


class _QueueDetail(TypedDict):
"""
Class that represents the detailed information of the queues in the project overview.
The `data` attribute is a list of dictionaries where the keys are the queue names
and the values are the number of data rows in that queue.
"""
data: List[Dict[str, int]]
total: int


class ProjectOverviewDetailed(BaseModel):
"""
Class that represents a project summary as displayed in the UI, in Annotate,
under the "Overview" tab of a particular project.
This class adds the list of task queues for the `in_review` and `in_rework` attributes.

All attributes represent the number of data rows in the corresponding state.
The `to_label` attribute represents the number of data rows that are yet to be labeled (To Label).
The `in_review` attribute is a dictionary with the following keys (In Review):
data: a list of dictionaries with the queue name and the number of data rows
total: the total number of data rows in the In Review state
The `in_rework` attribute is a dictionary with the following keys (In Rework):
data: a list of dictionaries with the queue name and the number of data rows
total: the total number of data rows in the In Rework state
The `skipped` attribute represents the number of data rows that have been skipped (Skipped).
The `done` attribute represents the number of data rows that have been marked as Done (Done).
The `issues` attribute represents the number of data rows with associated issues (Issues).

The following don't appear in the UI
The `labeled` attribute represents the number of data rows that have been labeled.
The `total_data_rows` attribute represents the total number of data rows in the project.
"""

to_label: int
in_review: _QueueDetail
in_rework: _QueueDetail
skipped: int
done: int
issues: int
labeled: int
total_data_rows: int
19 changes: 16 additions & 3 deletions libs/labelbox/tests/integration/test_task_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,31 @@ def test_get_task_queue(project: Project):
assert review_queue


def test_get_overview(project: Project):
def test_get_overview_no_details(project: Project):
po = project.get_overview()

assert isinstance(po.to_label, int)
assert isinstance(po.in_review, dict)
assert isinstance(po.in_review, int)
assert isinstance(po.in_rework, int)
assert isinstance(po.skipped, int)
assert isinstance(po.done, int)
assert isinstance(po.issues, int)
assert isinstance(po.labeled, int)
assert isinstance(po.all_in_data_rows, int)
assert isinstance(po.total_data_rows, int)

def test_get_overview_with_details(project: Project):
po = project.get_overview(details=True)

assert isinstance(po.to_label, int)
assert isinstance(po.in_review["data"], list)
assert isinstance(po.in_review["total"], int)
assert isinstance(po.in_rework["data"], list)
assert isinstance(po.in_rework["total"], int)
assert isinstance(po.skipped, int)
assert isinstance(po.done, int)
assert isinstance(po.issues, int)
assert isinstance(po.labeled, int)
assert isinstance(po.total_data_rows, int)

def _validate_moved(project, queue_name, data_row_count):
timeout_seconds = 30
Expand Down