Skip to content

Commit 2e717c8

Browse files
authored
Modify get_overview to support duplicate names and provide a simplify… (#1629)
2 parents 80c730c + e58438b commit 2e717c8

File tree

6 files changed

+114
-49
lines changed

6 files changed

+114
-49
lines changed

docs/labelbox/project-overview.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ProjectOverview
1+
Project Overview
22
===============================================================================================
33

44
.. automodule:: labelbox.schema.project_overview

libs/labelbox/src/labelbox/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,4 @@
3939
from labelbox.schema.identifiables import UniqueIds, GlobalKeys, DataRowIds
4040
from labelbox.schema.identifiable import UniqueId, GlobalKey
4141
from labelbox.schema.ontology_kind import OntologyKind
42+
from labelbox.schema.project_overview import ProjectOverview, ProjectOverviewDetailed

libs/labelbox/src/labelbox/schema/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@
2525
import labelbox.schema.identifiable
2626
import labelbox.schema.catalog
2727
import labelbox.schema.ontology_kind
28+
import labelbox.schema.project_overview

libs/labelbox/src/labelbox/schema/project.py

Lines changed: 39 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from labelbox.schema.task import Task
4242
from labelbox.schema.task_queue import TaskQueue
4343
from labelbox.schema.ontology_kind import (EditorTaskType, OntologyKind)
44-
from labelbox.schema.project_overview import ProjectOverview
44+
from labelbox.schema.project_overview import ProjectOverview, ProjectOverviewDetailed
4545

4646
if TYPE_CHECKING:
4747
from labelbox import BulkImportRequest
@@ -1751,16 +1751,25 @@ def __check_data_rows_have_been_processed(
17511751
return response["queryAllDataRowsHaveBeenProcessed"][
17521752
"allDataRowsHaveBeenProcessed"]
17531753

1754-
def get_overview(self) -> ProjectOverview:
1755-
""" Return the number of data rows per task queue, and issues of a project
1756-
Equivalent of the Overview tab of a project
1754+
def get_overview(self, details=False) -> Union[ProjectOverview, ProjectOverviewDetailed]:
1755+
"""Return the overview of a project.
17571756
1758-
Args:
1759-
with_issues: (optional) boolean to include issues in the overview
1760-
Returns:
1761-
Object Project_Overview
1762-
"""
1757+
This method returns the number of data rows per task queue and issues of a project,
1758+
which is equivalent to the Overview tab of a project.
1759+
1760+
Args:
1761+
details (bool, optional): Whether to include detailed queue information for review and rework queues.
1762+
Defaults to False.
1763+
1764+
Returns:
1765+
Union[ProjectOverview, ProjectOverviewDetailed]: An object representing the project overview.
1766+
If `details` is False, returns a `ProjectOverview` object.
1767+
If `details` is True, returns a `ProjectOverviewDetailed` object.
17631768
1769+
Raises:
1770+
Exception: If there is an error executing the query.
1771+
1772+
"""
17641773
query = """query ProjectGetOverviewPyApi($projectId: ID!) {
17651774
project(where: { id: $projectId }) {
17661775
workstreamStateCounts {
@@ -1782,31 +1791,38 @@ def get_overview(self) -> ProjectOverview:
17821791

17831792
# Must use experimental to access "issues"
17841793
result = self.client.execute(query, {"projectId": self.uid},
1785-
experimental=True)["project"]
1794+
experimental=True)["project"]
17861795

1796+
# Reformat category names
17871797
overview = {
17881798
utils.snake_case(st["state"]): st["count"]
17891799
for st in result.get("workstreamStateCounts")
17901800
if st["state"] != "NotInTaskQueue"
17911801
}
17921802

1793-
review_queues = {
1794-
tq["name"]: tq.get("dataRowCount")
1795-
for tq in result.get("taskQueues")
1796-
if tq.get("queueType") == "MANUAL_REVIEW_QUEUE"
1797-
}
1798-
1799-
# Store the total number of data rows in review
1800-
review_queues["all"] = overview.get("in_review")
1801-
overview["in_review"] = review_queues
1802-
18031803
overview["issues"] = result.get("issues", {}).get("totalCount")
18041804

1805-
# Rename keys
1805+
# Rename categories
18061806
overview["to_label"] = overview.pop("unlabeled")
1807-
overview["all_in_data_rows"] = overview.pop("all")
1807+
overview["total_data_rows"] = overview.pop("all")
18081808

1809-
return ProjectOverview(**overview)
1809+
if not details:
1810+
return ProjectOverview(**overview)
1811+
else:
1812+
# Build dictionary for queue details for review and rework queues
1813+
for category in ["rework", "review"]:
1814+
queues = [
1815+
{tq["name"]: tq.get("dataRowCount")}
1816+
for tq in result.get("taskQueues")
1817+
if tq.get("queueType") == f"MANUAL_{category.upper()}_QUEUE"
1818+
]
1819+
1820+
overview[f"in_{category}"] = {
1821+
"data": queues,
1822+
"total": overview[f"in_{category}"]
1823+
}
1824+
1825+
return ProjectOverviewDetailed(**overview)
18101826

18111827
def clone(self) -> "Project":
18121828
"""
Lines changed: 56 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,72 @@
1-
from typing import Dict
1+
from typing import Dict, List
22
from labelbox.pydantic_compat import BaseModel
3-
4-
from typing import Dict
3+
from typing_extensions import TypedDict
54

65
class ProjectOverview(BaseModel):
76
"""
87
Class that represents a project summary as displayed in the UI, in Annotate,
98
under the "Overview" tab of a particular project.
109
1110
All attributes represent the number of data rows in the corresponding state.
12-
The `in_review` attribute is a dictionary where the keys are the queue names
13-
and the values are the number of data rows in that queue.
11+
The `to_label` attribute represents the number of data rows that are yet to be labeled (To Label).
12+
The `in_review` attribute represents the total number of data rows that are in the In Review state (In Review).
13+
The `in_rework` attribute represents the number of data rows that are in the Rework queue (In Rework).
14+
The `skipped` attribute represents the number of data rows that have been skipped (Skipped).
15+
The `done` attribute represents the number of data rows that have been marked as Done (Done).
16+
The `issues` attribute represents the number of data rows with associated issues (Issues).
1417
15-
Attributes:
16-
Representing existing fields from the Overview tag (UI names in parentheses):
17-
18-
to_label (int): The number of data rows that are yet to be labeled (To Label).
19-
in_review (Dict[str, int]): A dictionary where the keys are the queue names .
20-
and the values are the number of data rows in that queue. (In Review)
21-
in_rework (int): The number of data rows that are in the Rework queue (In Rework).
22-
skipped (int): The number of data rows that have been skipped (Skipped).
23-
done (int): The number of data rows that have been marked as Done (Done).
24-
issues (int): The number of data rows with associated issues (Issues).
25-
26-
Additional values:
27-
28-
labeled (int): The number of data rows that have been labeled.
29-
all_in_data_rows (int): The total number of data rows in the project.
18+
The following attributes do not appear in the UI:
19+
The `labeled` attribute represents the number of data rows that have been labeled.
20+
The `total_data_rows` attribute represents the total number of data rows in the project.
3021
"""
3122
to_label: int
32-
in_review: Dict[str, int]
23+
in_review: int
3324
in_rework: int
3425
skipped: int
3526
done: int
3627
issues: int
3728
labeled: int
38-
all_in_data_rows: int
29+
total_data_rows: int
30+
31+
32+
class _QueueDetail(TypedDict):
33+
"""
34+
Class that represents the detailed information of the queues in the project overview.
35+
The `data` attribute is a list of dictionaries where the keys are the queue names
36+
and the values are the number of data rows in that queue.
37+
"""
38+
data: List[Dict[str, int]]
39+
total: int
40+
41+
42+
class ProjectOverviewDetailed(BaseModel):
43+
"""
44+
Class that represents a project summary as displayed in the UI, in Annotate,
45+
under the "Overview" tab of a particular project.
46+
This class adds the list of task queues for the `in_review` and `in_rework` attributes.
47+
48+
Unless noted otherwise below, each attribute represents the number of data rows in the corresponding state.
49+
The `to_label` attribute represents the number of data rows that are yet to be labeled (To Label).
50+
The `in_review` attribute is a dictionary with the following keys (In Review):
51+
data: a list of dictionaries with the queue name and the number of data rows
52+
total: the total number of data rows in the In Review state
53+
The `in_rework` attribute is a dictionary with the following keys (In Rework):
54+
data: a list of dictionaries with the queue name and the number of data rows
55+
total: the total number of data rows in the In Rework state
56+
The `skipped` attribute represents the number of data rows that have been skipped (Skipped).
57+
The `done` attribute represents the number of data rows that have been marked as Done (Done).
58+
The `issues` attribute represents the number of data rows with associated issues (Issues).
59+
60+
The following attributes do not appear in the UI:
61+
The `labeled` attribute represents the number of data rows that have been labeled.
62+
The `total_data_rows` attribute represents the total number of data rows in the project.
63+
"""
64+
65+
to_label: int
66+
in_review: _QueueDetail
67+
in_rework: _QueueDetail
68+
skipped: int
69+
done: int
70+
issues: int
71+
labeled: int
72+
total_data_rows: int

libs/labelbox/tests/integration/test_task_queue.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,31 @@ def test_get_task_queue(project: Project):
1111
assert review_queue
1212

1313

14-
def test_get_overview(project: Project):
14+
def test_get_overview_no_details(project: Project):
1515
po = project.get_overview()
1616

1717
assert isinstance(po.to_label, int)
18-
assert isinstance(po.in_review, dict)
18+
assert isinstance(po.in_review, int)
1919
assert isinstance(po.in_rework, int)
2020
assert isinstance(po.skipped, int)
2121
assert isinstance(po.done, int)
2222
assert isinstance(po.issues, int)
2323
assert isinstance(po.labeled, int)
24-
assert isinstance(po.all_in_data_rows, int)
24+
assert isinstance(po.total_data_rows, int)
2525

26+
def test_get_overview_with_details(project: Project):
27+
po = project.get_overview(details=True)
28+
29+
assert isinstance(po.to_label, int)
30+
assert isinstance(po.in_review["data"], list)
31+
assert isinstance(po.in_review["total"], int)
32+
assert isinstance(po.in_rework["data"], list)
33+
assert isinstance(po.in_rework["total"], int)
34+
assert isinstance(po.skipped, int)
35+
assert isinstance(po.done, int)
36+
assert isinstance(po.issues, int)
37+
assert isinstance(po.labeled, int)
38+
assert isinstance(po.total_data_rows, int)
2639

2740
def _validate_moved(project, queue_name, data_row_count):
2841
timeout_seconds = 30

0 commit comments

Comments
 (0)