Skip to content

Commit 7ba272c

Browse files
committed
Modify get_overview to support duplicate queue names and provide a simplified output
1 parent fa6deb5 commit 7ba272c

File tree

2 files changed

+136
-68
lines changed

2 files changed

+136
-68
lines changed

libs/labelbox/src/labelbox/schema/project.py

Lines changed: 81 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from labelbox.schema.task import Task
4242
from labelbox.schema.task_queue import TaskQueue
4343
from labelbox.schema.ontology_kind import (EditorTaskType, OntologyKind)
44-
from labelbox.schema.project_overview import ProjectOverview
44+
from labelbox.schema.project_overview import ProjectOverview, ProjectOverviewDetailed
4545

4646
if TYPE_CHECKING:
4747
from labelbox import BulkImportRequest
@@ -1753,62 +1753,97 @@ def __check_data_rows_have_been_processed(
17531753
return response["queryAllDataRowsHaveBeenProcessed"][
17541754
"allDataRowsHaveBeenProcessed"]
17551755

1756-
def get_overview(self) -> ProjectOverview:
1757-
""" Return the number of data rows per task queue, and issues of a project
1758-
Equivalent of the Overview tab of a project
1756+
def get_overview(self, details=False) -> Union[ProjectOverview, ProjectOverviewDetailed]:
1757+
"""Return the overview of a project.
1758+
1759+
This method returns the number of data rows per task queue and issues of a project,
1760+
which is equivalent to the Overview tab of a project.
1761+
1762+
Args:
1763+
details (bool, optional): Whether to include detailed queue information for review and rework queues.
1764+
Defaults to False.
1765+
1766+
Returns:
1767+
Union[ProjectOverview, ProjectOverviewDetailed]: An object representing the project overview.
1768+
If `details` is False, returns a `ProjectOverview` object.
1769+
If `details` is True, returns a `ProjectOverviewDetailed` object.
1770+
1771+
Raises:
1772+
Exception: If there is an error executing the query.
1773+
1774+
"""
1775+
def _build_queue_details(overview_category: str, queue_type: str, total: int):
1776+
"""
1777+
Builds the queue details for a given overview category and queue type.
17591778
17601779
Args:
1761-
with_issues: (optional) boolean to include issues in the overview
1780+
overview_category (str): The overview category.
1781+
queue_type (str): The queue type.
1782+
total (int): The total number of items in the queue.
1783+
17621784
Returns:
1763-
Object Project_Overview
1785+
dict: A dictionary containing the queue details.
1786+
- data (list): A list of dictionaries representing the queues.
1787+
Each dictionary contains the queue name and the number of data rows.
1788+
- total (int): The total number of data rows for the category
17641789
"""
1790+
queues = [
1791+
{tq["name"]: tq.get("dataRowCount")}
1792+
for tq in result.get("taskQueues")
1793+
if tq.get("queueType") == queue_type
1794+
]
1795+
1796+
return {
1797+
"data": queues,
1798+
"total": total
1799+
}
17651800

1766-
query = """query ProjectGetOverviewPyApi($projectId: ID!) {
1767-
project(where: { id: $projectId }) {
1768-
workstreamStateCounts {
1769-
state
1770-
count
1771-
}
1772-
taskQueues {
1773-
queueType
1774-
name
1775-
dataRowCount
1776-
}
1777-
issues {
1778-
totalCount
1779-
}
1780-
completedDataRowCount
1801+
query = """query ProjectGetOverviewPyApi($projectId: ID!) {
1802+
project(where: { id: $projectId }) {
1803+
workstreamStateCounts {
1804+
state
1805+
count
1806+
}
1807+
taskQueues {
1808+
queueType
1809+
name
1810+
dataRowCount
1811+
}
1812+
issues {
1813+
totalCount
1814+
}
1815+
completedDataRowCount
1816+
}
17811817
}
1782-
}
1783-
"""
1818+
"""
17841819

1785-
# Must use experimental to access "issues"
1786-
result = self.client.execute(query, {"projectId": self.uid},
1787-
experimental=True)["project"]
1820+
# Must use experimental to access "issues"
1821+
result = self.client.execute(query, {"projectId": self.uid},
1822+
experimental=True)["project"]
17881823

1789-
overview = {
1790-
utils.snake_case(st["state"]): st["count"]
1791-
for st in result.get("workstreamStateCounts")
1792-
if st["state"] != "NotInTaskQueue"
1793-
}
1794-
1795-
review_queues = {
1796-
tq["name"]: tq.get("dataRowCount")
1797-
for tq in result.get("taskQueues")
1798-
if tq.get("queueType") == "MANUAL_REVIEW_QUEUE"
1799-
}
1800-
1801-
# Store the total number of data rows in review
1802-
review_queues["all"] = overview.get("in_review")
1803-
overview["in_review"] = review_queues
1824+
# Reformat category names
1825+
overview = {
1826+
utils.snake_case(st["state"]): st["count"]
1827+
for st in result.get("workstreamStateCounts")
1828+
if st["state"] != "NotInTaskQueue"
1829+
}
18041830

1805-
overview["issues"] = result.get("issues", {}).get("totalCount")
1831+
overview["issues"] = result.get("issues", {}).get("totalCount")
18061832

1807-
# Rename keys
1808-
overview["to_label"] = overview.pop("unlabeled")
1809-
overview["all_in_data_rows"] = overview.pop("all")
1833+
# Rename categories
1834+
overview["to_label"] = overview.pop("unlabeled")
1835+
overview["total_data_rows"] = overview.pop("all")
18101836

1811-
return ProjectOverview(**overview)
1837+
if not details:
1838+
return ProjectOverview(**overview)
1839+
else:
1840+
# Build queue details for review and rework queues
1841+
for category in ["rework", "review"]:
1842+
overview[f"in_{category}"] = _build_queue_details(f"in_{category}",
1843+
f"MANUAL_{category.upper()}_QUEUE",
1844+
overview[f"in_{category}"])
1845+
1846+
return ProjectOverviewDetailed(**overview)
18121847

18131848
def clone(self) -> "Project":
18141849
"""
Lines changed: 55 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,71 @@
1-
from typing import Dict
1+
from typing import Dict, List, TypedDict
22
from labelbox.pydantic_compat import BaseModel
33

4-
from typing import Dict
5-
64
class ProjectOverview(BaseModel):
75
"""
86
Class that represents a project summary as displayed in the UI, in Annotate,
97
under the "Overview" tab of a particular project.
108
119
All attributes represent the number of data rows in the corresponding state.
12-
The `in_review` attribute is a dictionary where the keys are the queue names
13-
and the values are the number of data rows in that queue.
10+
The `to_label` attribute represents the number of data rows that are yet to be labeled (To Label).
11+
The `in_review` attribute represents the number of data rows that are in the In Review state (In Review).
12+
The `in_rework` attribute represents the number of data rows that are in the Rework queue (In Rework).
13+
The `skipped` attribute represents the number of data rows that have been skipped (Skipped).
14+
The `done` attribute represents the number of data rows that have been marked as Done (Done).
15+
The `issues` attribute represents the number of data rows with associated issues (Issues).
1416
15-
Attributes:
16-
Representing existing fields from the Overview tag (UI names in parentheses):
17-
18-
to_label (int): The number of data rows that are yet to be labeled (To Label).
19-
in_review (Dict[str, int]): A dictionary where the keys are the queue names .
20-
and the values are the number of data rows in that queue. (In Review)
21-
in_rework (int): The number of data rows that are in the Rework queue (In Rework).
22-
skipped (int): The number of data rows that have been skipped (Skipped).
23-
done (int): The number of data rows that have been marked as Done (Done).
24-
issues (int): The number of data rows with associated issues (Issues).
25-
26-
Additional values:
27-
28-
labeled (int): The number of data rows that have been labeled.
29-
all_in_data_rows (int): The total number of data rows in the project.
17+
The following attributes do not appear in the UI:
18+
The `labeled` attribute represents the number of data rows that have been labeled.
19+
The `total_data_rows` attribute represents the total number of data rows in the project.
3020
"""
3121
to_label: int
32-
in_review: Dict[str, int]
22+
in_review: int
3323
in_rework: int
3424
skipped: int
3525
done: int
3626
issues: int
3727
labeled: int
38-
all_in_data_rows: int
28+
total_data_rows: int
29+
30+
31+
class _QueueDetail(TypedDict):
32+
"""
33+
Class that represents the detailed information of the queues in the project overview.
34+
The `data` attribute is a list of dictionaries where the keys are the queue names
35+
and the values are the number of data rows in that queue.
36+
"""
37+
data: List[Dict[str, int]]
38+
total: int
39+
40+
41+
class ProjectOverviewDetailed(BaseModel):
42+
"""
43+
Class that represents a project summary as displayed in the UI, in Annotate,
44+
under the "Overview" tab of a particular project.
45+
This class adds the list of task queues for the `in_review` and `in_rework` attributes.
46+
47+
All attributes represent the number of data rows in the corresponding state.
48+
The `to_label` attribute represents the number of data rows that are yet to be labeled (To Label).
49+
The `in_review` attribute (In Review) is a dictionary with the following keys:
50+
data: a list of dictionaries with the queue name and the number of data rows
51+
total: the total number of data rows in the In Review state
52+
The `in_rework` attribute (In Rework) is a dictionary with the following keys:
53+
data: a list of dictionaries with the queue name and the number of data rows
54+
total: the total number of data rows in the In Rework state
55+
The `skipped` attribute represents the number of data rows that have been skipped (Skipped).
56+
The `done` attribute represents the number of data rows that have been marked as Done (Done).
57+
The `issues` attribute represents the number of data rows with associated issues (Issues).
58+
59+
The following attributes do not appear in the UI:
60+
The `labeled` attribute represents the number of data rows that have been labeled.
61+
The `total_data_rows` attribute represents the total number of data rows in the project.
62+
"""
63+
64+
to_label: int
65+
in_review: _QueueDetail
66+
in_rework: _QueueDetail
67+
skipped: int
68+
done: int
69+
issues: int
70+
labeled: int
71+
total_data_rows: int

0 commit comments

Comments
 (0)