Skip to content

Commit 2d03c54

Browse files
authored
Speed up slices and add client.slices for all customer slices (#339)
* Speed up slices and add client.slices for all customer slices * Speed up unit test listing * Refactor slow __post_init__ pattern away * Remove test_reprs * Update version and CHANGELOG * Try to increase resource class * Patch unavailable fromisoformat in Python 3.6 * Bump to 0.14.14
1 parent e90b2c7 commit 2d03c54

File tree

10 files changed

+126
-89
lines changed

10 files changed

+126
-89
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,20 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [0.14.14](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.14) - 2022-08-11
9+
10+
### Added
11+
- `client.slices` to list all of a user's slices, independent of dataset
12+
13+
### Fixed
14+
- Validate: unit test listing and evaluation history listing now use the new bulk fetch endpoints for faster listing.
15+
816
## [0.14.13](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.13) - 2022-08-10
917

1018
### Fixed
1119
- Fix payload parsing for scene export
1220

21+
1322
## [0.14.12](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.12) - 2022-08-05
1423

1524
### Added

cli/slices.py

Lines changed: 14 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,9 @@ def slices(ctx, web):
2323
@slices.command("list")
2424
def list_slices():
2525
"""List all available Slices"""
26-
with Live(
27-
Spinner("dots4", text="Finding your Slices!"),
28-
vertical_overflow="visible",
29-
) as live:
30-
client = init_client()
31-
datasets = client.datasets
26+
client = init_client()
27+
console = Console()
28+
with console.status("Finding your Slices!", spinner="dots4"):
3229
table = Table(
3330
Column("id", overflow="fold", min_width=24),
3431
"name",
@@ -37,26 +34,15 @@ def list_slices():
3734
title=":cake: Slices",
3835
title_justify="left",
3936
)
40-
errors = {}
41-
for ds in datasets:
42-
try:
43-
ds_slices = ds.slices
44-
if ds_slices:
45-
for slc_id in ds_slices:
46-
slice_url = nucleus_url(f"{ds.id}/{slc_id}")
47-
slice_info = client.get_slice(slc_id).info()
48-
table.add_row(
49-
slc_id, slice_info["name"], ds.name, slice_url
50-
)
51-
live.update(table)
52-
except NucleusAPIError as e:
53-
errors[ds.id] = e
54-
55-
error_tree = Tree(
56-
":x: Encountered the following errors while fetching information"
57-
)
58-
for ds_id, error in errors.items():
59-
dataset_branch = error_tree.add(f"Dataset: {ds_id}")
60-
dataset_branch.add(f"Error: {error}")
37+
datasets = client.datasets
38+
id_to_datasets = {d.id: d for d in datasets}
39+
all_slices = client.slices
40+
for s in all_slices:
41+
table.add_row(
42+
s.id,
43+
s.name,
44+
id_to_datasets[s.dataset_id].name,
45+
nucleus_url(f"{s.dataset_id}/{s.id}"),
46+
)
6147

62-
Console().print(error_tree)
48+
console.print(table)

nucleus/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,12 @@ def jobs(
235235
"""
236236
return self.list_jobs()
237237

238+
@property
239+
def slices(self) -> List[Slice]:
240+
response = self.make_request({}, "slice/", requests.get)
241+
slices = [Slice.from_request(info, self) for info in response]
242+
return slices
243+
238244
@deprecated(msg="Use the NucleusClient.models property in the future.")
239245
def list_models(self) -> List[Model]:
240246
return self.models

nucleus/slice.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1+
import datetime
12
import warnings
2-
from typing import Dict, Iterable, List, Set, Tuple, Union
3+
from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
34

45
import requests
56

@@ -49,16 +50,55 @@ def __init__(self, slice_id: str, client):
4950
self._client = client
5051
self._name = None
5152
self._dataset_id = None
53+
self._created_at = None
54+
self._pending_job_count = None
5255

5356
def __repr__(self):
54-
return f"Slice(slice_id='{self.id}', client={self._client})"
57+
return f"Slice(slice_id='{self.id}', name={self._name}, dataset_id={self._dataset_id})"
5558

5659
def __eq__(self, other):
5760
if self.id == other.id:
5861
if self._client == other._client:
5962
return True
6063
return False
6164

65+
@property
66+
def created_at(self) -> Optional[datetime.datetime]:
67+
"""Timestamp of creation of the slice
68+
69+
Returns:
70+
datetime of creation or None if not created yet
71+
"""
72+
if self._created_at is None:
73+
self._created_at = self.info().get("created_at", None)
74+
return self._created_at
75+
76+
@property
77+
def pending_job_count(self) -> Optional[int]:
78+
if self._pending_job_count is None:
79+
self._pending_job_count = self.info().get(
80+
"pending_job_count", None
81+
)
82+
return self._pending_job_count
83+
84+
@classmethod
85+
def from_request(cls, request, client):
86+
instance = cls(request["id"], client)
87+
instance._name = request.get("name", None)
88+
instance._dataset_id = request.get("dataset_id", None)
89+
created_at_str = request.get("created_at").rstrip("Z")
90+
if hasattr(datetime.datetime, "fromisoformat"):
91+
instance._created_at = datetime.datetime.fromisoformat(
92+
created_at_str
93+
)
94+
else:
95+
fmt_str = r"%Y-%m-%dT%H:%M:%S.%f" # replaces fromisoformat, which is not available in Python 3.6
96+
instance._created_at = datetime.datetime.strptime(
97+
created_at_str, fmt_str
98+
)
99+
instance._pending_job_count = request.get("pending_job_count", None)
100+
return instance
101+
62102
@property
63103
def slice_id(self):
64104
warnings.warn(

nucleus/validate/client.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,13 +107,17 @@ def create_scenario_test(
107107
).dict(),
108108
"validate/scenario_test",
109109
)
110-
return ScenarioTest(response[SCENARIO_TEST_ID_KEY], self.connection)
110+
return ScenarioTest.from_id(
111+
response[SCENARIO_TEST_ID_KEY], self.connection
112+
)
111113

112114
def get_scenario_test(self, scenario_test_id: str) -> ScenarioTest:
113115
response = self.connection.get(
114116
f"validate/scenario_test/{scenario_test_id}",
115117
)
116-
return ScenarioTest(response["unit_test"]["id"], self.connection)
118+
return ScenarioTest.from_id(
119+
response["unit_test"]["id"], self.connection
120+
)
117121

118122
@property
119123
def scenario_tests(self) -> List[ScenarioTest]:
@@ -131,12 +135,13 @@ def scenario_tests(self) -> List[ScenarioTest]:
131135
A list of ScenarioTest objects.
132136
"""
133137
response = self.connection.get(
134-
"validate/scenario_test",
138+
"validate/scenario_test/details",
135139
)
136-
return [
137-
ScenarioTest(test_id, self.connection)
138-
for test_id in response["scenario_test_ids"]
140+
tests = [
141+
ScenarioTest.from_response(payload, self.connection)
142+
for payload in response
139143
]
144+
return tests
140145

141146
def delete_scenario_test(self, scenario_test_id: str) -> bool:
142147
"""Deletes a Scenario Test. ::

nucleus/validate/data_transfer_objects/scenario_test_evaluations.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,6 @@
55
from nucleus.pydantic_base import ImmutableModel
66

77

8-
class EvalDetail(ImmutableModel):
9-
id: str
10-
11-
12-
class GetEvalHistory(ImmutableModel):
13-
evaluations: List[EvalDetail]
14-
15-
168
class EvaluationResult(ImmutableModel):
179
item_ref_id: str
1810
score: float

nucleus/validate/scenario_test.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,7 @@
1818
THRESHOLD_KEY,
1919
ThresholdComparison,
2020
)
21-
from .data_transfer_objects.scenario_test_evaluations import (
22-
EvaluationResult,
23-
GetEvalHistory,
24-
)
21+
from .data_transfer_objects.scenario_test_evaluations import EvaluationResult
2522
from .data_transfer_objects.scenario_test_metric import AddScenarioTestFunction
2623
from .eval_functions.available_eval_functions import (
2724
EvalFunction,
@@ -52,13 +49,24 @@ class ScenarioTest:
5249
slice_id: str = field(init=False)
5350
baseline_model_id: Optional[str] = None
5451

55-
def __post_init__(self):
52+
@classmethod
53+
def from_id(cls, unit_test_id: str, connection: Connection):
5654
# TODO(gunnar): Remove this pattern. It's too slow. We should get all the info required in one call
57-
response = self.connection.get(
58-
f"validate/scenario_test/{self.id}/info",
55+
response = connection.get(
56+
f"validate/scenario_test/{unit_test_id}/info",
5957
)
60-
self.name = response[NAME_KEY]
61-
self.slice_id = response[SLICE_ID_KEY]
58+
instance = cls(unit_test_id, connection)
59+
instance.name = response[NAME_KEY]
60+
instance.slice_id = response[SLICE_ID_KEY]
61+
return instance
62+
63+
@classmethod
64+
def from_response(cls, response, connection: Connection):
65+
instance = cls(response["id"], connection)
66+
instance.name = response[NAME_KEY]
67+
instance.slice_id = response[SLICE_ID_KEY]
68+
instance.baseline_model_id = response.get("baseline_model_id", None)
69+
return instance
6270

6371
def add_eval_function(
6472
self, eval_function: EvalFunction
@@ -148,13 +156,13 @@ def get_eval_history(self) -> List[ScenarioTestEvaluation]:
148156
A list of :class:`ScenarioTestEvaluation` objects.
149157
"""
150158
response = self.connection.get(
151-
f"validate/scenario_test/{self.id}/eval_history",
159+
f"validate/scenario_test/{self.id}/eval_history/details",
152160
)
153-
eval_history = GetEvalHistory.parse_obj(response)
154-
return [
155-
ScenarioTestEvaluation(evaluation.id, self.connection)
156-
for evaluation in eval_history.evaluations
161+
evaluations = [
162+
ScenarioTestEvaluation.from_request(eval_payload, self.connection)
163+
for eval_payload in response
157164
]
165+
return evaluations
158166

159167
def get_items(self) -> List[DatasetItem]:
160168
response = self.connection.get(

nucleus/validate/scenario_test_evaluation.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""Data types for Scenario Test Evaluation results."""
2-
from dataclasses import InitVar, dataclass, field
2+
from dataclasses import dataclass, field
33
from enum import Enum
44
from typing import List, Optional
55

@@ -77,31 +77,30 @@ class ScenarioTestEvaluation:
7777
status: ScenarioTestEvaluationStatus = field(init=False)
7878
result: Optional[float] = field(init=False)
7979
passed: bool = field(init=False)
80-
item_evals: List[ScenarioTestItemEvaluation] = field(init=False)
81-
connection: InitVar[Connection]
82-
83-
def __post_init__(self, connection: Connection):
84-
# TODO(gunnar): Having the function call /info on every construction is too slow. The original
85-
# endpoint should rather return the necessary human-readable information
86-
response = connection.make_request(
80+
connection: Connection = field(init=False, repr=False)
81+
82+
@classmethod
83+
def from_request(cls, response, connection):
84+
instance = cls(response["id"])
85+
instance.connection = connection
86+
87+
instance.scenario_test_id = response[SCENARIO_TEST_ID_KEY]
88+
instance.eval_function_id = response[EVAL_FUNCTION_ID_KEY]
89+
instance.model_id = response[MODEL_ID_KEY]
90+
instance.status = ScenarioTestEvaluationStatus(response[STATUS_KEY])
91+
instance.result = try_convert_float(response[RESULT_KEY])
92+
instance.passed = bool(response[PASS_KEY])
93+
return instance
94+
95+
@property
96+
def item_evals(self) -> List[ScenarioTestItemEvaluation]:
97+
response = self.connection.make_request(
8798
{},
8899
f"validate/eval/{self.id}/info",
89100
requests_command=requests.get,
90101
)
91-
eval_response = response[SCENARIO_TEST_EVAL_KEY]
92102
items_response = response[ITEM_EVAL_KEY]
93-
94-
self.scenario_test_id: str = eval_response[SCENARIO_TEST_ID_KEY]
95-
self.eval_function_id: str = eval_response[EVAL_FUNCTION_ID_KEY]
96-
self.model_id: str = eval_response[MODEL_ID_KEY]
97-
self.status: ScenarioTestEvaluationStatus = (
98-
ScenarioTestEvaluationStatus(eval_response[STATUS_KEY])
99-
)
100-
self.result: Optional[float] = try_convert_float(
101-
eval_response[RESULT_KEY]
102-
)
103-
self.passed: bool = bool(eval_response[PASS_KEY])
104-
self.item_evals: List[ScenarioTestItemEvaluation] = [
103+
items = [
105104
ScenarioTestItemEvaluation(
106105
evaluation_id=res[EVALUATION_ID_KEY],
107106
scenario_test_id=res[SCENARIO_TEST_ID_KEY],
@@ -112,3 +111,4 @@ def __post_init__(self, connection: Connection):
112111
)
113112
for res in items_response
114113
]
114+
return items

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ exclude = '''
2121

2222
[tool.poetry]
2323
name = "scale-nucleus"
24-
version = "0.14.13"
24+
version = "0.14.14"
2525
description = "The official Python client library for Nucleus, the Data Platform for AI"
2626
license = "MIT"
2727
authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]

tests/test_slice.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,6 @@ def slc(CLIENT, dataset):
3535
CLIENT.delete_slice(slc.id)
3636

3737

38-
def test_reprs():
39-
# Have to define here in order to have access to all relevant objects
40-
def test_repr(test_object: any):
41-
assert eval(str(test_object)) == test_object
42-
43-
client = NucleusClient(api_key="fake_key")
44-
test_repr(Slice(slice_id="fake_slice_id", client=client))
45-
46-
4738
def test_slice_create_and_delete_and_list(dataset: Dataset):
4839
ds_items = dataset.items
4940

0 commit comments

Comments
 (0)