Skip to content

Commit dc4a025

Browse files
authored
Expose embedding indexes of Dataset (#408)
1 parent 0b2f68c commit dc4a025

File tree

4 files changed

+71
-0
lines changed

4 files changed

+71
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Added
1111
- Allow direct embedding vector upload together with dataset items. `DatasetItem` now has an additional parameter called `embedding_info` which can be used to directly upload embeddings when a dataset is uploaded.
12+
- Added the `dataset.embedding_indexes` property, which exposes information about every embedding index that belongs to the dataset.
1213

1314

1415
## [0.16.6](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.6) - 2023-11-01

nucleus/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
EMBEDDING_VECTOR_KEY = "embedding_vector"
5353
EMBEDDINGS_URL_KEY = "embeddings_urls"
5454
EMBEDDING_DIMENSION_KEY = "embedding_dimension"
55+
EMBEDDING_TYPE_KEY = "embedding_type"
5556
ERRORS_KEY = "errors"
5657
ERROR_CODES = "error_codes"
5758
ERROR_ITEMS = "upload_errors"
@@ -73,6 +74,8 @@
7374
INDEX_KEY = "index"
7475
INDEX_ID_KEY = "index_id"
7576
INDEX_CONTINUOUS_ENABLE_KEY = "enable"
77+
INDEX_LEVEL_KEY = "index_level"
78+
INDEX_TYPE_KEY = "index_type"
7679
IOU_KEY = "iou"
7780
ITEMS_KEY = "items"
7881
ITEM_KEY = "item"

nucleus/dataset.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
from nucleus.annotation_uploader import AnnotationUploader, PredictionUploader
1919
from nucleus.async_job import AsyncJob, EmbeddingsExportJob
20+
from nucleus.embedding_index import EmbeddingIndex
2021
from nucleus.evaluation_match import EvaluationMatch
2122
from nucleus.prediction import from_json as prediction_from_json
2223
from nucleus.track import Track
@@ -194,6 +195,18 @@ def slices(self) -> List[Slice]:
194195
)
195196
return [Slice.from_request(info, self._client) for info in response]
196197

198+
@property
def embedding_indexes(self) -> List[EmbeddingIndex]:
    """Return every embedding index that belongs to this Dataset.

    Sends a GET request for ``dataset/{id}/embeddingIndexes`` through the
    client and builds one ``EmbeddingIndex`` per entry found under the
    ``"embedding_indexes"`` key of the response.
    """
    route = f"dataset/{self.id}/embeddingIndexes"
    payload = self._client.make_request({}, route, requests.get)
    return list(map(EmbeddingIndex.from_json, payload["embedding_indexes"]))
209+
197210
def get_slices(
198211
self,
199212
name: Optional[str] = None,

nucleus/embedding_index.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from dataclasses import dataclass
2+
from enum import Enum
3+
4+
from nucleus.constants import (
5+
EMBEDDING_DIMENSION_KEY,
6+
EMBEDDING_TYPE_KEY,
7+
ID_KEY,
8+
INDEX_LEVEL_KEY,
9+
INDEX_TYPE_KEY,
10+
STATUS_KEY,
11+
)
12+
13+
14+
class IndexType(str, Enum):
    """Type of an embedding index.

    Members are also ``str`` instances, so each one compares equal to its
    raw string value.
    """

    INTERNAL = "Internal"
    CUSTOM = "Custom"
17+
18+
19+
class IndexLevel(str, Enum):
    """Level of an embedding index.

    Members are also ``str`` instances, so each one compares equal to its
    raw string value.
    """

    IMAGE = "Image"
    OBJECT = "Object"
22+
23+
24+
class IndexStatus(str, Enum):
    """Status of an embedding index.

    Members are also ``str`` instances, so each one compares equal to its
    raw string value.
    """

    STARTED = "Started"
    COMPLETED = "Completed"
    ERRORED = "Errored"
28+
29+
30+
@dataclass
class EmbeddingIndex:
    """Represents an Embedding Index belonging to a Dataset.

    Embedding Indexes contain generated embeddings for each item in the dataset,
    and are used by the Autotag and the Similarity Search functionality.

    Instances built with :meth:`from_json` hold real ``IndexStatus``,
    ``IndexType`` and ``IndexLevel`` members in the corresponding fields
    whenever the payload value matches a known member. Because those enums
    also subclass ``str``, the members still compare equal to the raw strings.
    """

    id: str
    status: IndexStatus
    index_type: IndexType
    index_level: IndexLevel
    embedding_type: str
    embedding_dimension: int

    @staticmethod
    def _as_enum(enum_cls, raw):
        """Return the ``enum_cls`` member whose value is ``raw``.

        If ``raw`` is not a value of ``enum_cls`` it is returned unchanged,
        so a payload carrying a value this client does not know about is
        still parsed instead of raising.
        """
        try:
            return enum_cls(raw)
        except ValueError:
            return raw

    @classmethod
    def from_json(cls, payload: dict) -> "EmbeddingIndex":
        """Build an ``EmbeddingIndex`` from a decoded JSON payload.

        Args:
            payload: Mapping that contains the id, status, index type,
                index level, embedding type and embedding dimension keys.

        Returns:
            The populated ``EmbeddingIndex``.

        Raises:
            KeyError: If any of the required keys is missing from ``payload``.
        """
        return cls(
            id=payload[ID_KEY],
            # Coerce the enum-typed fields so they match their annotations;
            # previously the raw strings were stored as-is.
            status=cls._as_enum(IndexStatus, payload[STATUS_KEY]),
            index_type=cls._as_enum(IndexType, payload[INDEX_TYPE_KEY]),
            index_level=cls._as_enum(IndexLevel, payload[INDEX_LEVEL_KEY]),
            embedding_type=payload[EMBEDDING_TYPE_KEY],
            embedding_dimension=payload[EMBEDDING_DIMENSION_KEY],
        )

0 commit comments

Comments
 (0)