Skip to content

Commit 2b0bd13

Browse files
authored
Merge pull request #1087 from meilisearch/feat/add-embedders-settings
Update `embedders` settings, hybrid search, and add tests for AI search methods
2 parents 1603f44 + 44a68a5 commit 2b0bd13

File tree

8 files changed

+547
-104
lines changed

8 files changed

+547
-104
lines changed

README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,27 @@ JSON output:
143143
}
144144
```
145145

146+
#### Hybrid Search <!-- omit in toc -->
147+
148+
Hybrid search combines traditional keyword search with semantic search for more relevant results. You need to have an embedder configured in your index settings to use this feature.
149+
150+
```python
151+
# Using hybrid search with the search method
152+
index.search(
153+
'action movie',
154+
{
155+
"hybrid": {"semanticRatio": 0.5, "embedder": "default"}
156+
}
157+
)
158+
```
159+
160+
The `semanticRatio` parameter (between 0 and 1) controls the balance between keyword search and semantic search:
161+
- 0: Only keyword search
162+
- 1: Only semantic search
163+
- Values in between: A mix of both approaches; the closer the value is to 1, the more weight semantic results receive
164+
165+
The `embedder` parameter is the name of the embedder to use for the semantic search component. It must match one of the keys defined in the index's `embedders` setting.
166+
146167
#### Custom Search With Filters <!-- omit in toc -->
147168

148169
If you want to enable filtering, you must add your attributes to the `filterableAttributes` index setting.

meilisearch/index.py

Lines changed: 84 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -24,19 +24,22 @@
2424
from meilisearch.config import Config
2525
from meilisearch.errors import version_error_hint_message
2626
from meilisearch.models.document import Document, DocumentsResults
27-
from meilisearch.models.index import (
27+
from meilisearch.models.embedders import (
2828
Embedders,
29-
Faceting,
29+
EmbedderType,
3030
HuggingFaceEmbedder,
31-
IndexStats,
32-
LocalizedAttributes,
3331
OllamaEmbedder,
3432
OpenAiEmbedder,
33+
RestEmbedder,
34+
UserProvidedEmbedder,
35+
)
36+
from meilisearch.models.index import (
37+
Faceting,
38+
IndexStats,
39+
LocalizedAttributes,
3540
Pagination,
3641
ProximityPrecision,
37-
RestEmbedder,
3842
TypoTolerance,
39-
UserProvidedEmbedder,
4043
)
4144
from meilisearch.models.task import Task, TaskInfo, TaskResults
4245
from meilisearch.task import TaskHandler
@@ -277,14 +280,21 @@ def get_stats(self) -> IndexStats:
277280
def search(self, query: str, opt_params: Optional[Mapping[str, Any]] = None) -> Dict[str, Any]:
278281
"""Search in the index.
279282
283+
https://www.meilisearch.com/docs/reference/api/search
284+
280285
Parameters
281286
----------
282287
query:
283288
String containing the searched word(s)
284289
opt_params (optional):
285290
Dictionary containing optional query parameters.
286-
Note: The vector parameter is only available in Meilisearch >= v1.13.0
287-
https://www.meilisearch.com/docs/reference/api/search#search-in-an-index
291+
Common parameters include:
292+
- hybrid: Dict with 'semanticRatio' and 'embedder' fields for hybrid search
293+
- vector: Array of numbers for vector search
294+
- retrieveVectors: Boolean to include vector data in search results
295+
- filter: Filter queries by an attribute's value
296+
- limit: Maximum number of documents returned
297+
- offset: Number of documents to skip
288298
289299
Returns
290300
-------
@@ -298,7 +308,9 @@ def search(self, query: str, opt_params: Optional[Mapping[str, Any]] = None) ->
298308
"""
299309
if opt_params is None:
300310
opt_params = {}
311+
301312
body = {"q": query, **opt_params}
313+
302314
return self.http.post(
303315
f"{self.config.paths.index}/{self.uid}/{self.config.paths.search}",
304316
body=body,
@@ -955,14 +967,7 @@ def get_settings(self) -> Dict[str, Any]:
955967
)
956968

957969
if settings.get("embedders"):
958-
embedders: dict[
959-
str,
960-
OpenAiEmbedder
961-
| HuggingFaceEmbedder
962-
| OllamaEmbedder
963-
| RestEmbedder
964-
| UserProvidedEmbedder,
965-
] = {}
970+
embedders: dict[str, EmbedderType] = {}
966971
for k, v in settings["embedders"].items():
967972
if v.get("source") == "openAi":
968973
embedders[k] = OpenAiEmbedder(**v)
@@ -988,6 +993,26 @@ def update_settings(self, body: MutableMapping[str, Any]) -> TaskInfo:
988993
----------
989994
body:
990995
Dictionary containing the settings of the index.
996+
Supported settings include:
997+
- 'rankingRules': List of ranking rules
998+
- 'distinctAttribute': Attribute for deduplication
999+
- 'searchableAttributes': Attributes that can be searched
1000+
- 'displayedAttributes': Attributes to display in search results
1001+
- 'stopWords': Words ignored in search queries
1002+
- 'synonyms': Dictionary of synonyms
1003+
- 'filterableAttributes': Attributes that can be used for filtering
1004+
- 'sortableAttributes': Attributes that can be used for sorting
1005+
- 'typoTolerance': Settings for typo tolerance
1006+
- 'pagination': Settings for pagination
1007+
- 'faceting': Settings for faceting
1008+
- 'dictionary': List of custom dictionary words
1009+
- 'separatorTokens': List of separator tokens
1010+
- 'nonSeparatorTokens': List of non-separator tokens
1011+
- 'embedders': Dictionary of embedder configurations for AI-powered search
1012+
- 'searchCutoffMs': Maximum search time in milliseconds
1013+
- 'proximityPrecision': Precision for proximity ranking
1014+
- 'localizedAttributes': Settings for localized attributes
1015+
9911016
More information:
9921017
https://www.meilisearch.com/docs/reference/api/settings#update-settings
9931018
@@ -1000,7 +1025,8 @@ def update_settings(self, body: MutableMapping[str, Any]) -> TaskInfo:
10001025
Raises
10011026
------
10021027
MeilisearchApiError
1003-
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
1028+
An error containing details about why Meilisearch can't process your request.
1029+
Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
10041030
"""
10051031
if body.get("embedders"):
10061032
for _, v in body["embedders"].items():
@@ -1879,10 +1905,13 @@ def reset_non_separator_tokens(self) -> TaskInfo:
18791905
def get_embedders(self) -> Embedders | None:
18801906
"""Get embedders of the index.
18811907
1908+
Retrieves the current embedder configuration from Meilisearch.
1909+
18821910
Returns
18831911
-------
1884-
settings:
1885-
The embedders settings of the index.
1912+
Embedders:
1913+
The embedders settings of the index, or None if no embedders are configured.
1914+
Contains a dictionary of embedder configurations, where keys are embedder names.
18861915
18871916
Raises
18881917
------
@@ -1894,35 +1923,35 @@ def get_embedders(self) -> Embedders | None:
18941923
if not response:
18951924
return None
18961925

1897-
embedders: dict[
1898-
str,
1899-
OpenAiEmbedder
1900-
| HuggingFaceEmbedder
1901-
| OllamaEmbedder
1902-
| RestEmbedder
1903-
| UserProvidedEmbedder,
1904-
] = {}
1926+
embedders: dict[str, EmbedderType] = {}
19051927
for k, v in response.items():
1906-
if v.get("source") == "openAi":
1928+
source = v.get("source")
1929+
if source == "openAi":
19071930
embedders[k] = OpenAiEmbedder(**v)
1908-
elif v.get("source") == "ollama":
1909-
embedders[k] = OllamaEmbedder(**v)
1910-
elif v.get("source") == "huggingFace":
1931+
elif source == "huggingFace":
19111932
embedders[k] = HuggingFaceEmbedder(**v)
1912-
elif v.get("source") == "rest":
1933+
elif source == "ollama":
1934+
embedders[k] = OllamaEmbedder(**v)
1935+
elif source == "rest":
19131936
embedders[k] = RestEmbedder(**v)
1937+
elif source == "userProvided":
1938+
embedders[k] = UserProvidedEmbedder(**v)
19141939
else:
1940+
# Default to UserProvidedEmbedder for unknown sources
19151941
embedders[k] = UserProvidedEmbedder(**v)
19161942

19171943
return Embedders(embedders=embedders)
19181944

19191945
def update_embedders(self, body: Union[MutableMapping[str, Any], None]) -> TaskInfo:
19201946
"""Update embedders of the index.
19211947
1948+
Updates the embedder configuration for the index. The embedder configuration
1949+
determines how Meilisearch generates vector embeddings for documents.
1950+
19221951
Parameters
19231952
----------
19241953
body: dict
1925-
Dictionary containing the embedders.
1954+
Dictionary containing the embedders configuration.
19261955
19271956
Returns
19281957
-------
@@ -1933,13 +1962,28 @@ def update_embedders(self, body: Union[MutableMapping[str, Any], None]) -> TaskI
19331962
Raises
19341963
------
19351964
MeilisearchApiError
1936-
An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
1965+
An error containing details about why Meilisearch can't process your request.
1966+
Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
19371967
"""
1968+
if body is not None and body.get("embedders"):
1969+
embedders: dict[str, EmbedderType] = {}
1970+
for k, v in body["embedders"].items():
1971+
source = v.get("source")
1972+
if source == "openAi":
1973+
embedders[k] = OpenAiEmbedder(**v)
1974+
elif source == "huggingFace":
1975+
embedders[k] = HuggingFaceEmbedder(**v)
1976+
elif source == "ollama":
1977+
embedders[k] = OllamaEmbedder(**v)
1978+
elif source == "rest":
1979+
embedders[k] = RestEmbedder(**v)
1980+
elif source == "userProvided":
1981+
embedders[k] = UserProvidedEmbedder(**v)
1982+
else:
1983+
# Default to UserProvidedEmbedder for unknown sources
1984+
embedders[k] = UserProvidedEmbedder(**v)
19381985

1939-
if body:
1940-
for _, v in body.items():
1941-
if "documentTemplateMaxBytes" in v and v["documentTemplateMaxBytes"] is None:
1942-
del v["documentTemplateMaxBytes"]
1986+
body = {"embedders": {k: v.model_dump(by_alias=True) for k, v in embedders.items()}}
19431987

19441988
task = self.http.patch(self.__settings_url_for(self.config.paths.embedders), body)
19451989

@@ -1948,6 +1992,8 @@ def update_embedders(self, body: Union[MutableMapping[str, Any], None]) -> TaskI
19481992
def reset_embedders(self) -> TaskInfo:
19491993
"""Reset embedders of the index to default values.
19501994
1995+
Removes all embedder configurations from the index.
1996+
19511997
Returns
19521998
-------
19531999
task_info:

0 commit comments

Comments
 (0)