Skip to content

Commit 3233b9c

Browse files
authored
chore: update qdrant to remove hybrid search breaking change (#3360)
## Summary

Describe key changes, mention related issues or motivation for the changes.

(If applicable, issue number: #____)

## Type of change

- [ ] Bug fix
- [ ] New feature
- [ ] Breaking change
- [x] Improvement
- [ ] Model update
- [ ] Other:

---

## Checklist

- [x] Code complies with style guidelines
- [x] Ran format/validation scripts (`./scripts/format.sh` and `./scripts/validate.sh`)
- [x] Self-review completed
- [ ] Documentation updated (comments, docstrings)
- [ ] Examples and guides: Relevant cookbook examples have been included or updated (if applicable)
- [x] Tested in clean environment
- [ ] Tests added/updated (if applicable)

---

## Additional Notes

Add any important context (deployment instructions, screenshots, security considerations, etc.)
1 parent 82f55fa commit 3233b9c

File tree

4 files changed

+122
-57
lines changed

4 files changed

+122
-57
lines changed

cookbook/agent_concepts/state/last_n_session_messages.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
storage=SqliteStorage(table_name="agent_sessions_new", db_file="tmp/data.db"),
1414
add_history_to_messages=True,
1515
num_history_runs=3,
16-
search_previous_sessions_history=True, # allow searching previous sessions
17-
num_history_sessions=2, # only include the last 2 sessions in the search to avoid context length issues
16+
search_previous_sessions_history=True, # allow searching previous sessions
17+
num_history_sessions=2, # only include the last 2 sessions in the search to avoid context length issues
1818
show_tool_calls=True,
1919
)
2020

cookbook/reasoning/teams/finance_team_chain_of_thought.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,9 @@ async def run_team(task: str):
6060

6161

6262
if __name__ == "__main__":
63-
64-
asyncio.run(run_team(
65-
dedent("""\
63+
asyncio.run(
64+
run_team(
65+
dedent("""\
6666
Analyze the impact of recent US tariffs on market performance across these key sectors:
6767
- Steel & Aluminum: (X, NUE, AA)
6868
- Technology Hardware: (AAPL, DELL, HPQ)
@@ -75,4 +75,5 @@ async def run_team(task: str):
7575
3. Analyze companies' strategic responses (reshoring, price adjustments, supplier diversification)
7676
4. Assess analyst outlook changes directly attributed to tariff policies
7777
""")
78-
))
78+
)
79+
)

libs/agno/agno/vectordb/qdrant/qdrant.py

Lines changed: 114 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,10 @@ def __init__(
119119
self.sparse_vector_name = sparse_vector_name
120120
self.hybrid_fusion_strategy = hybrid_fusion_strategy
121121

122+
# TODO(v2.0.0): Remove backward compatibility for unnamed vectors
123+
# TODO(v2.0.0): Make named vectors mandatory and simplify the codebase
124+
self.use_named_vectors = search_type in [SearchType.hybrid]
125+
122126
if self.search_type in [SearchType.keyword, SearchType.hybrid]:
123127
try:
124128
from fastembed import SparseTextEmbedding
@@ -131,7 +135,7 @@ def __init__(
131135

132136
except ImportError as e:
133137
raise ImportError(
134-
"To use keyword/hybrid search, install the `fastembed` extra with `pip install 'qdrant-client[fastembed]'`."
138+
"To use keyword/hybrid search, install the `fastembed` extra with `pip install fastembed`."
135139
) from e
136140

137141
@property
@@ -176,7 +180,6 @@ def async_client(self) -> AsyncQdrantClient:
176180
return self._async_client
177181

178182
def create(self) -> None:
179-
# Collection distance
180183
_distance = models.Distance.COSINE
181184
if self.distance == Distance.l2:
182185
_distance = models.Distance.EUCLID
@@ -185,11 +188,18 @@ def create(self) -> None:
185188

186189
if not self.exists():
187190
log_debug(f"Creating collection: {self.collection}")
191+
192+
# Configure vectors based on search type
193+
if self.search_type == SearchType.vector:
194+
# Maintain backward compatibility with unnamed vectors
195+
vectors_config = models.VectorParams(size=self.dimensions, distance=_distance)
196+
else:
197+
# Use named vectors for hybrid search
198+
vectors_config = {self.dense_vector_name: models.VectorParams(size=self.dimensions, distance=_distance)}
199+
188200
self.client.create_collection(
189201
collection_name=self.collection,
190-
vectors_config={self.dense_vector_name: models.VectorParams(size=self.dimensions, distance=_distance)}
191-
if self.search_type in [SearchType.vector, SearchType.hybrid]
192-
else {},
202+
vectors_config=vectors_config,
193203
sparse_vectors_config={self.sparse_vector_name: models.SparseVectorParams()}
194204
if self.search_type in [SearchType.keyword, SearchType.hybrid]
195205
else None,
@@ -206,11 +216,18 @@ async def async_create(self) -> None:
206216

207217
if not await self.async_exists():
208218
log_debug(f"Creating collection asynchronously: {self.collection}")
219+
220+
# Configure vectors based on search type
221+
if self.search_type == SearchType.vector:
222+
# Maintain backward compatibility with unnamed vectors
223+
vectors_config = models.VectorParams(size=self.dimensions, distance=_distance)
224+
else:
225+
# Use named vectors for hybrid search
226+
vectors_config = {self.dense_vector_name: models.VectorParams(size=self.dimensions, distance=_distance)}
227+
209228
await self.async_client.create_collection(
210229
collection_name=self.collection,
211-
vectors_config={self.dense_vector_name: models.VectorParams(size=self.dimensions, distance=_distance)}
212-
if self.search_type in [SearchType.vector, SearchType.hybrid]
213-
else {},
230+
vectors_config=vectors_config,
214231
sparse_vectors_config={self.sparse_vector_name: models.SparseVectorParams()}
215232
if self.search_type in [SearchType.keyword, SearchType.hybrid]
216233
else None,
@@ -300,13 +317,25 @@ def insert(self, documents: List[Document], filters: Optional[Dict[str, Any]] =
300317
cleaned_content = document.content.replace("\x00", "\ufffd")
301318
doc_id = md5(cleaned_content.encode()).hexdigest()
302319

303-
vector = {}
304-
if self.search_type in [SearchType.vector, SearchType.hybrid]:
320+
# TODO(v2.0.0): Remove conditional vector naming logic
321+
if self.use_named_vectors:
322+
vector = {self.dense_vector_name: document.embedding}
323+
else:
324+
vector = document.embedding # type: ignore
325+
326+
if self.search_type == SearchType.vector:
327+
# For vector search, maintain backward compatibility with unnamed vectors
305328
document.embed(embedder=self.embedder)
306-
vector[self.dense_vector_name] = document.embedding
329+
vector = document.embedding # type: ignore
330+
else:
331+
# For other search types, use named vectors
332+
vector = {}
333+
if self.search_type in [SearchType.hybrid]:
334+
document.embed(embedder=self.embedder)
335+
vector[self.dense_vector_name] = document.embedding
307336

308-
if self.search_type in [SearchType.keyword, SearchType.hybrid]:
309-
vector[self.sparse_vector_name] = next(self.sparse_encoder.embed([document.content])).as_object()
337+
if self.search_type in [SearchType.keyword, SearchType.hybrid]:
338+
vector[self.sparse_vector_name] = next(self.sparse_encoder.embed([document.content])).as_object()
310339

311340
# Create payload with document properties
312341
payload = {
@@ -349,11 +378,19 @@ async def process_document(document):
349378
cleaned_content = document.content.replace("\x00", "\ufffd")
350379
doc_id = md5(cleaned_content.encode()).hexdigest()
351380

352-
vector = {}
353-
354-
if self.search_type in [SearchType.vector, SearchType.hybrid]:
381+
if self.search_type == SearchType.vector:
382+
# For vector search, maintain backward compatibility with unnamed vectors
355383
document.embed(embedder=self.embedder)
356-
vector[self.dense_vector_name] = document.embedding
384+
vector = document.embedding
385+
else:
386+
# For other search types, use named vectors
387+
vector = {}
388+
if self.search_type in [SearchType.hybrid]:
389+
document.embed(embedder=self.embedder)
390+
vector[self.dense_vector_name] = document.embedding
391+
392+
if self.search_type in [SearchType.keyword, SearchType.hybrid]:
393+
vector[self.sparse_vector_name] = next(self.sparse_encoder.embed([document.content])).as_object()
357394

358395
if self.search_type in [SearchType.keyword, SearchType.hybrid]:
359396
vector[self.sparse_vector_name] = next(self.sparse_encoder.embed([document.content])).as_object()
@@ -441,64 +478,77 @@ async def async_search(
441478

442479
return self._build_search_results(results, query)
443480

444-
def _run_vector_search_sync(
481+
def _run_hybrid_search_sync(
445482
self,
446483
query: str,
447484
limit: int,
448485
filters: Optional[Dict[str, Any]],
449486
) -> List[models.ScoredPoint]:
450487
dense_embedding = self.embedder.get_embedding(query)
488+
sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()
451489
call = self.client.query_points(
452490
collection_name=self.collection,
453-
query=dense_embedding,
491+
prefetch=[
492+
models.Prefetch(
493+
query=models.SparseVector(**sparse_embedding),
494+
limit=limit,
495+
using=self.sparse_vector_name,
496+
),
497+
models.Prefetch(query=dense_embedding, limit=limit, using=self.dense_vector_name),
498+
],
499+
query=models.FusionQuery(fusion=self.hybrid_fusion_strategy),
454500
with_vectors=True,
455501
with_payload=True,
456502
limit=limit,
457503
query_filter=filters,
458-
using=self.dense_vector_name,
459504
)
460505
return call.points
461506

462-
def _run_keyword_search_sync(
507+
def _run_vector_search_sync(
463508
self,
464509
query: str,
465510
limit: int,
466511
filters: Optional[Dict[str, Any]],
467512
) -> List[models.ScoredPoint]:
468-
sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()
469-
call = self.client.query_points(
470-
collection_name=self.collection,
471-
query=models.SparseVector(**sparse_embedding),
472-
with_vectors=True,
473-
with_payload=True,
474-
limit=limit,
475-
using=self.sparse_vector_name,
476-
query_filter=filters,
477-
)
513+
dense_embedding = self.embedder.get_embedding(query)
514+
515+
# TODO(v2.0.0): Remove this conditional and always use named vectors
516+
if self.use_named_vectors:
517+
call = self.client.query_points(
518+
collection_name=self.collection,
519+
query=dense_embedding,
520+
with_vectors=True,
521+
with_payload=True,
522+
limit=limit,
523+
query_filter=filters,
524+
using=self.dense_vector_name,
525+
)
526+
else:
527+
# Backward compatibility mode - use unnamed vector
528+
call = self.client.query_points(
529+
collection_name=self.collection,
530+
query=dense_embedding,
531+
with_vectors=True,
532+
with_payload=True,
533+
limit=limit,
534+
query_filter=filters,
535+
)
478536
return call.points
479537

480-
def _run_hybrid_search_sync(
538+
def _run_keyword_search_sync(
481539
self,
482540
query: str,
483541
limit: int,
484542
filters: Optional[Dict[str, Any]],
485543
) -> List[models.ScoredPoint]:
486-
dense_embedding = self.embedder.get_embedding(query)
487544
sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()
488545
call = self.client.query_points(
489546
collection_name=self.collection,
490-
prefetch=[
491-
models.Prefetch(
492-
query=models.SparseVector(**sparse_embedding),
493-
limit=limit,
494-
using=self.sparse_vector_name,
495-
),
496-
models.Prefetch(query=dense_embedding, limit=limit, using=self.dense_vector_name),
497-
],
498-
query=models.FusionQuery(fusion=self.hybrid_fusion_strategy),
547+
query=models.SparseVector(**sparse_embedding),
499548
with_vectors=True,
500549
with_payload=True,
501550
limit=limit,
551+
using=self.sparse_vector_name,
502552
query_filter=filters,
503553
)
504554
return call.points
@@ -510,15 +560,28 @@ async def _run_vector_search_async(
510560
filters: Optional[Dict[str, Any]],
511561
) -> List[models.ScoredPoint]:
512562
dense_embedding = self.embedder.get_embedding(query)
513-
call = await self.async_client.query_points(
514-
collection_name=self.collection,
515-
query=dense_embedding,
516-
with_vectors=True,
517-
with_payload=True,
518-
limit=limit,
519-
query_filter=filters,
520-
using=self.dense_vector_name,
521-
)
563+
564+
# TODO(v2.0.0): Remove this conditional and always use named vectors
565+
if self.use_named_vectors:
566+
call = await self.async_client.query_points(
567+
collection_name=self.collection,
568+
query=dense_embedding,
569+
with_vectors=True,
570+
with_payload=True,
571+
limit=limit,
572+
query_filter=filters,
573+
using=self.dense_vector_name,
574+
)
575+
else:
576+
# Backward compatibility mode - use unnamed vector
577+
call = await self.async_client.query_points(
578+
collection_name=self.collection,
579+
query=dense_embedding,
580+
with_vectors=True,
581+
with_payload=True,
582+
limit=limit,
583+
query_filter=filters,
584+
)
522585
return call.points
523586

524587
async def _run_keyword_search_async(

libs/agno/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,7 @@ module = [
315315
"e2b_code_interpreter.*",
316316
"exa_py.*",
317317
"fastapi.*",
318+
"fastembed.*",
318319
"filetype.*",
319320
"firecrawl.*",
320321
"github.*",

0 commit comments

Comments
 (0)