@@ -119,6 +119,10 @@ def __init__(
119119 self .sparse_vector_name = sparse_vector_name
120120 self .hybrid_fusion_strategy = hybrid_fusion_strategy
121121
122+ # TODO(v2.0.0): Remove backward compatibility for unnamed vectors
123+ # TODO(v2.0.0): Make named vectors mandatory and simplify the codebase
124+ self .use_named_vectors = search_type in [SearchType .hybrid ]
125+
122126 if self .search_type in [SearchType .keyword , SearchType .hybrid ]:
123127 try :
124128 from fastembed import SparseTextEmbedding
@@ -131,7 +135,7 @@ def __init__(
131135
132136 except ImportError as e :
133137 raise ImportError (
134- "To use keyword/hybrid search, install the `fastembed` extra with `pip install 'qdrant-client[ fastembed]' `."
138+ "To use keyword/hybrid search, install the `fastembed` extra with `pip install fastembed`."
135139 ) from e
136140
137141 @property
@@ -176,7 +180,6 @@ def async_client(self) -> AsyncQdrantClient:
176180 return self ._async_client
177181
178182 def create (self ) -> None :
179- # Collection distance
180183 _distance = models .Distance .COSINE
181184 if self .distance == Distance .l2 :
182185 _distance = models .Distance .EUCLID
@@ -185,11 +188,18 @@ def create(self) -> None:
185188
186189 if not self .exists ():
187190 log_debug (f"Creating collection: { self .collection } " )
191+
192+ # Configure vectors based on search type
193+ if self .search_type == SearchType .vector :
194+ # Maintain backward compatibility with unnamed vectors
195+ vectors_config = models .VectorParams (size = self .dimensions , distance = _distance )
196+ else :
197+ # Use named vectors for hybrid search
198+ vectors_config = {self .dense_vector_name : models .VectorParams (size = self .dimensions , distance = _distance )}
199+
188200 self .client .create_collection (
189201 collection_name = self .collection ,
190- vectors_config = {self .dense_vector_name : models .VectorParams (size = self .dimensions , distance = _distance )}
191- if self .search_type in [SearchType .vector , SearchType .hybrid ]
192- else {},
202+ vectors_config = vectors_config ,
193203 sparse_vectors_config = {self .sparse_vector_name : models .SparseVectorParams ()}
194204 if self .search_type in [SearchType .keyword , SearchType .hybrid ]
195205 else None ,
@@ -206,11 +216,18 @@ async def async_create(self) -> None:
206216
207217 if not await self .async_exists ():
208218 log_debug (f"Creating collection asynchronously: { self .collection } " )
219+
220+ # Configure vectors based on search type
221+ if self .search_type == SearchType .vector :
222+ # Maintain backward compatibility with unnamed vectors
223+ vectors_config = models .VectorParams (size = self .dimensions , distance = _distance )
224+ else :
225+ # Use named vectors for hybrid search
226+ vectors_config = {self .dense_vector_name : models .VectorParams (size = self .dimensions , distance = _distance )}
227+
209228 await self .async_client .create_collection (
210229 collection_name = self .collection ,
211- vectors_config = {self .dense_vector_name : models .VectorParams (size = self .dimensions , distance = _distance )}
212- if self .search_type in [SearchType .vector , SearchType .hybrid ]
213- else {},
230+ vectors_config = vectors_config ,
214231 sparse_vectors_config = {self .sparse_vector_name : models .SparseVectorParams ()}
215232 if self .search_type in [SearchType .keyword , SearchType .hybrid ]
216233 else None ,
@@ -300,13 +317,25 @@ def insert(self, documents: List[Document], filters: Optional[Dict[str, Any]] =
300317 cleaned_content = document .content .replace ("\x00 " , "\ufffd " )
301318 doc_id = md5 (cleaned_content .encode ()).hexdigest ()
302319
303- vector = {}
304- if self .search_type in [SearchType .vector , SearchType .hybrid ]:
320+ # TODO(v2.0.0): Remove conditional vector naming logic
321+ if self .use_named_vectors :
322+ vector = {self .dense_vector_name : document .embedding }
323+ else :
324+ vector = document .embedding # type: ignore
325+
326+ if self .search_type == SearchType .vector :
327+ # For vector search, maintain backward compatibility with unnamed vectors
305328 document .embed (embedder = self .embedder )
306- vector [self .dense_vector_name ] = document .embedding
329+ vector = document .embedding # type: ignore
330+ else :
331+ # For other search types, use named vectors
332+ vector = {}
333+ if self .search_type in [SearchType .hybrid ]:
334+ document .embed (embedder = self .embedder )
335+ vector [self .dense_vector_name ] = document .embedding
307336
308- if self .search_type in [SearchType .keyword , SearchType .hybrid ]:
309- vector [self .sparse_vector_name ] = next (self .sparse_encoder .embed ([document .content ])).as_object ()
337+ if self .search_type in [SearchType .keyword , SearchType .hybrid ]:
338+ vector [self .sparse_vector_name ] = next (self .sparse_encoder .embed ([document .content ])).as_object ()
310339
311340 # Create payload with document properties
312341 payload = {
@@ -349,11 +378,19 @@ async def process_document(document):
349378 cleaned_content = document .content .replace ("\x00 " , "\ufffd " )
350379 doc_id = md5 (cleaned_content .encode ()).hexdigest ()
351380
352- vector = {}
353-
354- if self .search_type in [SearchType .vector , SearchType .hybrid ]:
381+ if self .search_type == SearchType .vector :
382+ # For vector search, maintain backward compatibility with unnamed vectors
355383 document .embed (embedder = self .embedder )
356- vector [self .dense_vector_name ] = document .embedding
384+ vector = document .embedding
385+ else :
386+ # For other search types, use named vectors
387+ vector = {}
388+ if self .search_type in [SearchType .hybrid ]:
389+ document .embed (embedder = self .embedder )
390+ vector [self .dense_vector_name ] = document .embedding
391+
392+ if self .search_type in [SearchType .keyword , SearchType .hybrid ]:
393+ vector [self .sparse_vector_name ] = next (self .sparse_encoder .embed ([document .content ])).as_object ()
357394
358395 if self .search_type in [SearchType .keyword , SearchType .hybrid ]:
359396 vector [self .sparse_vector_name ] = next (self .sparse_encoder .embed ([document .content ])).as_object ()
@@ -441,64 +478,77 @@ async def async_search(
441478
442479 return self ._build_search_results (results , query )
443480
444- def _run_vector_search_sync (
def _run_hybrid_search_sync(
    self,
    query: str,
    limit: int,
    filters: Optional[Dict[str, Any]],
) -> List[models.ScoredPoint]:
    """Run a hybrid (sparse + dense) search and return the fused points.

    The query text is embedded twice: once with ``self.embedder`` (dense)
    and once with ``self.sparse_encoder`` (sparse). Both embeddings are sent
    as prefetch stages of a single ``query_points`` call and combined with
    ``self.hybrid_fusion_strategy``.

    Args:
        query: Text to search for.
        limit: Maximum number of points requested from each prefetch stage
            and from the final fused result.
        filters: Forwarded unchanged as ``query_filter``.

    Returns:
        The ``points`` attribute of the ``query_points`` response.
    """
    dense_embedding = self.embedder.get_embedding(query)
    # The sparse encoder yields one embedding per input text; only one text is passed.
    sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()

    call = self.client.query_points(
        collection_name=self.collection,
        prefetch=[
            models.Prefetch(
                query=models.SparseVector(**sparse_embedding),
                limit=limit,
                using=self.sparse_vector_name,
            ),
            models.Prefetch(query=dense_embedding, limit=limit, using=self.dense_vector_name),
        ],
        query=models.FusionQuery(fusion=self.hybrid_fusion_strategy),
        with_vectors=True,
        with_payload=True,
        limit=limit,
        query_filter=filters,
    )
    return call.points
461506
462- def _run_keyword_search_sync (
507+ def _run_vector_search_sync (
463508 self ,
464509 query : str ,
465510 limit : int ,
466511 filters : Optional [Dict [str , Any ]],
467512 ) -> List [models .ScoredPoint ]:
468- sparse_embedding = next (self .sparse_encoder .embed ([query ])).as_object ()
469- call = self .client .query_points (
470- collection_name = self .collection ,
471- query = models .SparseVector (** sparse_embedding ),
472- with_vectors = True ,
473- with_payload = True ,
474- limit = limit ,
475- using = self .sparse_vector_name ,
476- query_filter = filters ,
477- )
513+ dense_embedding = self .embedder .get_embedding (query )
514+
515+ # TODO(v2.0.0): Remove this conditional and always use named vectors
516+ if self .use_named_vectors :
517+ call = self .client .query_points (
518+ collection_name = self .collection ,
519+ query = dense_embedding ,
520+ with_vectors = True ,
521+ with_payload = True ,
522+ limit = limit ,
523+ query_filter = filters ,
524+ using = self .dense_vector_name ,
525+ )
526+ else :
527+ # Backward compatibility mode - use unnamed vector
528+ call = self .client .query_points (
529+ collection_name = self .collection ,
530+ query = dense_embedding ,
531+ with_vectors = True ,
532+ with_payload = True ,
533+ limit = limit ,
534+ query_filter = filters ,
535+ )
478536 return call .points
479537
480- def _run_hybrid_search_sync (
def _run_keyword_search_sync(
    self,
    query: str,
    limit: int,
    filters: Optional[Dict[str, Any]],
) -> List[models.ScoredPoint]:
    """Run a sparse-vector (keyword) search and return the matching points.

    The query text is encoded with ``self.sparse_encoder`` and searched
    against the vector named ``self.sparse_vector_name``.

    Args:
        query: Text to search for.
        limit: Maximum number of points to return.
        filters: Forwarded unchanged as ``query_filter``.

    Returns:
        The ``points`` attribute of the ``query_points`` response.
    """
    # The sparse encoder yields one embedding per input text; only one text is passed.
    sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()

    call = self.client.query_points(
        collection_name=self.collection,
        query=models.SparseVector(**sparse_embedding),
        with_vectors=True,
        with_payload=True,
        limit=limit,
        using=self.sparse_vector_name,
        query_filter=filters,
    )
    return call.points
@@ -510,15 +560,28 @@ async def _run_vector_search_async(
510560 filters : Optional [Dict [str , Any ]],
511561 ) -> List [models .ScoredPoint ]:
512562 dense_embedding = self .embedder .get_embedding (query )
513- call = await self .async_client .query_points (
514- collection_name = self .collection ,
515- query = dense_embedding ,
516- with_vectors = True ,
517- with_payload = True ,
518- limit = limit ,
519- query_filter = filters ,
520- using = self .dense_vector_name ,
521- )
563+
564+ # TODO(v2.0.0): Remove this conditional and always use named vectors
565+ if self .use_named_vectors :
566+ call = await self .async_client .query_points (
567+ collection_name = self .collection ,
568+ query = dense_embedding ,
569+ with_vectors = True ,
570+ with_payload = True ,
571+ limit = limit ,
572+ query_filter = filters ,
573+ using = self .dense_vector_name ,
574+ )
575+ else :
576+ # Backward compatibility mode - use unnamed vector
577+ call = await self .async_client .query_points (
578+ collection_name = self .collection ,
579+ query = dense_embedding ,
580+ with_vectors = True ,
581+ with_payload = True ,
582+ limit = limit ,
583+ query_filter = filters ,
584+ )
522585 return call .points
523586
524587 async def _run_keyword_search_async (
0 commit comments