SolaceLabs
diff --git a/sam-rag/configs/agents/rag.yaml b/sam-rag/configs/agents/rag.yaml
Lines changed: 28 additions & 1 deletion
diff --git a/sam-rag/pyproject.toml b/sam-rag/pyproject.toml
Lines changed: 1 addition & 0 deletions
diff --git a/sam-rag/src/agents/rag/services/database/vector_db_base.py b/sam-rag/src/agents/rag/services/database/vector_db_base.py
Lines changed: 19 additions & 4 deletions
diff --git a/sam-rag/src/agents/rag/services/database/vector_db_implementation/__init__.py b/sam-rag/src/agents/rag/services/database/vector_db_implementation/__init__.py
Lines changed: 33 additions & 0 deletions
@@ -43,6 +43,11 @@ flows:
           llm_service_topic: ${SOLACE_AGENT_MESH_NAMESPACE}solace-agent-mesh/v1/llm-service/request/general-good/
           embedding_service_topic: ${SOLACE_AGENT_MESH_NAMESPACE}solace-agent-mesh/v1/embedding-service/request/text/
           agent_name: rag
+
+          # Hybrid Search Configuration
+          hybrid_search:
+            enabled: ${HYBRID_SEARCH_ENABLED} # Global on/off switch for hybrid search; expects a boolean (true/false)
+
           # Scanner configuration
           scanner:
             batch: true
@@ -276,6 +281,10 @@ flows:
               batch_size: 32
               additional_kwargs: {}
             normalize_embeddings: True
+            hybrid_search: # Configuration specific to hybrid search embedding
+              sparse_model_config: # Configuration for the sparse vector model (e.g., TF-IDF)
+                type: "tfidf" # Sparse model type; "tfidf" matches the current implementation
+                params: {} # Model-specific parameters
 
           # Vector database configuration
           vector_db:
@@ -286,6 +295,10 @@ flows:
               api_key: ${QDRANT_API_KEY}
               collection_name: ${QDRANT_COLLECTION}
               embedding_dimension: ${QDRANT_EMBEDDING_DIMENSION}
+              hybrid_search_params: # Qdrant specific params, active if global hybrid_search.enabled is true
+                sparse_vector_name: "sparse_db" # Example name for the sparse vector in Qdrant
+                # fusion_method: "rrf" # Example, if using direct Query API and need to specify
+                # Other Qdrant specific hybrid query params can go here
 
             # Chroma DB configuration
             # db_type: "chroma"
@@ -307,7 +320,21 @@ flows:
             #   metric: ${PINECONE_METRIC}
             #   cloud: ${PINECONE_CLOUD}
             #   region: ${PINECONE_REGION}
-
+            #   hybrid_search_params: # Pinecone specific params, active if global hybrid_search.enabled is true
+            #     alpha: 0.5 # Example: 0.0 for pure sparse, 1.0 for pure dense. Default often 0.5
+            
+            # Redis configuration (placeholder)
+            # db_type: "redis"
+            # db_params:
+            #   url: ${REDIS_URL} # e.g., redis://localhost:6379
+            #   index_name: ${REDIS_INDEX_NAME} # e.g., "rag_idx"
+            #   embedding_dimension: ${REDIS_EMBEDDING_DIMENSION}
+            #   text_field_name: "content" # Name of the text field in Redis for FT search
+            #   vector_field_name: "embedding" # Name of the vector field in Redis
+            #   hybrid_search_params: # Redis specific params, active if global hybrid_search.enabled is true
+            #     text_score_weight: 0.3 # Example weight for full-text search score
+            #     vector_score_weight: 0.7 # Example weight for vector similarity score
+            #     # Other params like HYBRID_POLICY (e.g., "WEIGHTED") if applicable via client
 
             # # PostgreSQL with pgvector
             # db_type: "pgvector"
 
@@ -28,6 +28,7 @@ dependencies = [
     "watchdog==6.0.0",  # Fixed syntax for watchdog dependency
     "ujson==5.10.0",  # For faster JSON processing
     "odfpy==1.4.1",
+    "scikit-learn>=1.0",  # For TF-IDF and other machine learning utilities
 ]
 
 [project.optional-dependencies]
 
@@ -11,14 +11,21 @@ class VectorDBBase(ABC):
     Abstract base class for vector databases.
     """
 
-    def __init__(self, config: Dict[str, Any] = None):
+    def __init__(
+        self,
+        config: Optional[Dict[str, Any]] = None,
+        hybrid_search_config: Optional[Dict[str, Any]] = None,
+    ):
         """
         Initialize the vector database with the given configuration.
 
         Args:
-            config: A dictionary containing configuration parameters.
+            config: A dictionary containing configuration parameters for the specific database.
+                None (or any other empty value) is stored as an empty dictionary.
+            hybrid_search_config: Optional dictionary containing hybrid search configuration.
+                Its "enabled" entry is normalized to a strict bool and stored as
+                ``hybrid_search_enabled``; a missing entry means disabled.
         """
         self.config = config or {}
+        self.hybrid_search_config = hybrid_search_config or {}
+
+        # Normalize the flag instead of storing the raw value: a string such as
+        # "false" (possible if the value comes from environment substitution --
+        # TODO confirm against the config loader) is truthy and would otherwise
+        # switch hybrid search on.
+        enabled = self.hybrid_search_config.get("enabled", False)
+        if isinstance(enabled, str):
+            enabled = enabled.strip().lower() in ("1", "true", "yes", "on")
+        self.hybrid_search_enabled = bool(enabled)
 
     @abstractmethod
     def add_documents(
@@ -27,15 +34,17 @@ def add_documents(
         embeddings: List[List[float]],
         metadatas: Optional[List[Dict[str, Any]]] = None,
         ids: Optional[List[str]] = None,
+        sparse_vectors: Optional[List[Optional[Dict[int, float]]]] = None,
     ) -> List[str]:
         """
         Add documents to the vector database.
 
         Args:
             documents: The documents to add.
-            embeddings: The embeddings of the documents.
+            embeddings: The dense embeddings of the documents.
             metadatas: Optional metadata for each document.
             ids: Optional IDs for each document.
+            sparse_vectors: Optional sparse vector representations for each document.
 
         Returns:
             The IDs of the added documents.
@@ -48,14 +57,18 @@ def search(
         query_embedding: List[float],
         top_k: int = 5,
         filter: Optional[Dict[str, Any]] = None,
+        query_sparse_vector: Optional[Dict[int, float]] = None,
+        request_hybrid: bool = False,
     ) -> List[Dict[str, Any]]:
         """
         Search for documents similar to the query embedding.
 
         Args:
-            query_embedding: The query embedding.
+            query_embedding: The dense query embedding.
             top_k: The number of results to return.
             filter: Optional filter to apply to the search.
+            query_sparse_vector: Optional sparse vector for the query.
+            request_hybrid: Flag to request hybrid search if available and enabled.
 
         Returns:
             A list of dictionaries containing the search results.
@@ -92,6 +105,7 @@ def update(
         documents: Optional[List[str]] = None,
         embeddings: Optional[List[List[float]]] = None,
         metadatas: Optional[List[Dict[str, Any]]] = None,
+        sparse_vectors: Optional[List[Optional[Dict[int, float]]]] = None,
     ) -> None:
         """
         Update documents in the vector database.
@@ -101,6 +115,7 @@ def update(
             documents: Optional new document contents.
             embeddings: Optional new embeddings.
             metadatas: Optional new metadata.
+            sparse_vectors: Optional sparse vector representations for each document.
         """
         pass
 
 
@@ -0,0 +1,33 @@
+"""
+Vector DB Implementations Package.
+
+This package contains specific implementations for various vector databases.
+"""
+
+# NOTE(review): every backend module is imported eagerly here, so importing this
+# package presumably requires each backend's client library to be installed --
+# confirm, or defer the imports if the backends are meant to be optional.
+from .pinecone_db import PineconeDB
+from .qdrant_db import QdrantDB
+# Both Redis modules export a class named RedisDB; each is aliased on import so
+# the two can coexist in this namespace.
+from .redis_legacy_db import RedisDB as RedisLegacyDB  # Alias to avoid name clash
+from .pgvector_db import PgVectorDB
+from .chroma_db import ChromaDB
+from .redis_vl_db import RedisDB as RedisVLDB  # Alias for the redisvl version
+
+# Public names of this package, i.e. what a client gets from
+# 'from . import *'.
+__all__ = [
+    "PineconeDB",
+    "QdrantDB",
+    "RedisLegacyDB",  # Use the alias
+    "PgVectorDB",
+    "ChromaDB",
+    "RedisVLDB",  # Use the alias
+]
+
+# Maps a backend key to its implementation class for dynamic loading by name.
+# NOTE(review): the sample agent config uses db_type "redis", which is not a key
+# here (only "redis_legacy" and "redis_vl") -- confirm how db_type is resolved.
+IMPLEMENTATIONS = {
+    "pinecone": PineconeDB,
+    "qdrant": QdrantDB,
+    "redis_legacy": RedisLegacyDB,
+    "pgvector": PgVectorDB,
+    "chroma": ChromaDB,
+    "redis_vl": RedisVLDB,
+}
Original file line number	Diff line number	Diff line change
`@@ -28,6 +28,7 @@ dependencies = [`
`28`	`28`	`"watchdog==6.0.0", # Fixed syntax for watchdog dependency`
`29`	`29`	`"ujson==5.10.0", # For faster JSON processing`
`30`	`30`	`"odfpy==1.4.1",`
	`31`	`+ "scikit-learn>=1.0", # For TF-IDF and other machine learning utilities`
`31`	`32`	`]`
`32`	`33`
`33`	`34`	`[project.optional-dependencies]`