From 56c0f875e0dd2fcbfd8b5a305161d8aeeb5568ce Mon Sep 17 00:00:00 2001 From: Conrado Silva Miranda Date: Thu, 21 Nov 2024 15:32:52 -0800 Subject: [PATCH 1/2] Refactor vector store for dependency injection --- llm-service/app/ai/indexing/index.py | 4 +- .../vector_stores/__init__.py} | 11 - .../vector_stores/qdrant.py} | 42 ++-- .../vector_stores}/vector_store.py | 2 +- .../app/routers/index/data_source/__init__.py | 208 ++++++++++-------- .../app/routers/index/sessions/__init__.py | 8 +- llm-service/app/services/chat.py | 3 +- llm-service/app/services/doc_summaries.py | 9 +- llm-service/app/services/qdrant.py | 41 +--- llm-service/app/tests/conftest.py | 50 ++--- .../tests/routers/index/test_data_source.py | 9 +- llm-service/pdm.lock | 34 ++- llm-service/pyproject.toml | 2 +- 13 files changed, 227 insertions(+), 196 deletions(-) rename llm-service/app/{services/rag_vector_store.py => ai/vector_stores/__init__.py} (83%) rename llm-service/app/{services/rag_qdrant_vector_store.py => ai/vector_stores/qdrant.py} (71%) rename llm-service/app/{services => ai/vector_stores}/vector_store.py (97%) diff --git a/llm-service/app/ai/indexing/index.py b/llm-service/app/ai/indexing/index.py index fcbc822ff..beb62f007 100644 --- a/llm-service/app/ai/indexing/index.py +++ b/llm-service/app/ai/indexing/index.py @@ -49,8 +49,8 @@ from llama_index.core.schema import BaseNode, Document, TextNode from llama_index.readers.file import DocxReader +from ...ai.vector_stores.vector_store import VectorStore from ...services.utils import batch_sequence, flatten_sequence -from ...services.vector_store import VectorStore from .readers.csv import CSVReader from .readers.json import JSONReader from .readers.nop import NopReader @@ -124,7 +124,7 @@ def index_file(self, file_path: Path, document_id: str) -> None: # we're capturing "text". converted_chunks: List[BaseNode] = [chunk for chunk in chunk_batch] - chunks_vector_store = self.chunks_vector_store.access_vector_store() + chunks_vector_store = self.chunks_vector_store.llama_vector_store() chunks_vector_store.add(converted_chunks) logger.debug(f"Indexing file: {file_path} completed") diff --git a/llm-service/app/services/rag_vector_store.py b/llm-service/app/ai/vector_stores/__init__.py similarity index 83% rename from llm-service/app/services/rag_vector_store.py rename to llm-service/app/ai/vector_stores/__init__.py index 332134289..e2b4ac6c2 100644 --- a/llm-service/app/services/rag_vector_store.py +++ b/llm-service/app/ai/vector_stores/__init__.py @@ -35,14 +35,3 @@ # BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF # DATA. # - -from .rag_qdrant_vector_store import RagQdrantVectorStore -from .vector_store import VectorStore - - -def create_rag_vector_store(data_source_id: int) -> VectorStore: - return RagQdrantVectorStore(table_name=f"index_{data_source_id}") - - -def create_summary_vector_store(data_source_id: int) -> VectorStore: - return RagQdrantVectorStore(table_name=f"summary_index_{data_source_id}") diff --git a/llm-service/app/services/rag_qdrant_vector_store.py b/llm-service/app/ai/vector_stores/qdrant.py similarity index 71% rename from llm-service/app/services/rag_qdrant_vector_store.py rename to llm-service/app/ai/vector_stores/qdrant.py index f7b78f681..a833b2c49 100644 --- a/llm-service/app/services/rag_qdrant_vector_store.py +++ b/llm-service/app/ai/vector_stores/qdrant.py @@ -37,21 +37,43 @@ # import os +from typing import Optional import qdrant_client from llama_index.core.vector_stores.types import BasePydanticVectorStore -from llama_index.vector_stores.qdrant import QdrantVectorStore +from llama_index.vector_stores.qdrant import ( + QdrantVectorStore as LlamaIndexQdrantVectorStore, +) from qdrant_client.http.models import CountResult from .vector_store import VectorStore -class RagQdrantVectorStore(VectorStore): +def new_qdrant_client() -> qdrant_client.QdrantClient: host = os.environ.get("QDRANT_HOST", "localhost") port = 6333 + return qdrant_client.QdrantClient(host=host, port=port) - def __init__(self, table_name: str, memory_store: bool = False): - self.client = self._create_qdrant_clients(memory_store) + +class QdrantVectorStore(VectorStore): + @staticmethod + def for_chunks( + data_source_id: int, client: Optional[qdrant_client.QdrantClient] = None + ) -> "QdrantVectorStore": + return QdrantVectorStore(table_name=f"index_{data_source_id}", client=client) + + @staticmethod + def for_summaries( + data_source_id: int, client: Optional[qdrant_client.QdrantClient] = None + ) -> "QdrantVectorStore": + return QdrantVectorStore( + table_name=f"summary_index_{data_source_id}", client=client + ) + + def __init__( + self, table_name: str, client: Optional[qdrant_client.QdrantClient] = None + ): + self.client = client or new_qdrant_client() self.table_name = table_name def size(self) -> int: @@ -70,14 +92,6 @@ def delete(self) -> None: def exists(self) -> bool: return self.client.collection_exists(self.table_name) - def _create_qdrant_clients(self, memory_store: bool) -> qdrant_client.QdrantClient: - if memory_store: - client = qdrant_client.QdrantClient(":memory:") - else: - client = qdrant_client.QdrantClient(host=self.host, port=self.port) - - return client - - def access_vector_store(self) -> BasePydanticVectorStore: - vector_store = QdrantVectorStore(self.table_name, self.client) + def llama_vector_store(self) -> BasePydanticVectorStore: + vector_store = LlamaIndexQdrantVectorStore(self.table_name, self.client) return vector_store diff --git a/llm-service/app/services/vector_store.py b/llm-service/app/ai/vector_stores/vector_store.py similarity index 97% rename from llm-service/app/services/vector_store.py rename to llm-service/app/ai/vector_stores/vector_store.py index 77fd31f29..e6bdfaf17 100644 --- a/llm-service/app/services/vector_store.py +++ b/llm-service/app/ai/vector_stores/vector_store.py @@ -55,7 +55,7 @@ def delete(self) -> None: """Delete the vector store""" @abstractmethod - def access_vector_store(self) -> BasePydanticVectorStore: + def llama_vector_store(self) -> BasePydanticVectorStore: """Access the underlying llama-index vector store implementation""" @abstractmethod diff --git a/llm-service/app/routers/index/data_source/__init__.py b/llm-service/app/routers/index/data_source/__init__.py index dedf8c5d1..d45343bc8 100644 --- a/llm-service/app/routers/index/data_source/__init__.py +++ b/llm-service/app/routers/index/data_source/__init__.py @@ -33,13 +33,17 @@ import tempfile from pathlib import Path -from fastapi import APIRouter +from fastapi import APIRouter, Depends +from fastapi_utils.cbv import cbv +from llama_index.core.indices import VectorStoreIndex from llama_index.core.node_parser import SentenceSplitter from pydantic import BaseModel from .... import exceptions from ....ai.indexing.index import Indexer -from ....services import doc_summaries, models, qdrant, rag_vector_store, s3 +from ....ai.vector_stores.qdrant import QdrantVectorStore +from ....ai.vector_stores.vector_store import VectorStore +from ....services import doc_summaries, models, s3 logger = logging.getLogger(__name__) @@ -51,58 +55,6 @@ class SummarizeDocumentRequest(BaseModel): s3_document_key: str -@router.get("/size", summary="Returns the number of chunks in the data source.") -@exceptions.propagates -def size(data_source_id: int) -> int: - data_source_size = qdrant.size_of(data_source_id) - qdrant.check_data_source_exists(data_source_size) - return data_source_size - - -@router.get("/chunks/{chunk_id}", summary="Returns the content of a chunk.") -@exceptions.propagates -def chunk_contents(data_source_id: int, chunk_id: str) -> str: - return qdrant.chunk_contents(data_source_id, chunk_id) - - -@router.delete("", summary="Deletes the data source from the index.") -@exceptions.propagates -def delete(data_source_id: int) -> None: - qdrant.delete(data_source_id) - doc_summaries.delete_data_source(data_source_id) - - -@router.get("/documents/{doc_id}/summary", summary="summarize a single document") -@exceptions.propagates -def get_document_summary(data_source_id: int, doc_id: str) -> str: - summaries = doc_summaries.read_summary(data_source_id, doc_id) - return summaries - - -@router.get("/summary", summary="summarize all documents for a datasource") -@exceptions.propagates -def get_document_summary_of_summaries(data_source_id: int) -> str: - return doc_summaries.summarize_data_source(data_source_id) - - -@router.post("/summarize-document", summary="summarize a document") -@exceptions.propagates -def summarize_document( - data_source_id: int, - request: SummarizeDocumentRequest, -) -> str: - return doc_summaries.generate_summary( - data_source_id, request.s3_bucket_name, request.s3_document_key - ) - - -@router.delete("/documents/{doc_id}", summary="delete a single document") -@exceptions.propagates -def delete_document(data_source_id: int, doc_id: str) -> None: - qdrant.delete_document(data_source_id, doc_id) - doc_summaries.delete_document(data_source_id, doc_id) - - class RagIndexDocumentConfiguration(BaseModel): # TODO: Add more params chunk_size: int = 512 # this is llama-index's default @@ -116,38 +68,118 @@ class RagIndexDocumentRequest(BaseModel): configuration: RagIndexDocumentConfiguration = RagIndexDocumentConfiguration() -@router.post( - "/documents/download-and-index", - summary="Download and index document", - description="Download document from S3 and index in Pinecone", -) -@exceptions.propagates -def download_and_index( - data_source_id: int, - request: RagIndexDocumentRequest, -) -> None: - with tempfile.TemporaryDirectory() as tmpdirname: - logger.debug("created temporary directory %s", tmpdirname) - s3.download(tmpdirname, request.s3_bucket_name, request.s3_document_key) - # Get the single file in the directory - files = os.listdir(tmpdirname) - if len(files) != 1: - raise ValueError("Expected a single file in the temporary directory") - file_path = Path(os.path.join(tmpdirname, files[0])) - - indexer = Indexer( - data_source_id, - splitter=SentenceSplitter( - chunk_size=request.configuration.chunk_size, - chunk_overlap=int( - request.configuration.chunk_overlap - * 0.01 - * request.configuration.chunk_size - ), - ), - embedding_model=models.get_embedding_model(), - chunks_vector_store=rag_vector_store.create_rag_vector_store( - data_source_id - ), +@cbv(router) +class DataSourceController: + chunks_vector_store: VectorStore = Depends( + lambda data_source_id: QdrantVectorStore.for_chunks(data_source_id) + ) + + @router.get( + "/size", + summary="Returns the number of chunks in the data source.", + response_model=None, + ) + @exceptions.propagates + def size(self) -> int: + return self.chunks_vector_store.size() + + @router.get( + "/chunks/{chunk_id}", + summary="Returns the content of a chunk.", + response_model=None, + ) + @exceptions.propagates + def chunk_contents(self, chunk_id: str) -> str: + return ( + self.chunks_vector_store.llama_vector_store() + .get_nodes([chunk_id])[0] + .get_content() + ) + + @router.delete( + "/", summary="Deletes the data source from the index.", response_model=None + ) + @exceptions.propagates + def delete(self, data_source_id: int) -> None: + self.chunks_vector_store.delete() + doc_summaries.delete_data_source(data_source_id) + + @router.get( + "/documents/{doc_id}/summary", + summary="summarize a single document", + response_model=None, + ) + @exceptions.propagates + def get_document_summary(self, data_source_id: int, doc_id: str) -> str: + summaries = doc_summaries.read_summary(data_source_id, doc_id) + return summaries + + @router.get( + "/summary", + summary="summarize all documents for a datasource", + response_model=None, + ) + @exceptions.propagates + def get_document_summary_of_summaries(self, data_source_id: int) -> str: + return doc_summaries.summarize_data_source(data_source_id) + + @router.post( + "/summarize-document", summary="summarize a document", response_model=None + ) + @exceptions.propagates + def summarize_document( + self, + data_source_id: int, + request: SummarizeDocumentRequest, + ) -> str: + return doc_summaries.generate_summary( + data_source_id, request.s3_bucket_name, request.s3_document_key + ) + + @router.delete( + "/documents/{doc_id}", summary="delete a single document", response_model=None + ) + @exceptions.propagates + def delete_document(self, data_source_id: int, doc_id: str) -> None: + index = VectorStoreIndex.from_vector_store( + vector_store=self.chunks_vector_store.llama_vector_store(), + embed_model=models.get_embedding_model(), ) - indexer.index_file(file_path, request.document_id) + index.delete_ref_doc(doc_id) + doc_summaries.delete_document(data_source_id, doc_id) + + @router.post( + "/documents/download-and-index", + summary="Download and index document", + description="Download document from S3 and index in Pinecone", + response_model=None, + ) + @exceptions.propagates + def download_and_index( + self, + data_source_id: int, + request: RagIndexDocumentRequest, + ) -> None: + with tempfile.TemporaryDirectory() as tmpdirname: + logger.debug("created temporary directory %s", tmpdirname) + s3.download(tmpdirname, request.s3_bucket_name, request.s3_document_key) + # Get the single file in the directory + files = os.listdir(tmpdirname) + if len(files) != 1: + raise ValueError("Expected a single file in the temporary directory") + file_path = Path(os.path.join(tmpdirname, files[0])) + + indexer = Indexer( + data_source_id, + splitter=SentenceSplitter( + chunk_size=request.configuration.chunk_size, + chunk_overlap=int( + request.configuration.chunk_overlap + * 0.01 + * request.configuration.chunk_size + ), + ), + embedding_model=models.get_embedding_model(), + chunks_vector_store=self.chunks_vector_store, + ) + indexer.index_file(file_path, request.document_id) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index 422b988d1..37e9f929d 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -38,10 +38,11 @@ import time import uuid -from fastapi import APIRouter +from fastapi import APIRouter, HTTPException from pydantic import BaseModel from .... import exceptions +from ....ai.vector_stores.qdrant import QdrantVectorStore from ....rag_types import RagPredictConfiguration from ....services import llm_completion, qdrant from ....services.chat import generate_suggested_questions, v2_chat @@ -130,8 +131,9 @@ def suggest_questions( session_id: int, request: SuggestQuestionsRequest, ) -> RagSuggestedQuestionsResponse: - data_source_size = qdrant.size_of(request.data_source_id) - qdrant.check_data_source_exists(data_source_size) + data_source_size = QdrantVectorStore.for_chunks(request.data_source_id).size() + if data_source_size == -1: + raise HTTPException(status_code=404, detail="Knowledge base not found.") suggested_questions = generate_suggested_questions( request.configuration, request.data_source_id, data_source_size, session_id ) diff --git a/llm-service/app/services/chat.py b/llm-service/app/services/chat.py index 9df23f39f..804f9d020 100644 --- a/llm-service/app/services/chat.py +++ b/llm-service/app/services/chat.py @@ -43,6 +43,7 @@ from llama_index.core.base.llms.types import MessageRole from llama_index.core.chat_engine.types import AgentChatResponse +from ..ai.vector_stores.qdrant import QdrantVectorStore from ..rag_types import RagPredictConfiguration from . import evaluators, qdrant from .chat_store import ( @@ -61,7 +62,7 @@ def v2_chat( configuration: RagPredictConfiguration, ) -> RagStudioChatMessage: response_id = str(uuid.uuid4()) - if qdrant.size_of(data_source_id) == 0: + if QdrantVectorStore.for_chunks(data_source_id).size() == 0: return RagStudioChatMessage( id=response_id, source_nodes=[], diff --git a/llm-service/app/services/doc_summaries.py b/llm-service/app/services/doc_summaries.py index 7572d93ca..586cdd641 100644 --- a/llm-service/app/services/doc_summaries.py +++ b/llm-service/app/services/doc_summaries.py @@ -50,8 +50,9 @@ from llama_index.core.node_parser import SentenceSplitter from llama_index.core.readers import SimpleDirectoryReader +from ..ai.vector_stores.qdrant import QdrantVectorStore from ..config import settings -from . import models, rag_vector_store +from . import models from .s3 import download from .utils import get_last_segment @@ -158,9 +159,9 @@ def summarize_data_source(data_source_id: int) -> str: def make_storage_context(data_source_id: int) -> StorageContext: storage_context = StorageContext.from_defaults( persist_dir=index_dir(data_source_id), - vector_store=rag_vector_store.create_summary_vector_store( + vector_store=QdrantVectorStore.for_summaries( data_source_id - ).access_vector_store(), + ).llama_vector_store(), ) return storage_context @@ -174,7 +175,7 @@ def delete_data_source(data_source_id: int) -> None: index = index_dir(data_source_id) if os.path.exists(index): shutil.rmtree(index) - rag_vector_store.create_summary_vector_store(data_source_id).delete() + QdrantVectorStore.for_summaries(data_source_id).delete() def delete_document(data_source_id: int, doc_id: str) -> None: diff --git a/llm-service/app/services/qdrant.py b/llm-service/app/services/qdrant.py index 314264359..baf837fc9 100644 --- a/llm-service/app/services/qdrant.py +++ b/llm-service/app/services/qdrant.py @@ -47,56 +47,21 @@ from llama_index.core.query_engine import RetrieverQueryEngine from llama_index.core.response_synthesizers import get_response_synthesizer +from ..ai.vector_stores.qdrant import QdrantVectorStore from ..rag_types import RagPredictConfiguration -from . import models, rag_vector_store +from . import models from .chat_store import RagContext logger = logging.getLogger(__name__) -def check_data_source_exists(data_source_size: int) -> None: - if data_source_size == -1: - raise HTTPException(status_code=404, detail="Knowledge base not found.") - - -def size_of(data_source_id: int) -> int: - vector_store = rag_vector_store.create_rag_vector_store(data_source_id) - return vector_store.size() - - -def chunk_contents(data_source_id: int, chunk_id: str) -> str: - vector_store = rag_vector_store.create_rag_vector_store( - data_source_id - ).access_vector_store() - node = vector_store.get_nodes([chunk_id])[0] - return node.get_content() - - -def delete(data_source_id: int) -> None: - vector_store = rag_vector_store.create_rag_vector_store(data_source_id) - vector_store.delete() - - -def delete_document(data_source_id: int, document_id: str) -> None: - vector_store = rag_vector_store.create_rag_vector_store( - data_source_id - ).access_vector_store() - index = VectorStoreIndex.from_vector_store( - vector_store=vector_store, - embed_model=models.get_embedding_model(), - ) - index.delete_ref_doc(document_id) - - def query( data_source_id: int, query_str: str, configuration: RagPredictConfiguration, chat_history: list[RagContext], ) -> AgentChatResponse: - vector_store = rag_vector_store.create_rag_vector_store( - data_source_id - ).access_vector_store() + vector_store = QdrantVectorStore.for_chunks(data_source_id).llama_vector_store() embedding_model = models.get_embedding_model() index = VectorStoreIndex.from_vector_store( vector_store=vector_store, diff --git a/llm-service/app/tests/conftest.py b/llm-service/app/tests/conftest.py index 092df5048..e89c7ff08 100644 --- a/llm-service/app/tests/conftest.py +++ b/llm-service/app/tests/conftest.py @@ -45,6 +45,7 @@ import boto3 import pytest +import qdrant_client as q_client from boto3.resources.base import ServiceResource from fastapi.testclient import TestClient from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding @@ -62,17 +63,23 @@ from moto import mock_aws from pydantic import Field +from app.ai.vector_stores.qdrant import QdrantVectorStore from app.main import app -from app.services import models, rag_vector_store -from app.services.rag_qdrant_vector_store import RagQdrantVectorStore +from app.services import models from app.services.utils import get_last_segment + @dataclass class BotoObject: bucket_name: str key: str +@pytest.fixture +def qdrant_client() -> q_client.QdrantClient: + return q_client.QdrantClient(":memory:") + + @pytest.fixture def aws_region() -> str: return os.environ.get("AWS_DEFAULT_REGION", "us-west-2") @@ -188,38 +195,27 @@ def _get_text_embedding(self, text: str) -> Embedding: return [0.1] * 1024 -# We're hacking our vector stores to run in-memory. Since they are in memory, we need -# to be sure to return the same instance for the same data source id -table_name_to_vector_store: Dict[int, RagQdrantVectorStore] = {} - - -def _get_vector_store_instance( - data_source_id: int, table_prefix: str -) -> RagQdrantVectorStore: - if data_source_id in table_name_to_vector_store: - return table_name_to_vector_store[data_source_id] - res = RagQdrantVectorStore( - table_name=f"{table_prefix}{data_source_id}", memory_store=True - ) - table_name_to_vector_store[data_source_id] = res - return res - - @pytest.fixture(autouse=True) -def vector_store(monkeypatch: pytest.MonkeyPatch) -> None: +def vector_store( + monkeypatch: pytest.MonkeyPatch, qdrant_client: q_client.QdrantClient +) -> None: + original = QdrantVectorStore.for_chunks monkeypatch.setattr( - rag_vector_store, - "create_rag_vector_store", - lambda ds_id: _get_vector_store_instance(ds_id, "index_"), + QdrantVectorStore, + "for_chunks", + lambda ds_id: original(ds_id, qdrant_client), ) @pytest.fixture(autouse=True) -def summary_vector_store(monkeypatch: pytest.MonkeyPatch) -> None: +def summary_vector_store( + monkeypatch: pytest.MonkeyPatch, qdrant_client: q_client.QdrantClient +) -> None: + original = QdrantVectorStore.for_summaries monkeypatch.setattr( - rag_vector_store, - "create_summary_vector_store", - lambda ds_id: _get_vector_store_instance(ds_id, "summary_index_"), + QdrantVectorStore, + "for_summaries", + lambda ds_id: original(ds_id, qdrant_client), ) diff --git a/llm-service/app/tests/routers/index/test_data_source.py b/llm-service/app/tests/routers/index/test_data_source.py index 5498d6237..38c2e0619 100644 --- a/llm-service/app/tests/routers/index/test_data_source.py +++ b/llm-service/app/tests/routers/index/test_data_source.py @@ -40,16 +40,15 @@ from typing import Any -from app.services import models, rag_vector_store +from app.ai.vector_stores.qdrant import QdrantVectorStore +from app.services import models from fastapi.testclient import TestClient from llama_index.core import VectorStoreIndex from llama_index.core.vector_stores import VectorStoreQuery def get_vector_store_index(data_source_id: int) -> VectorStoreIndex: - vector_store = rag_vector_store.create_rag_vector_store( - data_source_id - ).access_vector_store() + vector_store = QdrantVectorStore.for_chunks(data_source_id).llama_vector_store() index = VectorStoreIndex.from_vector_store( vector_store, embed_model=models.get_embedding_model() ) @@ -99,7 +98,7 @@ def test_delete_data_source( response = client.delete(f"/data_sources/{data_source_id}") assert response.status_code == 200 - vector_store = rag_vector_store.create_rag_vector_store(data_source_id) + vector_store = QdrantVectorStore.for_chunks(data_source_id) assert vector_store.exists() is False get_summary_response = client.get( diff --git a/llm-service/pdm.lock b/llm-service/pdm.lock index f7cc56681..8e10a8433 100644 --- a/llm-service/pdm.lock +++ b/llm-service/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:37f4e048b91c6850dc6133d0ab73273fa296454ed6776ad177856e619ff421d3" +content_hash = "sha256:7332d13baf226d936edd5f0c92df7dc679c7de03acca90b8f0cc3ed7ffe59ca4" [[metadata.targets]] requires_python = "==3.10.*" @@ -457,6 +457,22 @@ files = [ {file = "fastapi_cli-0.0.5.tar.gz", hash = "sha256:d30e1239c6f46fcb95e606f02cdda59a1e2fa778a54b64686b3ff27f6211ff9f"}, ] +[[package]] +name = "fastapi-utils" +version = "0.8.0" +requires_python = "<4.0,>=3.8" +summary = "Reusable utilities for FastAPI" +groups = ["default"] +dependencies = [ + "fastapi<1.0,>=0.89", + "psutil<6,>=5", + "pydantic<3.0,>1.0", +] +files = [ + {file = "fastapi_utils-0.8.0-py3-none-any.whl", hash = "sha256:6c4d507a76bab9a016cee0c4fa3a4638c636b2b2689e39c62254b1b2e4e81825"}, + {file = "fastapi_utils-0.8.0.tar.gz", hash = "sha256:eca834e80c09f85df30004fe5e861981262b296f60c93d5a1a1416fe4c784140"}, +] + [[package]] name = "filelock" version = "3.16.1" @@ -1360,6 +1376,22 @@ files = [ {file = "protobuf-5.28.3.tar.gz", hash = "sha256:64badbc49180a5e401f373f9ce7ab1d18b63f7dd4a9cdc43c92b9f0b481cef7b"}, ] +[[package]] +name = "psutil" +version = "5.9.8" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +summary = "Cross-platform lib for process and system monitoring in Python." +groups = ["default"] +files = [ + {file = "psutil-5.9.8-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81"}, + {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421"}, + {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4"}, + {file = "psutil-5.9.8-cp37-abi3-win32.whl", hash = "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0"}, + {file = "psutil-5.9.8-cp37-abi3-win_amd64.whl", hash = "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf"}, + {file = "psutil-5.9.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8"}, + {file = "psutil-5.9.8.tar.gz", hash = "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c"}, +] + [[package]] name = "py-partiql-parser" version = "0.5.6" diff --git a/llm-service/pyproject.toml b/llm-service/pyproject.toml index eeff2848d..27265c12e 100644 --- a/llm-service/pyproject.toml +++ b/llm-service/pyproject.toml @@ -5,7 +5,7 @@ description = "Default template for PDM package" authors = [ {name = "Conrado Silva Miranda", email = "csilvamiranda@cloudera.com"}, ] -dependencies = ["llama-index-core==0.10.68", "llama-index-readers-file==0.1.33", "fastapi==0.111.0", "pydantic==2.8.2", "pydantic-settings==2.3.4", "boto3>=1.35.66", "llama-index-embeddings-bedrock==0.2.1", "llama-index-llms-bedrock==0.1.13", "llama-index-llms-openai==0.1.31", "llama-index-llms-mistralai==0.1.20", "llama-index-embeddings-openai==0.1.11", "llama-index-vector-stores-qdrant==0.2.17", "docx2txt>=0.8", "pandas>=2.2.3"] +dependencies = ["llama-index-core==0.10.68", "llama-index-readers-file==0.1.33", "fastapi==0.111.0", "pydantic==2.8.2", "pydantic-settings==2.3.4", "boto3>=1.35.66", "llama-index-embeddings-bedrock==0.2.1", "llama-index-llms-bedrock==0.1.13", "llama-index-llms-openai==0.1.31", "llama-index-llms-mistralai==0.1.20", "llama-index-embeddings-openai==0.1.11", "llama-index-vector-stores-qdrant==0.2.17", "docx2txt>=0.8", "pandas>=2.2.3", "fastapi-utils>=0.8.0"] requires-python = "==3.10.*" readme = "README.md" license = {text = "APACHE"} From 8b128b72371accbaa2cd9caa2290fa85259b9ab4 Mon Sep 17 00:00:00 2001 From: Conrado Silva Miranda Date: Fri, 22 Nov 2024 09:12:06 -0800 Subject: [PATCH 2/2] lint --- llm-service/app/routers/index/sessions/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llm-service/app/routers/index/sessions/__init__.py b/llm-service/app/routers/index/sessions/__init__.py index 37e9f929d..cfe25ce79 100644 --- a/llm-service/app/routers/index/sessions/__init__.py +++ b/llm-service/app/routers/index/sessions/__init__.py @@ -44,7 +44,7 @@ from .... import exceptions from ....ai.vector_stores.qdrant import QdrantVectorStore from ....rag_types import RagPredictConfiguration -from ....services import llm_completion, qdrant +from ....services import llm_completion from ....services.chat import generate_suggested_questions, v2_chat from ....services.chat_store import RagStudioChatMessage, chat_store