Skip to content

Commit d8225f3

Browse files
ewilliams-cloudera, baasitsharief, jkwatson, mliu-cloudera, actions-user
authored
Tool Calling (#219)
* feat: added tool calling with retrieval and search results (#214) * Retrieval and Searching happens in tandem * Source nodes cited but not available in chat response are also provided to chat response with a score of 0.0 i.e. source node information used from chat history * Reduced llm calls by removing the number of agents and using only tasks * Further refactoring might be needed * support openai for tool calling * Propagate error from chat streaming * fix mypy issues * conditionally do evals based on whether we have source nodes * don't show direct llm call alert if tool calling is enabled * add opik for llama-index, fix chat history query! * catch an error when trying to extract source nodes from responses * don't scroll to top of response when done streaming * added azure api version to settings and prompt changes * wip on using mcp with crewai * switch to playing with the fetch mcp server * remove the unneeded env to the uvx command * refactor to allow multiple MCP servers/toolsets, and add the mcp-server-fetch as an option * add in text2sql2text tool * move to using mcp.json * add selectedTools as an attribute of the session queryConfiguration * hook up selected tools to the UI * use the session's tools settings * rename amp settings to studio settings * remove path to caii domain * fix table name * remove tools from query configuration and remove calls to search (leave in search code for now) * move mcp.json file to tools in root * fix mypy * Update release version to dev-testing * use the right model name for crew * added verbosity back * Add some text to the tool calling options about the power of the inference model * readme for the mcp.json * remove text2sql tool * minor changes to description and tools manager formatting * add support for env * add in the root environment to the tools * add tools/ to gitignore * load node env before starting up python * fix a mypy issue * fix mypy issue with missing model * remove console.error * bug fix to passing 
task contexts, and minor prompt changes * remove serper tool * Rename model provider tests * fix bug with overriding tools when updating session * added date to researcher task description * Rename methods; use list instead of List * Add methods to provider interface * add more logs for tool calling * Implement methods * Clean up code * wip on moving chat history * get ruff passing * WIP mcp tool calling * ruff check * remove mcp server * add beta tags * remove comment on chat history tool --------- Co-authored-by: Baasit Sharief <baasitsharief@gmail.com> Co-authored-by: jwatson <jkwatson@gmail.com> Co-authored-by: Michael Liu <mliu@cloudera.com> Co-authored-by: actions-user <actions@github.com>
1 parent 0daf0c5 commit d8225f3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+1857
-537
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ databases/
99
**/docling-output.txt
1010
**/.DS_Store
1111
.history
12-
addresses/
12+
addresses/
13+
tools/

backend/src/main/java/com/cloudera/cai/rag/Types.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,12 @@ public record RagDataSource(
101101
@Nullable Long totalDocSize,
102102
boolean availableForDefaultProject) {}
103103

104+
@With
104105
public record QueryConfiguration(
105-
boolean enableHyde, boolean enableSummaryFilter, boolean enableToolCalling) {}
106+
boolean enableHyde,
107+
boolean enableSummaryFilter,
108+
boolean enableToolCalling,
109+
List<String> selectedTools) {}
106110

107111
@With
108112
@Builder

backend/src/main/java/com/cloudera/cai/rag/sessions/SessionRepository.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
@Component
5858
public class SessionRepository {
5959
public static final Types.QueryConfiguration DEFAULT_QUERY_CONFIGURATION =
60-
new Types.QueryConfiguration(false, true, false);
60+
new Types.QueryConfiguration(false, true, false, List.of());
6161
private final Jdbi jdbi;
6262
private final ObjectMapper objectMapper = new ObjectMapper();
6363

@@ -169,6 +169,9 @@ private Types.QueryConfiguration extractQueryConfiguration(RowView rowView)
169169
if (queryConfiguration == null) {
170170
return DEFAULT_QUERY_CONFIGURATION;
171171
}
172+
if (queryConfiguration.selectedTools() == null) {
173+
queryConfiguration = queryConfiguration.withSelectedTools(List.of());
174+
}
172175
return queryConfiguration;
173176
}
174177

backend/src/test/java/com/cloudera/cai/rag/TestData.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ public static Types.Session createTestSessionInstance(
8282
"test-model",
8383
"test-rerank-model",
8484
3,
85-
new Types.QueryConfiguration(false, true, true));
85+
new Types.QueryConfiguration(false, true, true, List.of()));
8686
}
8787

8888
public static Types.CreateSession createSessionInstance(String sessionName) {
@@ -97,7 +97,7 @@ public static Types.CreateSession createSessionInstance(
9797
"test-model",
9898
"test-rerank-model",
9999
3,
100-
new Types.QueryConfiguration(false, true, true),
100+
new Types.QueryConfiguration(false, true, true, List.of()),
101101
projectId);
102102
}
103103

backend/src/test/java/com/cloudera/cai/rag/sessions/SessionControllerTest.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,8 @@ void update() {
145145
.withRerankModel(updatedRerankModel)
146146
.withName(updatedName)
147147
.withProjectId(updatedProjectId)
148-
.withQueryConfiguration(new Types.QueryConfiguration(true, false, true)),
148+
.withQueryConfiguration(
149+
new Types.QueryConfiguration(true, false, true, List.of("foo"))),
149150
request);
150151

151152
assertThat(updatedSession.id()).isNotNull();
@@ -160,7 +161,7 @@ void update() {
160161
assertThat(updatedSession.createdById()).isEqualTo("test-user");
161162
assertThat(updatedSession.lastInteractionTime()).isNull();
162163
assertThat(updatedSession.queryConfiguration())
163-
.isEqualTo(new Types.QueryConfiguration(true, false, true));
164+
.isEqualTo(new Types.QueryConfiguration(true, false, true, List.of("foo")));
164165
}
165166

166167
@Test

llm-service/app/config.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ def rag_log_level(self) -> int:
6868
def rag_databases_dir(self) -> str:
6969
return os.environ.get("RAG_DATABASES_DIR", os.path.join("..", "databases"))
7070

71+
@property
72+
def tools_dir(self) -> str:
73+
return os.path.join("..", "tools")
74+
7175
@property
7276
def caii_domain(self) -> str:
7377
return os.environ["CAII_DOMAIN"]
@@ -149,6 +153,10 @@ def azure_openai_api_key(self) -> Optional[str]:
149153
def azure_openai_endpoint(self) -> Optional[str]:
150154
return os.environ.get("AZURE_OPENAI_ENDPOINT")
151155

156+
@property
157+
def azure_openai_api_version(self) -> Optional[str]:
158+
return os.environ.get("AZURE_OPENAI_API_VERSION")
159+
152160
@property
153161
def openai_api_key(self) -> Optional[str]:
154162
return os.environ.get("OPENAI_API_KEY")
@@ -157,4 +165,5 @@ def openai_api_key(self) -> Optional[str]:
157165
def openai_api_base(self) -> Optional[str]:
158166
return os.environ.get("OPENAI_API_BASE")
159167

168+
160169
settings = _Settings()

llm-service/app/main.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,14 @@
3838

3939
import functools
4040
import logging
41+
import os
4142
import sys
4243
import time
4344
from collections.abc import Awaitable, Callable
4445
from contextlib import asynccontextmanager
4546
from typing import AsyncGenerator
4647

48+
import opik
4749
from fastapi import FastAPI, Request, Response
4850
from fastapi.middleware.cors import CORSMiddleware
4951
from uvicorn.logging import DefaultFormatter
@@ -73,6 +75,16 @@ def _configure_logger() -> None:
7375

7476
_configure_logger()
7577

78+
if os.environ.get("ENABLE_OPIK") == "True":
79+
opik.configure(
80+
use_local=True, url=os.environ.get("OPIK_URL", "http://localhost:5174")
81+
)
82+
83+
from llama_index.core import set_global_handler
84+
85+
# You should provide your OPIK API key and Workspace using the following environment variables:
86+
# OPIK_API_KEY, OPIK_WORKSPACE
87+
set_global_handler("opik")
7688

7789
###################################
7890
# Lifespan events

llm-service/app/rag_types.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,7 @@
4040

4141
from pydantic import BaseModel
4242

43-
from app.services.query.query_configuration import tool_types
44-
4543

4644
class RagPredictConfiguration(BaseModel):
4745
exclude_knowledge_base: Optional[bool] = False
4846
use_question_condensing: Optional[bool] = True
49-
tools: Optional[list[tool_types]] = None

llm-service/app/routers/index/sessions/__init__.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@
4040
import logging
4141
import queue
4242
import time
43-
from concurrent.futures import ThreadPoolExecutor
44-
from typing import Optional, Generator
43+
from concurrent.futures import Future, ThreadPoolExecutor
44+
from typing import Optional, Generator, Any
4545

4646
from fastapi import APIRouter, Header, HTTPException
4747
from fastapi.responses import StreamingResponse
@@ -60,7 +60,8 @@
6060
from ....services.chat_history.paginator import paginate
6161
from ....services.metadata_apis import session_metadata_api
6262
from ....services.mlflow import rating_mlflow_log_metric, feedback_mlflow_log_table
63-
from ....services.query.agents.crewai_querier import CrewEvent, poison_pill
63+
from ....services.query.agents.crewai_querier import poison_pill
64+
from ....services.query.crew_events import CrewEvent
6465
from ....services.session import rename_session
6566

6667
logger = logging.getLogger(__name__)
@@ -232,8 +233,11 @@ def stream_chat_completion(
232233

233234
crew_events_queue: queue.Queue[CrewEvent] = queue.Queue()
234235

235-
def crew_callback() -> Generator[str, None, None]:
236+
def crew_callback(chat_future: Future[Any]) -> Generator[str, None, None]:
236237
while True:
238+
if chat_future.done() and (e := chat_future.exception()):
239+
raise e
240+
237241
try:
238242
event_data = crew_events_queue.get(block=True, timeout=1.0)
239243
if event_data.type == poison_pill:
@@ -262,7 +266,7 @@ def generate_stream() -> Generator[str, None, None]:
262266
crew_events_queue=crew_events_queue,
263267
)
264268

265-
yield from crew_callback()
269+
yield from crew_callback(future)
266270

267271
first_message = True
268272
for response in future.result():
@@ -278,6 +282,9 @@ def generate_stream() -> Generator[str, None, None]:
278282
json_delta = json.dumps({"text": response.delta})
279283
yield f"data: {json_delta}\n\n"
280284
yield f'data: {{"response_id" : "{response_id}"}}\n\n'
285+
except TimeoutError:
286+
logger.exception("Timeout: Failed to stream chat completion")
287+
yield 'data: {{"error" : "Timeout: Failed to stream chat completion"}}\n\n'
281288
except Exception as e:
282289
logger.exception("Failed to stream chat completion")
283290
yield f'data: {{"error" : "{e}"}}\n\n'

llm-service/app/routers/index/tools/__init__.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,23 +35,30 @@
3535
# BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF
3636
# DATA.
3737
#
38-
38+
import json
3939
import logging
40+
import os
41+
from typing import Any
42+
from pydantic import BaseModel
4043

4144
from fastapi import APIRouter
42-
from pydantic import BaseModel
4345

4446
from .... import exceptions
47+
from ....config import settings
4548

4649
logger = logging.getLogger(__name__)
4750

4851
router = APIRouter(prefix="/tools", tags=["Tools"])
4952

5053

5154
class Tool(BaseModel):
52-
id: str
55+
"""
56+
Represents a tool in the MCP configuration.
57+
"""
58+
5359
name: str
54-
description: str
60+
61+
metadata: dict[str, Any]
5562

5663

5764
@router.get(
@@ -61,10 +68,9 @@ class Tool(BaseModel):
6168
)
6269
@exceptions.propagates
6370
def tools() -> list[Tool]:
64-
return [
65-
Tool(
66-
id="1",
67-
name="search",
68-
description="Searches the internet for the given query.",
69-
),
70-
]
71+
72+
mcp_json_path = os.path.join(settings.tools_dir, "mcp.json")
73+
74+
with open(mcp_json_path, "r") as f:
75+
mcp_config = json.load(f)
76+
return [Tool(**server) for server in mcp_config["mcp_servers"]]

llm-service/app/services/chat/chat.py

Lines changed: 68 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,15 @@
3535
# BUSINESS ADVANTAGE OR UNAVAILABILITY, OR LOSS OR CORRUPTION OF
3636
# DATA.
3737
#
38-
38+
import logging
39+
import re
3940
import time
4041
import uuid
4142
from typing import Optional
4243

4344
from fastapi import HTTPException
4445
from llama_index.core.chat_engine.types import AgentChatResponse
46+
from llama_index.core.schema import NodeWithScore
4547

4648
from app.ai.vector_stores.vector_store_factory import VectorStoreFactory
4749
from app.rag_types import RagPredictConfiguration
@@ -58,6 +60,7 @@
5860
from app.services.query import querier
5961
from app.services.query.query_configuration import QueryConfiguration
6062

63+
logger = logging.getLogger(__name__)
6164

6265
def chat(
6366
session: Session,
@@ -115,22 +118,41 @@ def _run_chat(
115118
query_configuration,
116119
retrieve_chat_history(session.id),
117120
)
118-
return finalize_response(response, condensed_question, data_source_id, query, query_configuration, response_id,session, user_name)
121+
return finalize_response(
122+
response,
123+
condensed_question,
124+
data_source_id,
125+
query,
126+
query_configuration,
127+
response_id,
128+
session,
129+
user_name,
130+
)
119131

120132

121-
def finalize_response(chat_response: AgentChatResponse,
122-
condensed_question: str | None,
123-
data_source_id: Optional[int],
124-
query: str,
125-
query_configuration: QueryConfiguration,
126-
response_id: str,
127-
session: Session,
128-
user_name: Optional[str]) -> RagStudioChatMessage:
133+
def finalize_response(
134+
chat_response: AgentChatResponse,
135+
condensed_question: str | None,
136+
data_source_id: Optional[int],
137+
query: str,
138+
query_configuration: QueryConfiguration,
139+
response_id: str,
140+
session: Session,
141+
user_name: Optional[str],
142+
) -> RagStudioChatMessage:
129143
if condensed_question and (condensed_question.strip() == query.strip()):
130144
condensed_question = None
131-
relevance, faithfulness = evaluators.evaluate_response(
132-
query, chat_response, session.inference_model
133-
)
145+
146+
if data_source_id:
147+
chat_response = extract_nodes_from_response_str(chat_response, data_source_id)
148+
149+
evaluations = []
150+
if len(chat_response.source_nodes) != 0:
151+
relevance, faithfulness = evaluators.evaluate_response(
152+
query, chat_response, session.inference_model
153+
)
154+
evaluations.append(Evaluation(name="relevance", value=relevance))
155+
evaluations.append(Evaluation(name="faithfulness", value=faithfulness))
134156
response_source_nodes = format_source_nodes(chat_response, data_source_id)
135157
new_chat_message = RagStudioChatMessage(
136158
id=response_id,
@@ -141,10 +163,7 @@ def finalize_response(chat_response: AgentChatResponse,
141163
user=query,
142164
assistant=chat_response.response,
143165
),
144-
evaluations=[
145-
Evaluation(name="relevance", value=relevance),
146-
Evaluation(name="faithfulness", value=faithfulness),
147-
],
166+
evaluations=evaluations,
148167
timestamp=time.time(),
149168
condensed_question=condensed_question,
150169
)
@@ -156,6 +175,38 @@ def finalize_response(chat_response: AgentChatResponse,
156175
return new_chat_message
157176

158177

178+
def extract_nodes_from_response_str(
179+
chat_response: AgentChatResponse, data_source_id: int
180+
) -> AgentChatResponse:
181+
# get nodes from response source nodes
182+
node_ids_present = set([node.node_id for node in chat_response.source_nodes])
183+
# pull the source nodes from the response citations
184+
extracted_node_ids = re.findall(
185+
r"<a class='rag_citation' href='(.*?)'>",
186+
chat_response.response,
187+
)
188+
# remove duplicates
189+
extracted_node_ids = [
190+
node_id for node_id in extracted_node_ids if node_id not in node_ids_present
191+
]
192+
if len(extracted_node_ids) > 0:
193+
try:
194+
qdrant_store = VectorStoreFactory.for_chunks(data_source_id)
195+
vector_store = qdrant_store.llama_vector_store()
196+
extracted_source_nodes = vector_store.get_nodes(node_ids=extracted_node_ids)
197+
198+
# cast them into NodeWithScore with score 0.0
199+
extracted_source_nodes_w_score = [
200+
NodeWithScore(node=node, score=0.0) for node in extracted_source_nodes
201+
]
202+
# add the source nodes to the response
203+
chat_response.source_nodes += extracted_source_nodes_w_score
204+
except Exception as e:
205+
logger.warning("Failed to extract nodes from response citations (%s): %s", extracted_node_ids, e)
206+
pass
207+
return chat_response
208+
209+
159210
def direct_llm_chat(
160211
session: Session, response_id: str, query: str, user_name: Optional[str]
161212
) -> RagStudioChatMessage:
@@ -179,5 +230,3 @@ def direct_llm_chat(
179230
)
180231
chat_history_manager.append_to_history(session.id, [new_chat_message])
181232
return new_chat_message
182-
183-

0 commit comments

Comments
 (0)