diff --git a/code/backend/batch/utilities/helpers/llm_helper.py b/code/backend/batch/utilities/helpers/llm_helper.py
index 0f1ab2927..b3f75ab39 100644
--- a/code/backend/batch/utilities/helpers/llm_helper.py
+++ b/code/backend/batch/utilities/helpers/llm_helper.py
@@ -117,11 +117,14 @@ def get_chat_completion_with_functions(
             function_call=function_call,
         )

-    def get_chat_completion(self, messages: list[dict], model: str | None = None):
+    def get_chat_completion(
+        self, messages: list[dict], model: str | None = None, **kwargs
+    ):
         return self.openai_client.chat.completions.create(
             model=model or self.llm_model,
             messages=messages,
             max_tokens=self.llm_max_tokens,
+            **kwargs
         )

     def get_sk_chat_completion_service(self, service_id: str):
diff --git a/code/backend/batch/utilities/tools/question_answer_tool.py b/code/backend/batch/utilities/tools/question_answer_tool.py
index 710f55c9d..269cfac7e 100644
--- a/code/backend/batch/utilities/tools/question_answer_tool.py
+++ b/code/backend/batch/utilities/tools/question_answer_tool.py
@@ -2,26 +2,13 @@
 import logging
 import warnings

+from ..common.answer import Answer
 from ..common.source_document import SourceDocument
-from ..search.search import Search
-from .answering_tool_base import AnsweringToolBase
-
-from langchain.chains.llm import LLMChain
-from langchain.prompts import (
-    AIMessagePromptTemplate,
-    ChatPromptTemplate,
-    FewShotChatMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-    MessagesPlaceholder,
-    PromptTemplate,
-)
-from langchain_community.callbacks import get_openai_callback
-from langchain_core.messages import SystemMessage
-
 from ..helpers.config.config_helper import ConfigHelper
-from ..helpers.llm_helper import LLMHelper
 from ..helpers.env_helper import EnvHelper
-from ..common.answer import Answer
+from ..helpers.llm_helper import LLMHelper
+from ..search.search import Search
+from .answering_tool_base import AnsweringToolBase

 logger = logging.getLogger(__name__)

@@ -46,27 +33,36 @@ def json_remove_whitespace(obj: str) -> str:
         except json.JSONDecodeError:
             return obj

-    def generate_llm_chain(self, question: str, sources: list[dict]):
-        answering_prompt = PromptTemplate(
-            template=self.config.prompts.answering_user_prompt,
-            input_variables=["question", "sources"],
-        )
+    @staticmethod
+    def clean_chat_history(chat_history: list[dict]) -> list[dict]:
+        return [
+            {
+                "content": message["content"],
+                "role": message["role"],
+            }
+            for message in chat_history
+        ]

+    def generate_messages(self, question: str, sources: list[SourceDocument]):
         sources_text = "\n\n".join(
             [f"[doc{i+1}]: {source.content}" for i, source in enumerate(sources)]
         )

-        return answering_prompt, {
-            "sources": sources_text,
-            "question": question,
-        }
+        return [
+            {
+                "content": self.config.prompts.answering_user_prompt.format(
+                    question=question, sources=sources_text
+                ),
+                "role": "user",
+            },
+        ]

-    def generate_on_your_data_llm_chain(
+    def generate_on_your_data_messages(
         self,
         question: str,
         chat_history: list[dict],
         sources: list[SourceDocument],
-    ):
+    ) -> list[dict]:
         examples = []

         few_shot_example = {
@@ -82,38 +78,28 @@ def generate_on_your_data_llm_chain(

         if any(few_shot_example.values()):
             if all((few_shot_example.values())):
-                examples.append(few_shot_example)
+                examples.append(
+                    {
+                        "content": self.config.prompts.answering_user_prompt.format(
+                            sources=few_shot_example["sources"],
+                            question=few_shot_example["question"],
+                        ),
+                        "name": "example_user",
+                        "role": "system",
+                    }
+                )
+                examples.append(
+                    {
+                        "content": few_shot_example["answer"],
+                        "name": "example_assistant",
+                        "role": "system",
+                    }
+                )
             else:
                 warnings.warn(
                     "Not all example fields are set in the config. Skipping few-shot example."
                 )

-        example_prompt = ChatPromptTemplate.from_messages(
-            [
-                HumanMessagePromptTemplate.from_template(
-                    self.config.prompts.answering_user_prompt
-                ),
-                AIMessagePromptTemplate.from_template("{answer}"),
-            ]
-        )
-
-        few_shot_prompt = FewShotChatMessagePromptTemplate(
-            example_prompt=example_prompt,
-            examples=examples,
-        )
-
-        answering_prompt = ChatPromptTemplate.from_messages(
-            [
-                SystemMessage(content=self.config.prompts.answering_system_prompt),
-                few_shot_prompt,
-                SystemMessage(content=self.env_helper.AZURE_OPENAI_SYSTEM_MESSAGE),
-                MessagesPlaceholder("chat_history"),
-                HumanMessagePromptTemplate.from_template(
-                    self.config.prompts.answering_user_prompt
-                ),
-            ]
-        )
-
         documents = json.dumps(
             {
                 "retrieved_documents": [
@@ -124,39 +110,44 @@ def generate_on_your_data_llm_chain(
             separators=(",", ":"),
         )

-        return answering_prompt, {
-            "sources": documents,
-            "question": question,
-            "chat_history": chat_history,
-        }
+        return [
+            {
+                "content": self.config.prompts.answering_system_prompt,
+                "role": "system",
+            },
+            *examples,
+            {
+                "content": self.env_helper.AZURE_OPENAI_SYSTEM_MESSAGE,
+                "role": "system",
+            },
+            *QuestionAnswerTool.clean_chat_history(chat_history),
+            {
+                "content": self.config.prompts.answering_user_prompt.format(
+                    sources=documents,
+                    question=question,
+                ),
+                "role": "user",
+            },
+        ]

-    def answer_question(
-        self, question: str, chat_history: list[SourceDocument], **kwargs
-    ):
+    def answer_question(self, question: str, chat_history: list[dict], **kwargs):
         source_documents = Search.get_source_documents(self.search_handler, question)

         if self.config.prompts.use_on_your_data_format:
-            answering_prompt, input = self.generate_on_your_data_llm_chain(
+            messages = self.generate_on_your_data_messages(
                 question, chat_history, source_documents
             )
         else:
             warnings.warn(
                 "Azure OpenAI On Your Data prompt format is recommended and should be enabled in the Admin app.",
             )
-            answering_prompt, input = self.generate_llm_chain(
-                question, source_documents
-            )
+            messages = self.generate_messages(question, source_documents)

         llm_helper = LLMHelper()

-        answer_generator = LLMChain(
-            llm=llm_helper.get_llm(), prompt=answering_prompt, verbose=self.verbose
-        )
-
-        with get_openai_callback() as cb:
-            result = answer_generator(input)
+        response = llm_helper.get_chat_completion(messages, temperature=0)

-        answer = result["text"]
+        answer = response.choices[0].message.content
         logger.debug(f"Answer: {answer}")

         # Generate Answer Object
@@ -164,7 +155,7 @@ def answer_question(
             question=question,
             answer=answer,
             source_documents=source_documents,
-            prompt_tokens=cb.prompt_tokens,
-            completion_tokens=cb.completion_tokens,
+            prompt_tokens=response.usage.prompt_tokens,
+            completion_tokens=response.usage.completion_tokens,
         )
         return clean_answer
diff --git a/code/tests/functional/tests/backend_api/default/test_conversation_custom.py b/code/tests/functional/tests/backend_api/default/test_conversation_custom.py
index ad0bfd80f..1341d6878 100644
--- a/code/tests/functional/tests/backend_api/default/test_conversation_custom.py
+++ b/code/tests/functional/tests/backend_api/default/test_conversation_custom.py
@@ -542,11 +542,13 @@ def test_post_makes_correct_call_to_openai_chat_completions_with_documents(
                 },
                 {
                     "content": '## Retrieved Documents\n{"retrieved_documents":[{"[doc1]":{"content":"content"}}]}\n\n## User Question\nuser question',
-                    "role": "user",
+                    "name": "example_user",
+                    "role": "system",
                 },
                 {
                     "content": "answer",
-                    "role": "assistant",
+                    "name": "example_assistant",
+                    "role": "system",
                 },
                 {
                     "content": "You are an AI assistant that helps people find information.",
@@ -559,11 +561,9 @@ def test_post_makes_correct_call_to_openai_chat_completions_with_documents(
                     "role": "user",
                 },
             ],
-            "model": "gpt-3.5-turbo",  # This is hardcoded in LangChain
+            "model": app_config.get("AZURE_OPENAI_MODEL"),
             "max_tokens": int(app_config.get("AZURE_OPENAI_MAX_TOKENS")),
-            "n": 1,
-            "stream": False,
-            "temperature": float(app_config.get("AZURE_OPENAI_TEMPERATURE")),
+            "temperature": 0,
         },
         headers={
             "Accept": "application/json",
diff --git a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_search_documents_tool.py b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_search_documents_tool.py
index ec99d5203..c49252869 100644
--- a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_search_documents_tool.py
+++ b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_search_documents_tool.py
@@ -228,11 +228,13 @@ def test_post_makes_correct_call_to_openai_chat_completions_in_question_answer_t
                 },
                 {
                     "content": '## Retrieved Documents\n{"retrieved_documents":[{"[doc1]":{"content":"Dual Transformer Encoder (DTE) DTE (https://dev.azure.com/TScience/TSciencePublic/_wiki/wikis/TSciencePublic.wiki/82/Dual-Transformer-Encoder) DTE is a general pair-oriented sentence representation learning framework based on transformers. It provides training, inference and evaluation for sentence similarity models. Model Details DTE can be used to train a model for sentence similarity with the following features: - Build upon existing transformer-based text representations (e.g.TNLR, BERT, RoBERTa, BAG-NLR) - Apply smoothness inducing technology to improve the representation robustness - SMART (https://arxiv.org/abs/1911.03437) SMART - Apply NCE (Noise Contrastive Estimation) based similarity learning to speed up training of 100M pairs We use pretrained DTE model"}},{"[doc2]":{"content":"trained on internal data. You can find more details here - Models.md (https://dev.azure.com/TScience/_git/TSciencePublic?path=%2FDualTransformerEncoder%2FMODELS.md&version=GBmaster&_a=preview) Models.md DTE-pretrained for In-context Learning Research suggests that finetuned transformers can be used to retrieve semantically similar exemplars for e.g. KATE (https://arxiv.org/pdf/2101.06804.pdf) KATE . They show that finetuned models esp. tuned on related tasks give the maximum boost to GPT-3 in-context performance. DTE have lot of pretrained models that are trained on intent classification tasks. We can use these model embedding to find natural language utterances which are similar to our test utterances at test time. The steps are: 1. Embed"}},{"[doc3]":{"content":"train and test utterances using DTE model 2. For each test embedding, find K-nearest neighbors. 3. Prefix the prompt with nearest embeddings. The following diagram from the above paper (https://arxiv.org/pdf/2101.06804.pdf) the above paper visualizes this process: DTE-Finetuned This is an extension of DTE-pretrained method where we further finetune the embedding models for prompt crafting task. In summary, we sample random prompts from our training data and use them for GPT-3 inference for the another part of training data. Some prompts work better and lead to right results whereas other prompts lead"}},{"[doc4]":{"content":"to wrong completions. We finetune the model on the downstream task of whether a prompt is good or not based on whether it leads to right or wrong completion. This approach is similar to this paper: Learning To Retrieve Prompts for In-Context Learning (https://arxiv.org/pdf/2112.08633.pdf) this paper: Learning To Retrieve Prompts for In-Context Learning . This method is very general but it may require a lot of data to actually finetune a model to learn how to retrieve examples suitable for the downstream inference model like GPT-3."}}]}\n\n## User Question\nWhat features does the Dual Transformer Encoder (DTE) provide for sentence similarity models and in-context learning?',
-                    "role": "user",
+                    "name": "example_user",
+                    "role": "system",
                 },
                 {
                     "content": "The Dual Transformer Encoder (DTE) is a framework for sentence representation learning that can be used to train, infer, and evaluate sentence similarity models[doc1][doc2]. It builds upon existing transformer-based text representations and applies smoothness inducing technology and Noise Contrastive Estimation for improved robustness and faster training[doc1]. DTE also offers pretrained models for in-context learning, which can be used to find semantically similar natural language utterances[doc2]. These models can be further finetuned for specific tasks, such as prompt crafting, to enhance the performance of downstream inference models like GPT-3[doc2][doc3][doc4]. However, this finetuning may require a significant amount of data[doc3][doc4].",
-                    "role": "assistant",
+                    "name": "example_assistant",
+                    "role": "system",
                 },
                 {
                     "content": "You are an AI assistant that helps people find information.",
@@ -245,11 +247,9 @@ def test_post_makes_correct_call_to_openai_chat_completions_in_question_answer_t
                     "role": "user",
                 },
             ],
-            "model": "gpt-3.5-turbo",  # This is hardcoded in LangChain
+            "model": app_config.get("AZURE_OPENAI_MODEL"),
             "max_tokens": int(app_config.get("AZURE_OPENAI_MAX_TOKENS")),
-            "n": 1,
-            "stream": False,
-            "temperature": float(app_config.get("AZURE_OPENAI_TEMPERATURE")),
+            "temperature": 0,
         },
         headers={
             "Accept": "application/json",
diff --git a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_without_tool_call.py b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_without_tool_call.py
index ce9002a06..6a7842e1a 100644
--- a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_without_tool_call.py
+++ b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_without_tool_call.py
@@ -176,7 +176,7 @@ def test_post_makes_correct_call_to_openai_chat_completions(
             "n": 1,
             "presence_penalty": 0.0,
             "stream": False,
-            "temperature": float(app_config.get("AZURE_OPENAI_TEMPERATURE")),
+            "temperature": 0.0,
             "tools": [
                 {
                     "type": "function",
diff --git a/code/tests/utilities/tools/test_question_answer_tool.py b/code/tests/utilities/tools/test_question_answer_tool.py
index 769b81866..7b6de3fe3 100644
--- a/code/tests/utilities/tools/test_question_answer_tool.py
+++ b/code/tests/utilities/tools/test_question_answer_tool.py
@@ -4,7 +4,6 @@
 import pytest
 from backend.batch.utilities.common.answer import Answer
 from backend.batch.utilities.tools.question_answer_tool import QuestionAnswerTool
-from langchain_core.documents import Document

 from backend.batch.utilities.common.source_document import SourceDocument

@@ -14,6 +13,7 @@ def config_mock():
         "backend.batch.utilities.tools.question_answer_tool.ConfigHelper"
     ) as mock:
         config = mock.get_active_config_or_default.return_value
+        config.prompts.use_on_your_data_format = True
         config.prompts.answering_system_prompt = "mock answering system prompt"
         config.prompts.answering_user_prompt = (
             "Sources: {sources}, Question: {question}"
         )
@@ -44,29 +44,22 @@ def env_helper_mock():


 @pytest.fixture(autouse=True)
-def LLMHelperMock():
+def llm_helper_mock():
     with patch("backend.batch.utilities.tools.question_answer_tool.LLMHelper") as mock:
-        yield mock
+        llm_helper = mock.return_value

+        mock_response = MagicMock()
+        mock_response.message.content = "mock content"

-@pytest.fixture(autouse=True)
-def LLMChainMock():
-    with patch("backend.batch.utilities.tools.question_answer_tool.LLMChain") as mock:
-        mock.return_value.return_value = {"text": "mock content"}
-
-        yield mock
-
+        llm_helper.get_chat_completion.return_value.choices = [mock_response]
+        llm_helper.get_chat_completion.return_value.usage.prompt_tokens = 100
+        llm_helper.get_chat_completion.return_value.usage.completion_tokens = 50

-@pytest.fixture(autouse=True)
-def get_openai_callback_mock():
-    with patch(
-        "backend.batch.utilities.tools.question_answer_tool.get_openai_callback"
-    ) as mock:
-        yield mock
+        yield llm_helper


 @pytest.fixture(autouse=True)
-def get_search_handler_mock():
+def search_handler_mock():
     with patch(
         "backend.batch.utilities.tools.question_answer_tool.Search.get_search_handler"
     ) as mock:
@@ -76,16 +69,7 @@ def get_search_handler_mock():


 @pytest.fixture(autouse=True)
-def get_source_documents_mock():
-    with patch(
-        "backend.batch.utilities.tools.question_answer_tool.Search.get_source_documents"
-    ) as mock:
-        source_documents = mock.return_value
-        yield source_documents
-
-
-@pytest.fixture(autouse=True)
-def get_source_documents_yield():
+def source_documents_mock():
     with patch(
         "backend.batch.utilities.tools.question_answer_tool.Search.get_source_documents"
     ) as mock:
@@ -108,10 +92,12 @@ def get_source_documents_yield():
             ),
         ]
         mock.return_value = documents
-        yield mock
+        yield documents


-def test_answer_question_returns_source_documents():
+def test_answer_question_returns_source_documents(
+    source_documents_mock: list[SourceDocument],
+):
     # given
     tool = QuestionAnswerTool()

     # when
     answer = tool.answer_question("mock question", [])

     # then
-    source_documents = answer.source_documents
-
-    assert len(source_documents) == 2
-
-    assert source_documents[0].id == "mock id"
-    assert source_documents[0].title == "mock title"
-    assert source_documents[0].content == "mock content"
-    assert source_documents[0].source == "mock source"
-    assert source_documents[0].chunk == 123
-    assert source_documents[0].offset == 123
-    assert source_documents[0].page_number == 123
-
-    assert source_documents[1].id == "mock id 2"
-    assert source_documents[1].title == "mock title 2"
-    assert source_documents[1].content == "mock content 2"
+    assert len(answer.source_documents) == 2
+    assert isinstance(answer.source_documents[0], SourceDocument)
+    assert answer.source_documents == source_documents_mock

-    assert source_documents[1].source == "mock source 2"
-    assert source_documents[1].chunk_id == "mock chunk id 2"
-

-def test_answer_question_returns_answer(
-    get_search_handler_mock, get_source_documents_yield
-):
+def test_answer_question_returns_answer():
     # given
     tool = QuestionAnswerTool()
@@ -154,67 +123,56 @@ def test_answer_question_returns_answer(
     assert answer.answer == "mock content"


-def test_get_openai_callback(get_openai_callback_mock: MagicMock):
+def test_tokens_included_in_answer():
     # given
-    cb = get_openai_callback_mock.return_value.__enter__.return_value
-    cb.prompt_tokens = 100
-    cb.completion_tokens = 50
     tool = QuestionAnswerTool()

     # when
     answer = tool.answer_question("mock question", [])

     # then
-    get_openai_callback_mock.assert_called_once()
+    assert isinstance(answer, Answer)
     assert answer.prompt_tokens == 100
     assert answer.completion_tokens == 50


-def test_correct_prompt_with_few_shot_example(
-    LLMHelperMock: MagicMock, LLMChainMock: MagicMock, get_source_documents_yield
-):
+def test_correct_prompt_with_few_shot_example(llm_helper_mock: MagicMock):
     # given
     tool = QuestionAnswerTool()
-    llm = LLMHelperMock.return_value.get_llm.return_value
-    answer_generator = LLMChainMock.return_value

     # when
     tool.answer_question("mock question", [])

     # then
-    expected_input = {
-        "sources": '{"retrieved_documents":[{"[doc1]":{"content":"mock content"}},{"[doc2]":{"content":"mock content 2"}}]}',
-        "question": "mock question",
-        "chat_history": [],
-    }
-
-    answer_generator.assert_called_once_with(expected_input)
-
-    assert LLMChainMock.call_args[1]["llm"] == llm
-    assert LLMChainMock.call_args[1]["verbose"] is True
-
-    prompt = LLMChainMock.call_args[1]["prompt"]
-    prompt_test = prompt.format(**expected_input)
-
-    assert (
-        prompt_test
-        == """System: mock answering system prompt
-Human: Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock example content"}}]}, Question: mock example user question
-AI: mock example answer
-System: mock azure openai system message
-Human: Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock content"}},{"[doc2]":{"content":"mock content 2"}}]}, Question: mock question"""
+    llm_helper_mock.get_chat_completion.assert_called_once_with(
+        [
+            {"content": "mock answering system prompt", "role": "system"},
+            {
+                "content": 'Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock example content"}}]}, Question: mock example user question',
+                "name": "example_user",
+                "role": "system",
+            },
+            {
+                "content": "mock example answer",
+                "name": "example_assistant",
+                "role": "system",
+            },
+            {"content": "mock azure openai system message", "role": "system"},
+            {
+                "content": 'Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock content"}},{"[doc2]":{"content":"mock content 2"}}]}, Question: mock question',
+                "role": "user",
+            },
+        ],
+        temperature=0,
     )


+@patch("backend.batch.utilities.tools.question_answer_tool.warnings.warn")
 def test_correct_prompt_without_few_shot_example(
-    config_mock: MagicMock,
-    LLMChainMock: MagicMock,
-    get_search_handler_mock,
-    get_source_documents_yield,
+    warn_mock: MagicMock, config_mock: MagicMock, llm_helper_mock: MagicMock
 ):
     # given
     tool = QuestionAnswerTool()
-    answer_generator = LLMChainMock.return_value
     config_mock.example.documents = " "
     config_mock.example.user_question = " "
@@ -222,31 +180,26 @@ def test_correct_prompt_without_few_shot_example(
     tool.answer_question("mock question", [])

     # then
-    expected_input = {
-        "sources": '{"retrieved_documents":[{"[doc1]":{"content":"mock content"}},{"[doc2]":{"content":"mock content 2"}}]}',
-        "question": "mock question",
-        "chat_history": [],
-    }
-
-    answer_generator.assert_called_once_with(expected_input)
-
-    prompt = LLMChainMock.call_args[1]["prompt"]
-    prompt_test = prompt.format(**expected_input)
-
-    assert (
-        prompt_test
-        == """System: mock answering system prompt
-System: mock azure openai system message
-Human: Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock content"}},{"[doc2]":{"content":"mock content 2"}}]}, Question: mock question"""
+    warn_mock.assert_called_once()
+
+    llm_helper_mock.get_chat_completion.assert_called_once_with(
+        [
+            {"content": "mock answering system prompt", "role": "system"},
+            {"content": "mock azure openai system message", "role": "system"},
+            {
+                "content": 'Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock content"}},{"[doc2]":{"content":"mock content 2"}}]}, Question: mock question',
+                "role": "user",
+            },
+        ],
+        temperature=0,
     )


 def test_correct_prompt_with_few_shot_example_and_chat_history(
-    LLMChainMock: MagicMock, get_search_handler_mock, get_source_documents_yield
+    llm_helper_mock: MagicMock,
 ):
     # given
     tool = QuestionAnswerTool()
-    answer_generator = LLMChainMock.return_value
     chat_history = [
         {"role": "user", "content": "Hello"},
         {"role": "assistant", "content": "Hi, how can I help?"},
     ]
@@ -256,60 +209,58 @@ def test_correct_prompt_with_few_shot_example_and_chat_history(
     tool.answer_question("mock question", chat_history)

     # then
-    expected_input = {
-        "question": "mock question",
-        "sources": '{"retrieved_documents":[{"[doc1]":{"content":"mock content"}},{"[doc2]":{"content":"mock content 2"}}]}',
-        "chat_history": chat_history,
-    }
-
-    answer_generator.assert_called_once_with(expected_input)
-
-    prompt = LLMChainMock.call_args[1]["prompt"]
-    prompt_test = prompt.format(**expected_input)
-
-    assert (
-        prompt_test
-        == """System: mock answering system prompt
-Human: Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock example content"}}]}, Question: mock example user question
-AI: mock example answer
-System: mock azure openai system message
-Human: Hello
-AI: Hi, how can I help?
-Human: Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock content"}},{"[doc2]":{"content":"mock content 2"}}]}, Question: mock question"""
+    llm_helper_mock.get_chat_completion.assert_called_once_with(
+        [
+            {"content": "mock answering system prompt", "role": "system"},
+            {
+                "content": 'Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock example content"}}]}, Question: mock example user question',
+                "name": "example_user",
+                "role": "system",
+            },
+            {
+                "content": "mock example answer",
+                "name": "example_assistant",
+                "role": "system",
+            },
+            {"content": "mock azure openai system message", "role": "system"},
+            {"role": "user", "content": "Hello"},
+            {"role": "assistant", "content": "Hi, how can I help?"},
+            {
+                "content": 'Sources: {"retrieved_documents":[{"[doc1]":{"content":"mock content"}},{"[doc2]":{"content":"mock content 2"}}]}, Question: mock question',
+                "role": "user",
+            },
+        ],
+        temperature=0,
     )


 def test_non_on_your_data_prompt_correct(
     config_mock: MagicMock,
-    LLMChainMock: MagicMock,
-    get_search_handler_mock,
-    get_source_documents_yield,
+    llm_helper_mock: MagicMock,
 ):
     # given
     tool = QuestionAnswerTool()
-    answer_generator = LLMChainMock.return_value
     config_mock.prompts.use_on_your_data_format = False
     config_mock.prompts.answering_user_prompt = (
         "Sources: {sources}, Question: {question}"
     )

     # when
-    tool.answer_question("mock question", [])
+    answer = tool.answer_question("mock question", [])

     # then
-    expected_input = {
-        "sources": """[doc1]: mock content\n\n[doc2]: mock content 2""",
-        "question": "mock question",
-    }
-
-    answer_generator.assert_called_once_with(expected_input)
-
-    prompt = LLMChainMock.call_args[1]["prompt"]
-    prompt_test = prompt.format(**expected_input)
+    assert isinstance(answer, Answer)
+    assert answer.question == "mock question"
+    assert answer.answer == "mock content"

-    assert (
-        prompt_test
-        == """Sources: [doc1]: mock content\n\n[doc2]: mock content 2, Question: mock question"""
+    llm_helper_mock.get_chat_completion.assert_called_once_with(
+        [
+            {
+                "content": "Sources: [doc1]: mock content\n\n[doc2]: mock content 2, Question: mock question",
+                "role": "user",
+            },
+        ],
+        temperature=0,
     )
@@ -323,31 +274,3 @@ def test_json_remove_whitespace(input: str, expected: str):

     # then
     assert result == expected
-
-
-def create_document_and_source_documents(
-    get_source_documents_mock, get_search_handler_mock
-):
-    document = Document("mock content")
-    document.metadata = {
-        "id": "mock id",
-        "title": "mock title",
-        "source": "mock source",
-        "chunk": "mock chunk",
-        "offset": "mock offset",
-        "page_number": "mock page number",
-    }
-    get_source_documents_mock.return_value = document
-    documents = []
-    documents.append(
-        SourceDocument(
-            id=document.metadata["id"],
-            content=document.page_content,
-            title=document.metadata["title"],
-            source=document.metadata["source"],
-            chunk=document.metadata["chunk"],
-            offset=document.metadata["offset"],
-            page_number=document.metadata["page_number"],
-        )
-    )
-    get_search_handler_mock.return_answer_source_documents.return_value = documents