feat: Remove LangChain from question answer tool #939

Merged: 9 commits, May 20, 2024
Changes from 4 commits
code/backend/batch/utilities/tools/question_answer_tool.py (143 changes: 67 additions, 76 deletions)
@@ -2,26 +2,13 @@
 import logging
 import warnings
 
-from ..common.answer import Answer
 from ..common.source_document import SourceDocument
-from ..search.search import Search
-from .answering_tool_base import AnsweringToolBase
-
-from langchain.chains.llm import LLMChain
-from langchain.prompts import (
-    AIMessagePromptTemplate,
-    ChatPromptTemplate,
-    FewShotChatMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-    MessagesPlaceholder,
-    PromptTemplate,
-)
-from langchain_community.callbacks import get_openai_callback
-from langchain_core.messages import SystemMessage
-
 from ..helpers.config.config_helper import ConfigHelper
-from ..helpers.llm_helper import LLMHelper
 from ..helpers.env_helper import EnvHelper
+from ..common.answer import Answer
+from ..helpers.llm_helper import LLMHelper
+from ..search.search import Search
+from .answering_tool_base import AnsweringToolBase
 
 logger = logging.getLogger(__name__)

@@ -46,27 +33,36 @@ def json_remove_whitespace(obj: str) -> str:
         except json.JSONDecodeError:
             return obj
 
-    def generate_llm_chain(self, question: str, sources: list[dict]):
-        answering_prompt = PromptTemplate(
-            template=self.config.prompts.answering_user_prompt,
-            input_variables=["question", "sources"],
-        )
+    @staticmethod
+    def clean_chat_history(chat_history: list[dict]) -> list[dict]:
+        return [
+            {
+                "content": message["content"],
+                "role": message["role"],
+            }
+            for message in chat_history
+        ]
+
+    def generate_messages(self, question: str, sources: list[SourceDocument]):
         sources_text = "\n\n".join(
             [f"[doc{i+1}]: {source.content}" for i, source in enumerate(sources)]
         )
 
-        return answering_prompt, {
-            "sources": sources_text,
-            "question": question,
-        }
+        return [
+            {
+                "content": self.config.prompts.answering_user_prompt.format(
+                    question=question, sources=sources_text
+                ),
+                "role": "user",
+            },
+        ]
 
-    def generate_on_your_data_llm_chain(
+    def generate_on_your_data_messages(
         self,
         question: str,
         chat_history: list[dict],
         sources: list[SourceDocument],
-    ):
+    ) -> list[dict]:
         examples = []
 
         few_shot_example = {
@@ -82,38 +78,28 @@ def generate_on_your_data_llm_chain(
         if any(few_shot_example.values()):
             if all((few_shot_example.values())):
-                examples.append(few_shot_example)
+                examples.append(
+                    {
+                        "content": self.config.prompts.answering_user_prompt.format(
+                            sources=few_shot_example["sources"],
+                            question=few_shot_example["question"],
+                        ),
+                        "name": "example_user",
+                        "role": "system",
+                    }
+                )
+                examples.append(
+                    {
+                        "content": few_shot_example["answer"],
+                        "name": "example_assistant",
+                        "role": "system",
+                    }
+                )
             else:
                 warnings.warn(
                     "Not all example fields are set in the config. Skipping few-shot example."
                 )
 
-        example_prompt = ChatPromptTemplate.from_messages(
-            [
-                HumanMessagePromptTemplate.from_template(
-                    self.config.prompts.answering_user_prompt
-                ),
-                AIMessagePromptTemplate.from_template("{answer}"),
-            ]
-        )
-
-        few_shot_prompt = FewShotChatMessagePromptTemplate(
-            example_prompt=example_prompt,
-            examples=examples,
-        )
-
-        answering_prompt = ChatPromptTemplate.from_messages(
-            [
-                SystemMessage(content=self.config.prompts.answering_system_prompt),
-                few_shot_prompt,
-                SystemMessage(content=self.env_helper.AZURE_OPENAI_SYSTEM_MESSAGE),
-                MessagesPlaceholder("chat_history"),
-                HumanMessagePromptTemplate.from_template(
-                    self.config.prompts.answering_user_prompt
-                ),
-            ]
-        )
-
         documents = json.dumps(
             {
                 "retrieved_documents": [
@@ -124,47 +110,52 @@
             separators=(",", ":"),
         )
 
-        return answering_prompt, {
-            "sources": documents,
-            "question": question,
-            "chat_history": chat_history,
-        }
+        return [
+            {
+                "content": self.config.prompts.answering_system_prompt,
+                "role": "system",
+            },
+            *examples,
+            {
+                "content": self.env_helper.AZURE_OPENAI_SYSTEM_MESSAGE,
+                "role": "system",
+            },
+            *QuestionAnswerTool.clean_chat_history(chat_history),
+            {
+                "content": self.config.prompts.answering_user_prompt.format(
+                    sources=documents,
+                    question=question,
+                ),
+                "role": "user",
+            },
+        ]
 
-    def answer_question(
-        self, question: str, chat_history: list[SourceDocument], **kwargs
-    ):
+    def answer_question(self, question: str, chat_history: list[dict], **kwargs):
         source_documents = Search.get_source_documents(self.search_handler, question)
 
         if self.config.prompts.use_on_your_data_format:
-            answering_prompt, input = self.generate_on_your_data_llm_chain(
+            messages = self.generate_on_your_data_messages(
                 question, chat_history, source_documents
             )
         else:
             warnings.warn(
                 "Azure OpenAI On Your Data prompt format is recommended and should be enabled in the Admin app.",
             )
-            answering_prompt, input = self.generate_llm_chain(
-                question, source_documents
-            )
+            messages = self.generate_messages(question, source_documents)
 
         llm_helper = LLMHelper()
 
-        answer_generator = LLMChain(
-            llm=llm_helper.get_llm(), prompt=answering_prompt, verbose=self.verbose
-        )
-
-        with get_openai_callback() as cb:
-            result = answer_generator(input)
+        response = llm_helper.get_chat_completion(messages)
 
-        answer = result["text"]
+        answer = response.choices[0].message.content
         logger.debug(f"Answer: {answer}")
 
         # Generate Answer Object
         clean_answer = Answer(
             question=question,
             answer=answer,
             source_documents=source_documents,
-            prompt_tokens=cb.prompt_tokens,
-            completion_tokens=cb.completion_tokens,
+            prompt_tokens=response.usage.prompt_tokens,
+            completion_tokens=response.usage.completion_tokens,
         )
         return clean_answer
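
The heart of this change is swapping LangChain's `LLMChain` and prompt-template plumbing for plain role/content message dicts sent through `LLMHelper.get_chat_completion`, with token usage read off the response object instead of `get_openai_callback()`. Below is a minimal, self-contained sketch of that pattern, assuming an Azure OpenAI v1 client underneath; the endpoint, key, deployment name, and prompt strings are placeholders, not values from this repo.

```python
# Minimal sketch of the post-change flow: build plain message dicts and call
# the chat-completions API directly. Placeholders throughout; the repo wraps
# this call in LLMHelper.get_chat_completion.
from openai import AzureOpenAI

client = AzureOpenAI(
    azure_endpoint="https://example.openai.azure.com/",  # placeholder endpoint
    api_key="<key>",  # placeholder credential
    api_version="2024-02-01",
)


def answer_question(question: str, sources_text: str) -> tuple[str, int, int]:
    messages = [
        {
            "role": "system",
            "content": "You are an AI assistant that helps people find information.",
        },
        {
            "role": "user",
            "content": f"## Retrieved Documents\n{sources_text}\n\n## User Question\n{question}",
        },
    ]
    response = client.chat.completions.create(
        model="<deployment-name>",  # placeholder; the repo reads AZURE_OPENAI_MODEL
        messages=messages,
    )
    # Usage now comes from the response itself rather than a LangChain callback.
    return (
        response.choices[0].message.content,
        response.usage.prompt_tokens,
        response.usage.completion_tokens,
    )
```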
@@ -542,11 +542,13 @@ def test_post_makes_correct_call_to_openai_chat_completions_with_documents(
                 },
                 {
                     "content": '## Retrieved Documents\n{"retrieved_documents":[{"[doc1]":{"content":"content"}}]}\n\n## User Question\nuser question',
-                    "role": "user",
+                    "name": "example_user",
+                    "role": "system",
                 },
                 {
                     "content": "answer",
-                    "role": "assistant",
+                    "name": "example_assistant",
+                    "role": "system",
                 },
                 {
                     "content": "You are an AI assistant that helps people find information.",
@@ -559,11 +561,8 @@ def test_post_makes_correct_call_to_openai_chat_completions_with_documents(
                     "role": "user",
                 },
             ],
-            "model": "gpt-3.5-turbo",  # This is hardcoded in LangChain
+            "model": app_config.get("AZURE_OPENAI_MODEL"),
            "max_tokens": int(app_config.get("AZURE_OPENAI_MAX_TOKENS")),
-            "n": 1,
-            "stream": False,
            "temperature": float(app_config.get("AZURE_OPENAI_TEMPERATURE")),
        },
        headers={
            "Accept": "application/json",
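
The test updates above pin down the new few-shot encoding: instead of the real user/assistant turns that LangChain's `FewShotChatMessagePromptTemplate` emitted, the example pair is now sent as two system messages tagged with `name` fields, matching the Azure OpenAI On Your Data example convention. A small sketch of that shape; the helper name and prompt strings are illustrative, not from the repo.

```python
# Illustrative helper showing the message shape the updated tests assert:
# few-shot turns become "system" messages with "name" markers, so the model
# treats them as examples rather than as real conversation history.
def few_shot_messages(example_prompt: str, example_answer: str) -> list[dict]:
    return [
        {"role": "system", "name": "example_user", "content": example_prompt},
        {"role": "system", "name": "example_assistant", "content": example_answer},
    ]


messages = [
    {"role": "system", "content": "answering system prompt"},  # placeholder
    *few_shot_messages(
        '## Retrieved Documents\n{"retrieved_documents":[...]}\n\n## User Question\nuser question',
        "answer",
    ),
    {
        "role": "system",
        "content": "You are an AI assistant that helps people find information.",
    },
    {"role": "user", "content": "user question"},
]
```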
@@ -228,11 +228,13 @@ def test_post_makes_correct_call_to_openai_chat_completions_in_question_answer_t
                 },
                 {
                     "content": '## Retrieved Documents\n{"retrieved_documents":[{"[doc1]":{"content":"Dual Transformer Encoder (DTE) DTE (https://dev.azure.com/TScience/TSciencePublic/_wiki/wikis/TSciencePublic.wiki/82/Dual-Transformer-Encoder) DTE is a general pair-oriented sentence representation learning framework based on transformers. It provides training, inference and evaluation for sentence similarity models. Model Details DTE can be used to train a model for sentence similarity with the following features: - Build upon existing transformer-based text representations (e.g.TNLR, BERT, RoBERTa, BAG-NLR) - Apply smoothness inducing technology to improve the representation robustness - SMART (https://arxiv.org/abs/1911.03437) SMART - Apply NCE (Noise Contrastive Estimation) based similarity learning to speed up training of 100M pairs We use pretrained DTE model"}},{"[doc2]":{"content":"trained on internal data. You can find more details here - Models.md (https://dev.azure.com/TScience/_git/TSciencePublic?path=%2FDualTransformerEncoder%2FMODELS.md&version=GBmaster&_a=preview) Models.md DTE-pretrained for In-context Learning Research suggests that finetuned transformers can be used to retrieve semantically similar exemplars for e.g. KATE (https://arxiv.org/pdf/2101.06804.pdf) KATE . They show that finetuned models esp. tuned on related tasks give the maximum boost to GPT-3 in-context performance. DTE have lot of pretrained models that are trained on intent classification tasks. We can use these model embedding to find natural language utterances which are similar to our test utterances at test time. The steps are: 1. Embed"}},{"[doc3]":{"content":"train and test utterances using DTE model 2. For each test embedding, find K-nearest neighbors. 3. Prefix the prompt with nearest embeddings. The following diagram from the above paper (https://arxiv.org/pdf/2101.06804.pdf) the above paper visualizes this process: DTE-Finetuned This is an extension of DTE-pretrained method where we further finetune the embedding models for prompt crafting task. In summary, we sample random prompts from our training data and use them for GPT-3 inference for the another part of training data. Some prompts work better and lead to right results whereas other prompts lead"}},{"[doc4]":{"content":"to wrong completions. We finetune the model on the downstream task of whether a prompt is good or not based on whether it leads to right or wrong completion. This approach is similar to this paper: Learning To Retrieve Prompts for In-Context Learning (https://arxiv.org/pdf/2112.08633.pdf) this paper: Learning To Retrieve Prompts for In-Context Learning . This method is very general but it may require a lot of data to actually finetune a model to learn how to retrieve examples suitable for the downstream inference model like GPT-3."}}]}\n\n## User Question\nWhat features does the Dual Transformer Encoder (DTE) provide for sentence similarity models and in-context learning?',
-                    "role": "user",
+                    "name": "example_user",
+                    "role": "system",
                 },
                 {
                     "content": "The Dual Transformer Encoder (DTE) is a framework for sentence representation learning that can be used to train, infer, and evaluate sentence similarity models[doc1][doc2]. It builds upon existing transformer-based text representations and applies smoothness inducing technology and Noise Contrastive Estimation for improved robustness and faster training[doc1]. DTE also offers pretrained models for in-context learning, which can be used to find semantically similar natural language utterances[doc2]. These models can be further finetuned for specific tasks, such as prompt crafting, to enhance the performance of downstream inference models like GPT-3[doc2][doc3][doc4]. However, this finetuning may require a significant amount of data[doc3][doc4].",
-                    "role": "assistant",
+                    "name": "example_assistant",
+                    "role": "system",
                 },
                 {
                     "content": "You are an AI assistant that helps people find information.",
@@ -245,11 +247,8 @@ def test_post_makes_correct_call_to_openai_chat_completions_in_question_answer_t
                     "role": "user",
                 },
             ],
-            "model": "gpt-3.5-turbo",  # This is hardcoded in LangChain
+            "model": app_config.get("AZURE_OPENAI_MODEL"),
            "max_tokens": int(app_config.get("AZURE_OPENAI_MAX_TOKENS")),
-            "n": 1,
-            "stream": False,
            "temperature": float(app_config.get("AZURE_OPENAI_TEMPERATURE")),
        },
        headers={
            "Accept": "application/json",
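
Both test files also capture the request-parameter changes: the `model` now comes from configuration rather than LangChain's hardcoded `gpt-3.5-turbo` default, and `n`/`stream` disappear from the payload because the bare OpenAI client omits optional parameters it is not given. A sketch of the expected body under those assumptions; `app_config` here stands in for the test fixture of the same name, and all values are placeholders.

```python
# Sketch of the request payload shape the updated tests assert against the
# mocked chat-completions endpoint. Values are placeholders.
app_config = {
    "AZURE_OPENAI_MODEL": "my-gpt4-deployment",
    "AZURE_OPENAI_MAX_TOKENS": "1000",
    "AZURE_OPENAI_TEMPERATURE": "0",
}

expected_body = {
    "messages": [{"role": "user", "content": "user question"}],  # abbreviated
    # Model is read from configuration; LangChain always sent "gpt-3.5-turbo".
    "model": app_config.get("AZURE_OPENAI_MODEL"),
    "max_tokens": int(app_config.get("AZURE_OPENAI_MAX_TOKENS")),
    # "n": 1 and "stream": False are gone: the plain client only serializes
    # parameters that are explicitly provided.
    "temperature": float(app_config.get("AZURE_OPENAI_TEMPERATURE")),
}
```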