22# SPDX-License-Identifier: Apache-2.0
33
44from os import environ
5- from datasets import Dataset
65
6+ from datasets import Dataset
77from langchain_community .chat_models import ChatOllama
88from langchain_community .embeddings import OllamaEmbeddings
99from langchain_openai import AzureChatOpenAI , AzureOpenAIEmbeddings
10+ from model .crew import run_crew
1011from ragas import evaluate
1112from ragas .embeddings import LangchainEmbeddingsWrapper
12- from model .crew import run_crew
1313from ragas .llms import LangchainLLMWrapper
14- from ragas .metrics import answer_correctness , answer_relevancy , answer_similarity , context_entity_recall , context_precision , context_recall , faithfulness , summarization_score
15- from ragas .metrics ._aspect_critic import harmfulness , maliciousness , coherence , correctness , conciseness
14+ from ragas .metrics import (
15+ answer_correctness ,
16+ answer_relevancy ,
17+ answer_similarity ,
18+ context_entity_recall ,
19+ context_precision ,
20+ context_recall ,
21+ faithfulness ,
22+ summarization_score ,
23+ )
24+ from ragas .metrics ._aspect_critic import (
25+ coherence ,
26+ conciseness ,
27+ correctness ,
28+ harmfulness ,
29+ maliciousness ,
30+ )
1631
# Azure OpenAI credentials. The "NA" sentinel means "not configured";
# eval() checks azure_openai_api_key against "NA" and falls back to a
# local Ollama model when Azure is not set up.
azure_openai_api_key = environ .get ("AZURE_OPENAI_API_KEY" , "NA" )
azure_openai_endpoint = environ .get ("AZURE_OPENAI_ENDPOINT" , "NA" )
# Local (Ollama) evaluator settings used on the non-Azure path.
# NOTE(review): eval_base_url is defined here but its use is not visible in
# this chunk — presumably consumed by the local-model setup; verify.
eval_model_name = environ .get ("LOCAL_MODEL_NAME" , "llama3.1" )
eval_base_url = environ .get ("LOCAL_MODEL_BASE_URL" , "http://localhost:11434/v1/" )
2540
41+
2642def eval ():
2743 if azure_openai_api_key != "NA" :
2844 print ("Set Azure OpenAI model for evaluation" )
@@ -40,20 +56,14 @@ def eval():
4056 api_version = openai_api_version ,
4157 azure_endpoint = azure_openai_endpoint ,
4258 azure_deployment = azure_deployment_name ,
43- model = azure_model_version
59+ model = azure_model_version ,
4460 )
4561 )
4662 else :
4763 print ("Set local model for evaluation" )
48- evaluator_llm = LangchainLLMWrapper (
49- ChatOllama (
50- model = eval_model_name
51- )
52- )
64+ evaluator_llm = LangchainLLMWrapper (ChatOllama (model = eval_model_name ))
5365 evaluator_embeddings = LangchainEmbeddingsWrapper (
54- OllamaEmbeddings (
55- model = eval_model_name
56- )
66+ OllamaEmbeddings (model = eval_model_name )
5767 )
5868
5969 test_input = "Gather data about new Critical CVEs from todays date"
@@ -63,13 +73,13 @@ def eval():
6373 print ("Task output: " + test_output )
6474
6575 d = dict ()
66- d [' question' ] = [test_input ]
67- d [' answer' ] = [test_output ]
68- d [' context' ] = [[]]
69- d [' retrieval_context' ] = [[]]
70- d [' reference' ] = ['' ]
71- d [' reference_contexts' ] = [[]]
72- d [' retrieved_contexts' ] = [[]]
76+ d [" question" ] = [test_input ]
77+ d [" answer" ] = [test_output ]
78+ d [" context" ] = [[]]
79+ d [" retrieval_context" ] = [[]]
80+ d [" reference" ] = ["" ]
81+ d [" reference_contexts" ] = [[]]
82+ d [" retrieved_contexts" ] = [[]]
7383 dataset = Dataset .from_dict (d )
7484
7585 score = evaluate (
@@ -90,9 +100,12 @@ def eval():
90100 summarization_score ,
91101 ],
92102 evaluator_llm ,
93- evaluator_embeddings
103+ evaluator_embeddings ,
94104 )
95105 print ("SCORE" , score )
96106
107+ return (dataset , score )
108+
109+
# Script entry point: run the Ragas evaluation pipeline defined above.
if __name__ == "__main__" :
    eval ()