from ragas import evaluate
from ragas.metrics import answer_relevancy, faithfulness
from src.shared.common_fn import load_embedding_model
+from ragas.dataset_schema import SingleTurnSample
+from ragas.metrics import BleuScore, RougeScore, SemanticSimilarity, ContextEntityRecall
+from ragas.metrics._factual_correctness import FactualCorrectness
+from ragas.llms import LangchainLLMWrapper
+from langchain_openai import ChatOpenAI
+from langchain.embeddings import OpenAIEmbeddings
+from ragas.embeddings import LangchainEmbeddingsWrapper
+import nltk
+
+nltk.download('punkt')
load_dotenv()

EMBEDDING_MODEL = os.getenv("RAGAS_EMBEDDING_MODEL")
@@ -52,3 +62,41 @@ def get_ragas_metrics(question: str, context: list, answer: list, model: str):
    except Exception as e:
        logging.exception(f"Error during metrics evaluation: {e}")
        return {"error": str(e)}
+
+
+async def get_additional_metrics(question: str, contexts: list, answers: list, reference: str, model_name: str):
+    """Calculates multiple metrics for given question, answers, contexts, and reference."""
+    try:
+        if ("diffbot" in model_name) or ("ollama" in model_name):
+            raise ValueError(f"Unsupported model for evaluation: {model_name}")
+        llm, model_name = get_llm(model=model_name)
+        ragas_llm = LangchainLLMWrapper(llm)
+        embeddings = EMBEDDING_FUNCTION
+        embedding_model = LangchainEmbeddingsWrapper(embeddings=embeddings)
+        rouge_scorer = RougeScore()
+        semantic_scorer = SemanticSimilarity()
+        entity_recall_scorer = ContextEntityRecall()
+        entity_recall_scorer.llm = ragas_llm
+        semantic_scorer.embeddings = embedding_model
+        metrics = []
+        for response, context in zip(answers, contexts):
+            sample = SingleTurnSample(response=response, reference=reference)
+            rouge_score = await rouge_scorer.single_turn_ascore(sample)
+            rouge_score = round(rouge_score, 4)
+            semantic_score = await semantic_scorer.single_turn_ascore(sample)
+            semantic_score = round(semantic_score, 4)
+            if "gemini" in model_name:
+                entity_recall_score = "Not Available"
+            else:
+                entity_sample = SingleTurnSample(reference=reference, retrieved_contexts=[context])
+                entity_recall_score = await entity_recall_scorer.single_turn_ascore(entity_sample)
+                entity_recall_score = round(entity_recall_score, 4)
+            metrics.append({
+                "rouge_score": rouge_score,
+                "semantic_score": semantic_score,
+                "context_entity_recall_score": entity_recall_score
+            })
+        return metrics
+    except Exception as e:
+        logging.exception("Error in get_additional_metrics")
+        return {"error": str(e)}
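For reference, a minimal usage sketch of the new `get_additional_metrics` coroutine, driven from synchronous code with `asyncio.run`. This is not part of the diff: the import path `src.ragas_eval` is assumed, and the model key, question, contexts, answers, and reference strings are placeholder values.

```python
# Hypothetical usage sketch (assumptions: module path src.ragas_eval,
# placeholder model key and input strings).
import asyncio

from src.ragas_eval import get_additional_metrics  # assumed module path

scores = asyncio.run(
    get_additional_metrics(
        question="What does the document say about Neo4j?",  # placeholder question
        contexts=["Neo4j is a graph database."],              # retrieved chunks
        answers=["Neo4j is a graph database."],               # chatbot answers
        reference="Neo4j is a native graph database.",        # ground-truth answer
        model_name="openai_gpt_4o",                           # placeholder model key
    )
)
# Expected shape: one dict per answer with rouge_score, semantic_score,
# and context_entity_recall_score (or an {"error": ...} dict on failure).
print(scores)
```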