Skip to content

Commit 2c362e3

Browse files
kartikpersistenta-s-poornakaustubh-darekar
authored
Additional metrics using ground truth (#855)
* Updating ragas metrics * added the service for additional metrics * additional metrics api * Adding Rouge to requirement * changes done for additional metrics for gemini model * Additional metrics changes related to gemini model * Adding Rouge_Score Version * Api Integration * payload changes * payload fix * Fixing Eval Error * Adding fact_score metric * code refactoring * table integration * data binding * Integrated additional metrics on multimodes * removed fact score * Removing Fact Score * fix: Multimode fix * custommiddleware for gzip * removed unused state * message changes * uncommented gzipmiddleware * code refactoring * removed settings modal code * Table UI Fixes * removed state * UX improvements for chunks popup * added the status check * ndl version changes * tip and dropdown changes * icon fixes * contextmenu fix * Box CSS fix * icon fixes * icon changes * IsRoot fix * added the tooltip for metrics * Menu fix inside modal * hover color fix * menu changes * format and lint fixes --------- Co-authored-by: a-s-poorna <[email protected]> Co-authored-by: kaustubh-darekar <[email protected]>
1 parent e9c081c commit 2c362e3

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

70 files changed

+2964
-2485
lines changed

backend/requirements.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -179,5 +179,5 @@ PyMuPDF==1.24.5
179179
pypandoc==1.13
180180
graphdatascience==1.10
181181
Secweb==1.11.0
182-
ragas==0.1.14
183-
182+
ragas==0.2.2
183+
rouge_score==0.1.2

backend/score.py

+34-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ async def __call__(self, scope: Scope, receive: Receive, send: Send):
8585
app.add_middleware(ContentSecurityPolicy, Option={'default-src': ["'self'"], 'base-uri': ["'self'"], 'block-all-mixed-content': []}, script_nonce=False, style_nonce=False, report_only=False)
8686
app.add_middleware(XContentTypeOptions)
8787
app.add_middleware(XFrame, Option={'X-Frame-Options': 'DENY'})
88-
#app.add_middleware(GZipMiddleware, minimum_size=1000, compresslevel=5)
8988
app.add_middleware(CustomGZipMiddleware, minimum_size=1000, compresslevel=5,paths=["/sources_list","/url/scan","/extract","/chat_bot","/chunk_entities","/get_neighbours","/graph_query","/schema","/populate_graph_schema","/get_unconnected_nodes_list","/get_duplicate_nodes","/fetch_chunktext"])
9089
app.add_middleware(
9190
CORSMiddleware,
@@ -847,6 +846,40 @@ async def calculate_metric(question: str = Form(),
847846
)
848847
finally:
849848
gc.collect()
849+
850+
851+
@app.post('/additional_metrics')
async def calculate_additional_metrics(question: str = Form(),
                                       context: str = Form(),
                                       answer: str = Form(),
                                       reference: str = Form(),
                                       mode: str = Form(),
                                       model: str = Form(),
):
    """
    Compute additional ground-truth metrics (ROUGE, semantic similarity,
    context entity recall) for one question across multiple chat modes.

    Form fields:
        question:  the user question (plain string).
        context:   JSON-encoded list of retrieved-context strings, one per mode.
        answer:    JSON-encoded list of model answers, one per mode.
        reference: the ground-truth reference answer (plain string).
        mode:      JSON-encoded list of chat-mode names, parallel to the lists above.
        model:     LLM name used for evaluation.

    Returns a Success response with {mode_name: metric_dict} or a Failed response.
    """
    try:
        # Each of context/answer/mode arrives JSON-encoded; decode and normalize.
        context_list = [str(item).strip() for item in json.loads(context)] if context else []
        answer_list = [str(item).strip() for item in json.loads(answer)] if answer else []
        mode_list = [str(item).strip() for item in json.loads(mode)] if mode else []
        result = await get_additional_metrics(question, context_list, answer_list, reference, model)
        # BUG FIX: the original checked `result is None or "error" in result` and then
        # called result.get(...) — which raised AttributeError when result was None.
        if result is None:
            return create_api_response(
                'Failed',
                message='Failed to calculate evaluation metrics.',
                error='Ragas evaluation returned null'
            )
        if isinstance(result, dict) and "error" in result:
            return create_api_response(
                'Failed',
                message='Failed to calculate evaluation metrics.',
                error=result.get("error", "Ragas evaluation returned null")
            )
        # Pair each per-mode metric dict with its mode name; `mode_name` avoids
        # shadowing the `mode` form parameter (the original comprehension reused `mode`).
        data = {mode_name: dict(result[i]) for i, mode_name in enumerate(mode_list)}
        return create_api_response('Success', data=data)
    except Exception as e:
        logging.exception(f"Error while calculating evaluation metrics: {e}")
        return create_api_response(
            'Failed',
            message="Error while calculating evaluation metrics",
            error=str(e)
        )
    finally:
        # Evaluation can allocate large intermediate objects; reclaim promptly.
        gc.collect()
881+
882+
850883

851884
@app.post("/fetch_chunktext")
852885
async def fetch_chunktext(

backend/src/QA_integration.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,7 @@ def QA_RAG(graph,model, question, document_names, session_id, mode, write_access
662662
if document_names and not chat_mode_settings["document_filter"]:
663663
result = {
664664
"session_id": "",
665-
"message": "This chat mode does support document selection",
665+
"message": "Please deselect all documents in the table before using this chat mode",
666666
"info": {
667667
"sources": [],
668668
"model": "",

backend/src/ragas_eval.py

+48
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,16 @@
77
from ragas import evaluate
88
from ragas.metrics import answer_relevancy, faithfulness
99
from src.shared.common_fn import load_embedding_model
10+
from ragas.dataset_schema import SingleTurnSample
11+
from ragas.metrics import BleuScore, RougeScore, SemanticSimilarity, ContextEntityRecall
12+
from ragas.metrics._factual_correctness import FactualCorrectness
13+
from ragas.llms import LangchainLLMWrapper
14+
from langchain_openai import ChatOpenAI
15+
from langchain.embeddings import OpenAIEmbeddings
16+
from ragas.embeddings import LangchainEmbeddingsWrapper
17+
import nltk
18+
19+
nltk.download('punkt')
1020
load_dotenv()
1121

1222
EMBEDDING_MODEL = os.getenv("RAGAS_EMBEDDING_MODEL")
@@ -52,3 +62,41 @@ def get_ragas_metrics(question: str, context: list, answer: list, model: str):
5262
except Exception as e:
5363
logging.exception(f"Error during metrics evaluation: {e}")
5464
return {"error": str(e)}
65+
66+
67+
async def get_additional_metrics(question: str, contexts: list, answers: list, reference: str, model_name: str):
    """
    Score each (answer, context) pair against the ground-truth `reference`.

    Computes a ROUGE score and an embedding-based semantic-similarity score per
    answer, plus a context-entity-recall score per retrieved context (skipped
    for Gemini models, which report the placeholder string "Not Available").

    Returns:
        A list of metric dicts (one per answer), or {"error": str} on failure.
    """
    try:
        # These providers cannot be used with the ragas evaluators below.
        if any(provider in model_name for provider in ("diffbot", "ollama")):
            raise ValueError(f"Unsupported model for evaluation: {model_name}")

        llm, model_name = get_llm(model=model_name)
        wrapped_llm = LangchainLLMWrapper(llm)
        wrapped_embeddings = LangchainEmbeddingsWrapper(embeddings=EMBEDDING_FUNCTION)

        rouge = RougeScore()
        similarity = SemanticSimilarity()
        entity_recall = ContextEntityRecall()
        entity_recall.llm = wrapped_llm
        similarity.embeddings = wrapped_embeddings

        scores = []
        for answer_text, context_text in zip(answers, contexts):
            pair = SingleTurnSample(response=answer_text, reference=reference)
            rouge_value = round(await rouge.single_turn_ascore(pair), 4)
            similarity_value = round(await similarity.single_turn_ascore(pair), 4)
            if "gemini" in model_name:
                # Entity-recall evaluation is not run for Gemini models.
                recall_value = "Not Available"
            else:
                recall_sample = SingleTurnSample(reference=reference, retrieved_contexts=[context_text])
                recall_value = round(await entity_recall.single_turn_ascore(recall_sample), 4)
            scores.append({
                "rouge_score": rouge_value,
                "semantic_score": similarity_value,
                "context_entity_recall_score": recall_value
            })
        return scores
    except Exception as e:
        logging.exception("Error in get_additional_metrics")
        return {"error": str(e)}

frontend/package.json

+5-4
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@
1515
"@mui/material": "^5.15.10",
1616
"@mui/styled-engine": "^5.15.9",
1717
"@neo4j-devtools/word-color": "^0.0.8",
18-
"@neo4j-ndl/base": "^2.12.7",
19-
"@neo4j-ndl/react": "^2.16.9",
20-
"@neo4j-nvl/base": "^0.3.3",
21-
"@neo4j-nvl/react": "^0.3.3",
18+
"@neo4j-ndl/base": "^3.0.10",
19+
"@neo4j-ndl/react": "^3.0.17",
20+
"@neo4j-nvl/base": "^0.3.6",
21+
"@neo4j-nvl/react": "^0.3.6",
2222
"@react-oauth/google": "^0.12.1",
23+
"@tanstack/react-table": "^8.20.5",
2324
"@types/uuid": "^9.0.7",
2425
"axios": "^1.6.5",
2526
"clsx": "^2.1.1",

frontend/src/App.css

+10-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
}
2626

2727
.contentWithExpansion {
28-
width: calc(-840px + 100dvw);
28+
width: calc(-807px + 100dvw);
2929
height: calc(100dvh - 58px);
3030
padding: 3px;
3131
display: flex;
@@ -386,4 +386,13 @@
386386
.custom-menu {
387387
min-width: 250px;
388388
max-width: 305px;
389+
}
390+
.ndl-modal-root{
391+
z-index: 39 !important;
392+
}
393+
.tbody-dark .ndl-data-grid-tr:hover {
394+
--cell-background: rgb(60 63 68) !important;
395+
}
396+
.tbody-light .ndl-data-grid-tr:hover {
397+
--cell-background: rgb(226 227 229) !important;
389398
}

frontend/src/HOC/CustomModal.tsx

+4-3
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ const CustomModal: React.FC<CustomModalProps> = ({
1616
return (
1717
<Dialog
1818
size='small'
19-
open={open}
19+
isOpen={open}
2020
modalProps={{
2121
id: 'default-menu',
2222
}}
@@ -25,16 +25,17 @@ const CustomModal: React.FC<CustomModalProps> = ({
2525
<Dialog.Content className='n-flex n-flex-col n-gap-token-4 mt-6'>
2626
{status !== 'unknown' && (
2727
<Banner
28-
closeable
28+
isCloseable
2929
description={statusMessage}
3030
onClose={() => setStatus('unknown')}
3131
type={status}
3232
name='Custom Banner'
33+
usage='inline'
3334
/>
3435
)}
3536
<div className='n-flex n-flex-row n-flex-wrap'>{children}</div>
3637
<Dialog.Actions className='mt-4'>
37-
<Button onClick={submitHandler} size='medium' disabled={isDisabled}>
38+
<Button onClick={submitHandler} size='medium' isDisabled={isDisabled}>
3839
{submitLabel}
3940
</Button>
4041
</Dialog.Actions>

frontend/src/HOC/withVisibility.tsx

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
interface VisibilityProps {
2+
isVisible: boolean;
3+
}
4+
export function withVisibility<P>(WrappedComponent: React.ComponentType<P>) {
5+
const VisibityControlled = (props: P & VisibilityProps) => {
6+
if (props.isVisible === false) {
7+
return null;
8+
}
9+
10+
return <WrappedComponent {...props} />;
11+
};
12+
13+
return VisibityControlled;
14+
}

0 commit comments

Comments
 (0)