# llmgaurdrails/custom_models/groundedness_checker/evaluate_groundedness_model.py
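"""
Evaluate the GroundednessChecker on a QA dataset generated from PDF documents.

The script chunks one or more PDFs, generates question/answer/context triples with
LLMBasedQAGenerator (or loads a previously persisted dataset), runs the checker on
each triple, and reports accuracy, precision, recall, F1, and the confusion matrix.
"""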
import pickle

import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

from llmgaurdrails.custom_models.groundedness_checker.pdf_data_chunker import process_pdf
from llmgaurdrails.custom_models.groundedness_checker.llm_based_qa_generator import LLMBasedQAGenerator
from llmgaurdrails.model_inference.groundedness_checker import GroundednessChecker
def get_eval_data(eval_pdf_paths: list,
                  regenerate=False,
                  path_to_save='eval_dataset'):
    """Build (or load) the evaluation dataset of QA pairs from the given PDFs."""
    if regenerate:
        print("Regenerating evaluation dataset from PDFs")

        all_chunks = []
        for path in eval_pdf_paths:
            chunks = process_pdf(path)
            all_chunks.append(chunks)

        # Flatten the per-PDF chunk lists into a single list of chunks
        chunks_flattened = [chunk for chunk_list in all_chunks for chunk in chunk_list]

        qa_generator = LLMBasedQAGenerator()
        # 'presisted_file_path' spelling follows the generator's keyword argument
        dataset = qa_generator.generate_dataset(chunks_flattened,
                                                persist_dataset=True,
                                                presisted_file_path=path_to_save)
        return dataset

    if path_to_save:
        with open(path_to_save, 'rb') as f:
            dataset = pickle.load(f)
        return dataset

    raise ValueError("Please specify the path where the dataset was previously saved "
                     "in the parameter 'path_to_save'")
def evaluate(dataset):
    """Score the groundedness checker against the labelled evaluation dataset."""
    groundedness_checker = GroundednessChecker()

    eval_df = pd.DataFrame(data=dataset)

    # Run the checker on each question/answer/context triple
    predictions = []
    confidence_scores = []
    for _, row in eval_df.iterrows():
        groundedness_result = groundedness_checker.check(
            question=row['question'],
            answer=row['answer'],
            context=row['context'])

        predictions.append(groundedness_result['is_grounded'])
        confidence_scores.append(groundedness_result['confidence'])

    eval_df['predicted'] = predictions
    eval_df['confidence'] = confidence_scores

    # Compare predictions against the gold labels
    accuracy = accuracy_score(eval_df['label'], eval_df['predicted'])
    precision = precision_score(eval_df['label'], eval_df['predicted'])
    recall = recall_score(eval_df['label'], eval_df['predicted'])
    f1 = f1_score(eval_df['label'], eval_df['predicted'])
    conf_matrix = confusion_matrix(eval_df['label'], eval_df['predicted'])

    # Print the results
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Confusion Matrix:\n", conf_matrix)
# Usage
if __name__ == "__main__":
    dataset = get_eval_data(eval_pdf_paths=[r"D:\Sasidhar\Projects\llm_gaurdrails\llmgaurdrails\data\CreditCard.pdf"])
    evaluate(dataset)