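"""Evaluation script for the GroundednessChecker.

Builds (or loads) a question/answer/context dataset from PDF documents and
reports accuracy, precision, recall, F1 and the confusion matrix for the
checker's groundedness predictions.
"""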
import pickle

import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

from llmgaurdrails.custom_models.groundedness_checker.pdf_data_chunker import process_pdf
from llmgaurdrails.custom_models.groundedness_checker.llm_based_qa_generator import LLMBasedQAGenerator
from llmgaurdrails.model_inference.groundedness_checker import GroundednessChecker
def get_eval_data(eval_pdf_paths: list,
                  regenerate=False,
                  path_to_save='eval_dataset'):
    """Build (or load) the evaluation QA dataset from a list of PDF paths."""
    if regenerate:
        print("Regenerating evaluation dataset")

        # Chunk every PDF, then flatten the per-document chunk lists into one list
        all_chunks = []
        for path in eval_pdf_paths:
            chunks = process_pdf(path)
            all_chunks.append(chunks)
        chunks_flattened = [chunk for doc_chunks in all_chunks for chunk in doc_chunks]

        # Generate question/answer/context triples and persist them to disk
        qa_generator = LLMBasedQAGenerator()
        dataset = qa_generator.generate_dataset(chunks_flattened,
                                                persist_dataset=True,
                                                presisted_file_path=path_to_save)
        return dataset
    else:
        if path_to_save:
            # Load a previously generated dataset from the pickle file
            with open(path_to_save, 'rb') as f:
                dataset = pickle.load(f)
            return dataset
        else:
            raise ValueError("Please specify the path where the dataset was previously saved "
                             "in the parameter 'path_to_save'")
def evaluate(dataset):
    """Run the groundedness checker over the dataset and report classification metrics."""
    groundedness_checker = GroundednessChecker()
    eval_df = pd.DataFrame(data=dataset)

    # Score every (question, answer, context) triple
    predictions = []
    confidence_scores = []
    for _, row in eval_df.iterrows():
        groundedness_result = groundedness_checker.check(
            question=row['question'],
            answer=row['answer'],
            context=row['context'])
        predictions.append(groundedness_result['is_grounded'])
        confidence_scores.append(groundedness_result['confidence'])

    eval_df['predicted'] = predictions
    eval_df['confidence'] = confidence_scores

    # Compare predictions against the ground-truth labels
    accuracy = accuracy_score(eval_df['label'], eval_df['predicted'])
    precision = precision_score(eval_df['label'], eval_df['predicted'])
    recall = recall_score(eval_df['label'], eval_df['predicted'])
    f1 = f1_score(eval_df['label'], eval_df['predicted'])
    conf_matrix = confusion_matrix(eval_df['label'], eval_df['predicted'])

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Confusion Matrix:\n", conf_matrix)
# Usage
if __name__ == "__main__":
    # get_eval_data expects a flat list of PDF paths; use a raw string for the Windows path
    dataset = get_eval_data(eval_pdf_paths=[r"D:\Sasidhar\Projects\llm_gaurdrails\llmgaurdrails\data\CreditCard.pdf"])
    evaluate(dataset)