import pickle

import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

from llmgaurdrails.custom_models.groundedness_checker.pdf_data_chunker import process_pdf
from llmgaurdrails.custom_models.groundedness_checker.llm_based_qa_generator import LLMBasedQAGenerator
from llmgaurdrails.model_inference.groundedness_checker import GroundednessChecker

def get_eval_data(eval_pdf_paths: list,
                  regenerate=False,
                  path_to_save='eval_dataset'):
    """Build or load the evaluation dataset of QA pairs.

    If `regenerate` is True, each PDF is chunked and a QA dataset is generated
    and persisted to `path_to_save`. Otherwise the previously persisted
    dataset is loaded from `path_to_save`.
    """
    if regenerate:
        print("Regenerating the evaluation dataset")
        all_chunks = []
        for path in eval_pdf_paths:
            all_chunks.append(process_pdf(path))
        # Flatten the per-PDF chunk lists into a single list of chunks
        chunks_flattened = [chunk for chunks in all_chunks for chunk in chunks]
        qa_generator = LLMBasedQAGenerator()
        dataset = qa_generator.generate_dataset(chunks_flattened,
                                                persist_dataset=True,
                                                presisted_file_path=path_to_save)
        return dataset

    if path_to_save:
        with open(path_to_save, 'rb') as f:
            return pickle.load(f)
    raise ValueError("Please specify where the dataset was previously saved "
                     "via the 'path_to_save' parameter")
def evaluate(dataset):
    """Run the groundedness checker over the dataset and report metrics."""
    groundedness_checker = GroundednessChecker()
    eval_df = pd.DataFrame(data=dataset)

    # Collect a groundedness verdict and confidence score for each QA pair
    predictions = []
    confidence_scores = []
    for _, row in eval_df.iterrows():
        groundedness_result = groundedness_checker.check(
            question=row['question'],
            answer=row['answer'],
            context=row['context'])
        predictions.append(groundedness_result['is_grounded'])
        confidence_scores.append(groundedness_result['confidence'])

    eval_df['predicted'] = predictions
    eval_df['confidence'] = confidence_scores

    # Compare predictions against the ground-truth labels
    accuracy = accuracy_score(eval_df['label'], eval_df['predicted'])
    precision = precision_score(eval_df['label'], eval_df['predicted'])
    recall = recall_score(eval_df['label'], eval_df['predicted'])
    f1 = f1_score(eval_df['label'], eval_df['predicted'])
    conf_matrix = confusion_matrix(eval_df['label'], eval_df['predicted'])

    # Print the results
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Confusion Matrix:\n", conf_matrix)

    # Return the annotated frame so callers can inspect per-row results
    return eval_df
# Usage
if __name__ == "__main__":
    dataset = get_eval_data(eval_pdf_paths=[r"D:\Sasidhar\Projects\llm_gaurdrails\llmgaurdrails\data\CreditCard.pdf"])
    evaluate(dataset)
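    # To rebuild the dataset from the source PDFs instead of loading a
    # previously persisted pickle, the same call can be made with
    # regenerate=True, e.g.:
    # dataset = get_eval_data(
    #     eval_pdf_paths=[r"D:\Sasidhar\Projects\llm_gaurdrails\llmgaurdrails\data\CreditCard.pdf"],
    #     regenerate=True)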