"""Evaluate the GroundednessChecker on a QA dataset generated from PDF documents.

The dataset is either regenerated from a list of PDFs via LLMBasedQAGenerator
or loaded from a previously pickled file, then scored with standard
classification metrics.
"""

import pickle

import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

from llmgaurdrails.custom_models.groundedness_checker.llm_based_qa_generator import LLMBasedQAGenerator
from llmgaurdrails.custom_models.groundedness_checker.pdf_data_chunker import process_pdf
from llmgaurdrails.model_inference.groundedness_checker import GroundednessChecker


def get_eval_data(eval_pdf_paths: list,
                  regenerate=False,
                  path_to_save='eval_dataset'):
    """Build the evaluation dataset from PDFs, or load a previously saved one.

    When `regenerate` is True, each PDF is chunked and passed to the QA
    generator; otherwise the pickled dataset at `path_to_save` is loaded.
    """
    if regenerate:
        print("Regenerating evaluation dataset")

        # Chunk every PDF and collect the chunks into a single flat list
        all_chunks = []
        for path in eval_pdf_paths:
            all_chunks.extend(process_pdf(path))

        qa_generator = LLMBasedQAGenerator()

        dataset = qa_generator.generate_dataset(all_chunks,
                                                persist_dataset=True,
                                                presisted_file_path=path_to_save)
        return dataset

    if path_to_save:
        with open(path_to_save, 'rb') as f:
            return pickle.load(f)

    raise ValueError("Please specify the path where the dataset was previously "
                     "saved in the parameter 'path_to_save'.")

def evaluate(dataset):
    """Run the GroundednessChecker over every row of the dataset and report metrics."""
    groundedness_checker = GroundednessChecker()
    eval_df = pd.DataFrame(data=dataset)

    predictions = []
    confidence_scores = []

    # Check each (question, answer, context) triple for groundedness
    for _, row in eval_df.iterrows():
        groundedness_result = groundedness_checker.check(
            question=row['question'],
            answer=row['answer'],
            context=row['context'])

        predictions.append(groundedness_result['is_grounded'])
        confidence_scores.append(groundedness_result['confidence'])

    eval_df['predicted'] = predictions
    eval_df['confidence'] = confidence_scores

    # Compare predictions against the ground-truth labels
    accuracy = accuracy_score(eval_df['label'], eval_df['predicted'])
    precision = precision_score(eval_df['label'], eval_df['predicted'])
    recall = recall_score(eval_df['label'], eval_df['predicted'])
    f1 = f1_score(eval_df['label'], eval_df['predicted'])
    conf_matrix = confusion_matrix(eval_df['label'], eval_df['predicted'])

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Confusion Matrix:\n", conf_matrix)


# Usage
if __name__ == "__main__":
    dataset = get_eval_data(eval_pdf_paths=[r"D:\Sasidhar\Projects\llm_gaurdrails\llmgaurdrails\data\CreditCard.pdf"])
    evaluate(dataset)
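    # A minimal sketch of regenerating the dataset from the PDFs instead of
    # loading the pickled copy; the save path simply reuses the function's
    # default and is an assumption, not taken from the original script.
    # dataset = get_eval_data(
    #     eval_pdf_paths=[r"D:\Sasidhar\Projects\llm_gaurdrails\llmgaurdrails\data\CreditCard.pdf"],
    #     regenerate=True,
    #     path_to_save="eval_dataset",
    # )
    # evaluate(dataset)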