Sasidhar committed
Commit 239fd97 · verified · 1 Parent(s): 60b59f1

Update custom_models/groundedness_checker/evaluate_groundedness_model.py

custom_models/groundedness_checker/evaluate_groundedness_model.py CHANGED
@@ -1,84 +1,83 @@
-
- from llmgaurdrails.custom_models.groundedness_checker.pdf_data_chunker import process_pdf
- import pandas as pd
- from llmgaurdrails.custom_models.groundedness_checker.llm_based_qa_generator import LLMBasedQAGenerator
- import pickle
- from llmgaurdrails.model_inference.groundedness_checker import GroundednessChecker
- from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
-
-
- def get_eval_data(eval_pdf_paths: list,
-                   regenerate=False,
-                   path_to_save='eval_dataset'):
-
-     if regenerate:
-         print("regenerating")
-
-         # pdf_path = # Replace with your PDF
-         pdf_paths = eval_pdf_paths
-
-         all_chunks = []
-
-         for path in pdf_paths:
-             chunks = process_pdf(path)
-             all_chunks.append(chunks)
-
-         chunks_flattened = [x for xs in all_chunks for x in xs]
-
-         qa_generator = LLMBasedQAGenerator()
-
-         dataset = qa_generator.generate_dataset(chunks_flattened, persist_dataset=True, presisted_file_path=path_to_save)
-
-         return dataset
-     else:
-         if path_to_save:
-             dataset = pickle.load(open(path_to_save, 'rb'))
-             return dataset
-         else:
-             raise ValueError("Please specify the path where the dataset was previously saved in the parameter 'path_to_save'")
-
- def evaluate(dataset):
-     groundedness_checker = GroundednessChecker()
-     eval_df = pd.DataFrame(data=dataset)
-
-     predictions = []
-     confidence_scores = []
-
-     for i, row in eval_df.iterrows():
-         groundedness_result = groundedness_checker.check(
-             question=row['question'],
-             answer=row['answer'],
-             context=row['context'])
-
-         predictions.append(groundedness_result['is_grounded'])
-         confidence_scores.append(groundedness_result['confidence'])
-
-     eval_df['predicted'] = predictions
-     eval_df['confidence'] = confidence_scores
-
-     accuracy = accuracy_score(eval_df['label'], eval_df['predicted'])
-     precision = precision_score(eval_df['label'], eval_df['predicted'])
-     recall = recall_score(eval_df['label'], eval_df['predicted'])
-     f1 = f1_score(eval_df['label'], eval_df['predicted'])
-     conf_matrix = confusion_matrix(eval_df['label'], eval_df['predicted'])
-
-     # Print the results
-     print("Accuracy:", accuracy)
-     print("Precision:", precision)
-     print("Recall:", recall)
-     print("F1 Score:", f1)
-     print("Confusion Matrix:\n", conf_matrix)
-
-
- # Usage
- if __name__ == "__main__":
-     dataset = get_eval_data(eval_pdf_paths=[["D:\Sasidhar\Projects\llm_gaurdrails\llmgaurdrails\data\CreditCard.pdf"]])
-     evaluate(dataset)
-
-
-
-
-
-
-
+
+ from llmgaurdrails.custom_models.groundedness_checker.pdf_data_chunker import process_pdf
+ import pandas as pd
+ from llmgaurdrails.custom_models.groundedness_checker.llm_based_qa_generator import LLMBasedQAGenerator
+ import pickle
+ from llmgaurdrails.model_inference.groundedness_checker import GroundednessChecker
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
+
+
+ def get_eval_data(eval_pdf_paths: list,
+                   regenerate=False,
+                   path_to_save='eval_dataset'):
+
+     if regenerate:
+         print("regenerating")
+
+         # pdf_path = # Replace with your PDF
+         pdf_paths = eval_pdf_paths
+
+         all_chunks = []
+
+         for path in pdf_paths:
+             chunks = process_pdf(path)
+             all_chunks.append(chunks)
+
+         chunks_flattened = [x for xs in all_chunks for x in xs]
+
+         qa_generator = LLMBasedQAGenerator()
+
+         dataset = qa_generator.generate_dataset(chunks_flattened, persist_dataset=True, presisted_file_path=path_to_save)
+
+         return dataset
+     else:
+         if path_to_save:
+             dataset = pickle.load(open(path_to_save, 'rb'))
+             return dataset
+         else:
+             raise ValueError("Please specify the path where the dataset was previously saved in the parameter 'path_to_save'")
+
+ def evaluate(dataset):
+     groundedness_checker = GroundednessChecker()
+     eval_df = pd.DataFrame(data=dataset)
+
+     predictions = []
+     confidence_scores = []
+
+     for i, row in eval_df.iterrows():
+         groundedness_result = groundedness_checker.check(
+             question=row['question'],
+             answer=row['answer'],
+             context=row['context'])
+
+         predictions.append(groundedness_result['is_grounded'])
+         confidence_scores.append(groundedness_result['confidence'])
+
+     eval_df['predicted'] = predictions
+     eval_df['confidence'] = confidence_scores
+
+     accuracy = accuracy_score(eval_df['label'], eval_df['predicted'])
+     precision = precision_score(eval_df['label'], eval_df['predicted'])
+     recall = recall_score(eval_df['label'], eval_df['predicted'])
+     f1 = f1_score(eval_df['label'], eval_df['predicted'])
+     conf_matrix = confusion_matrix(eval_df['label'], eval_df['predicted'])
+
+     print("Accuracy:", accuracy)
+     print("Precision:", precision)
+     print("Recall:", recall)
+     print("F1 Score:", f1)
+     print("Confusion Matrix:\n", conf_matrix)
+
+
+ # Usage
+ if __name__ == "__main__":
+     dataset = get_eval_data(eval_pdf_paths=[["D:\Sasidhar\Projects\llm_gaurdrails\llmgaurdrails\data\CreditCard.pdf"]])
+     evaluate(dataset)
+
+
+
+
+
+
+
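
A note on the __main__ block in this file: get_eval_data loops over eval_pdf_paths and passes each element straight to process_pdf, so the nested list in the call above hands process_pdf a list rather than a path string, and the plain "D:\..." literal leaves the Windows backslashes unescaped. Below is a minimal corrected usage sketch, not part of the commit; it assumes the imports and signatures shown in the diff, and uses regenerate=True so the dataset is rebuilt from the PDF rather than loaded from the pickled eval_dataset file.

    if __name__ == "__main__":
        # Raw string so the backslashes in the Windows path stay literal.
        pdf_path = r"D:\Sasidhar\Projects\llm_gaurdrails\llmgaurdrails\data\CreditCard.pdf"

        # get_eval_data iterates eval_pdf_paths and calls process_pdf on each
        # element, so pass a flat list of path strings, not a nested list.
        # regenerate=True rebuilds the QA dataset from the PDF; the default
        # (False) instead unpickles a previously saved 'eval_dataset' file.
        dataset = get_eval_data(eval_pdf_paths=[pdf_path], regenerate=True)

        # evaluate() expects each record to carry 'question', 'answer' and
        # 'context' fields plus a ground-truth 'label' for the sklearn metrics.
        evaluate(dataset)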