Sasidhar committed on
Commit
6a36cff
·
verified ·
1 Parent(s): 7847f4e

Update custom_models/groundedness_checker/main.py

Browse files
custom_models/groundedness_checker/main.py CHANGED
@@ -1,47 +1,49 @@
1
-
2
- from llmgaurdrails.custom_models.groundedness_checker.pdf_data_chunker import process_pdf
3
- from llmgaurdrails.custom_models.groundedness_checker.llm_based_qa_generator import LLMBasedQAGenerator
4
- from llmgaurdrails.custom_models.groundedness_checker.grounding_classifier import GroundingTrainer
5
- from llmgaurdrails.custom_models.groundedness_checker.simple_qa_generator import SimpleQAGenerator
6
- from llmgaurdrails.custom_models.groundedness_checker.evaluate_groundedness_model import evaluate,get_eval_data
7
-
8
- # Usage
9
- if __name__ == "__main__":
10
-
11
- # pdf_path = # Replace with your PDF
12
- trainning_pdf_paths = ["D:\Sasidhar\Projects\cba\data\CreditCard.pdf" ,
13
- "D:\Sasidhar\Projects\cba\data\home_insurance_pds.pdf"]
14
-
15
- eval_pdf_paths = ["D:\Sasidhar\Projects\llm_gaurdrails\llmgaurdrails\data\CreditCard.pdf"]
16
-
17
- all_chunks = []
18
-
19
- for path in trainning_pdf_paths:
20
- chunks = process_pdf(trainning_pdf_paths[0])
21
- all_chunks.append(chunks)
22
-
23
- chunks_flattened = [x for xs in all_chunks for x in xs]
24
-
25
- # generate qa dataset
26
- qa_generator = LLMBasedQAGenerator()
27
-
28
- dataset = qa_generator.generate_dataset(chunks_flattened,persist_dataset=True)
29
-
30
- trainer = GroundingTrainer()
31
- trainer.train(dataset)
32
-
33
- eval_dataset = get_eval_data(eval_pdf_paths=eval_pdf_paths)
34
- evaluate(dataset)
35
- # Accuracy: 0.8952380952380953
36
- # Precision: 0.8738738738738738
37
- # Recall: 0.9238095238095239
38
- # F1 Score: 0.8981481481481481
39
-
40
-
41
-
42
-
43
-
44
-
45
-
46
-
 
 
47
 
 
1
+
2
+ from llmgaurdrails.custom_models.groundedness_checker.pdf_data_chunker import process_pdf
3
+ from llmgaurdrails.custom_models.groundedness_checker.llm_based_qa_generator import LLMBasedQAGenerator
4
+ from llmgaurdrails.custom_models.groundedness_checker.grounding_classifier import GroundingTrainer
5
+ from llmgaurdrails.custom_models.groundedness_checker.simple_qa_generator import SimpleQAGenerator
6
+ from llmgaurdrails.custom_models.groundedness_checker.evaluate_groundedness_model import evaluate,get_eval_data
7
+
8
+ if __name__ == "__main__":
9
+
10
+ # Replace with your PDF Files
11
+ trainning_pdf_paths = ["D:\Sasidhar\Projects\cba\data\CreditCard.pdf" ,
12
+ "D:\Sasidhar\Projects\cba\data\home_insurance_pds.pdf"]
13
+
14
+ eval_pdf_paths = ["D:\Sasidhar\Projects\llm_gaurdrails\llmgaurdrails\data\PrivateBanking.pdf"]
15
+
16
+ all_chunks = []
17
+
18
+ for path in trainning_pdf_paths:
19
+ chunks = process_pdf(trainning_pdf_paths[0])
20
+ all_chunks.append(chunks)
21
+
22
+ chunks_flattened = [x for xs in all_chunks for x in xs]
23
+
24
+ # generate qa dataset
25
+ qa_generator = LLMBasedQAGenerator()
26
+
27
+ dataset = qa_generator.generate_dataset(chunks_flattened,persist_dataset=True)
28
+
29
+ trainer = GroundingTrainer()
30
+ trainer.train(dataset)
31
+
32
+ eval_dataset = get_eval_data(eval_pdf_paths=eval_pdf_paths)
33
+ evaluate(dataset)
34
+
35
+ # Result on test dataset - NOTE: these numbers are wrong; they were obtained on the training dataset by mistake (evaluate() above is passed `dataset`, not `eval_dataset`). Will fix before the presentation.
36
+
37
+ # Accuracy: 0.8952380952380953
38
+ # Precision: 0.8738738738738738
39
+ # Recall: 0.9238095238095239
40
+ # F1 Score: 0.8981481481481481
41
+
42
+
43
+
44
+
45
+
46
+
47
+
48
+
49