Spaces:
Sleeping
Sleeping
from llmgaurdrails.custom_models.groundedness_checker.pdf_data_chunker import process_pdf | |
from llmgaurdrails.custom_models.groundedness_checker.llm_based_qa_generator import LLMBasedQAGenerator | |
from llmgaurdrails.custom_models.groundedness_checker.grounding_classifier import GroundingTrainer | |
from llmgaurdrails.custom_models.groundedness_checker.simple_qa_generator import SimpleQAGenerator | |
from llmgaurdrails.custom_models.groundedness_checker.evaluate_groundedness_model import evaluate,get_eval_data | |
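# Usage: run this module as a script to chunk the PDFs, generate a QA dataset,
# train the grounding classifier, and evaluate it.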
if __name__ == "__main__":
    # Script entry point: chunk the training PDFs, generate a QA dataset from
    # the chunks, train the grounding classifier, then evaluate it.

    # Raw strings keep the Windows backslashes literal; sequences such as
    # "\S" or "\d" in a normal string literal are invalid escapes and trigger
    # a SyntaxWarning on recent Python versions.
    trainning_pdf_paths = [
        r"D:\Sasidhar\Projects\cba\data\CreditCard.pdf",
        r"D:\Sasidhar\Projects\cba\data\home_insurance_pds.pdf",
    ]
    eval_pdf_paths = [
        r"D:\Sasidhar\Projects\llm_gaurdrails\llmgaurdrails\data\CreditCard.pdf",
    ]

    # Chunk every training PDF and flatten the per-file chunk lists into one.
    # The loop variable must be passed to process_pdf; indexing [0] here would
    # re-process the first PDF on every iteration and skip the others.
    chunks_flattened = [
        chunk
        for path in trainning_pdf_paths
        for chunk in process_pdf(path)
    ]

    # Generate the QA dataset from the chunks (persist_dataset=True is passed
    # through to the generator unchanged).
    qa_generator = LLMBasedQAGenerator()
    dataset = qa_generator.generate_dataset(chunks_flattened, persist_dataset=True)

    trainer = GroundingTrainer()
    trainer.train(dataset)

    # Evaluate on the dataset built from eval_pdf_paths rather than on the
    # training dataset, so the metrics are not measured on data the model was
    # just trained on.
    # NOTE(review): assumes evaluate() accepts the object returned by
    # get_eval_data() -- confirm against evaluate_groundedness_model. Metrics
    # recorded from earlier runs that evaluated the training dataset are not
    # directly comparable.
    eval_dataset = get_eval_data(eval_pdf_paths=eval_pdf_paths)
    evaluate(eval_dataset)
# Accuracy: 0.8952380952380953
# Precision: 0.8738738738738738
# Recall: 0.9238095238095239
# F1 Score: 0.8981481481481481