# Spaces: Sleeping  (appears to be a status banner captured from the hosting page; not part of the program)
from transformers import BertForQuestionAnswering, BertTokenizer | |
import torch | |
from summarize import summarize_text,extract_text_from_pdf # Import summarization function | |
def load_qa_model(model_path="D:/code/bert_easy/bert-large-uncased-whole-word-masking-finetuned-squad"):
    """Load the BERT question-answering model and its tokenizer.

    Args:
        model_path: Location passed to ``from_pretrained`` for both the
            model and the tokenizer.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # The model is loaded first, then the tokenizer, both from the same path.
    qa_model = BertForQuestionAnswering.from_pretrained(model_path)
    qa_tokenizer = BertTokenizer.from_pretrained(model_path)
    loaded = (qa_model, qa_tokenizer)
    return loaded
def _best_valid_span(start_logits, end_logits):
    """Return the ``(start, end)`` pair with ``start <= end`` whose summed logits are highest."""
    # scores[i, j] is the score of a span starting at token i and ending at token j.
    scores = start_logits.unsqueeze(1) + end_logits.unsqueeze(0)
    # Keep only the upper triangle (j >= i); spans that end before they start are excluded.
    allowed = torch.ones_like(scores, dtype=torch.bool).triu()
    scores = scores.masked_fill(~allowed, float("-inf"))
    flat_best = int(torch.argmax(scores))
    return divmod(flat_best, scores.size(1))


def get_answer(question, context, model, tokenizer):
    """Generate an answer for a question based on the provided context.

    The question/context pair is tokenized (truncated to 512 tokens), run
    through the model without gradient tracking, and the highest-scoring
    answer span is decoded back into text.

    Args:
        question: Question text.
        context: Text the answer is extracted from.
        model: Callable QA model whose output exposes ``start_logits`` and
            ``end_logits``.
        tokenizer: Tokenizer matching the model.

    Returns:
        The decoded answer string, with special tokens removed. May be empty
        when the selected span contains only special tokens.
    """
    inputs = tokenizer(question, context, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)

    # Only a single question/context pair is encoded, so use the first batch row.
    start_logits = outputs.start_logits[0]
    end_logits = outputs.end_logits[0]

    answer_start = int(torch.argmax(start_logits))
    answer_end = int(torch.argmax(end_logits))
    if answer_end < answer_start:
        # Independent argmax produced an inverted span, which would slice to an
        # empty answer. Fall back to the best span that is actually valid.
        answer_start, answer_end = _best_valid_span(start_logits, end_logits)

    # The end index is inclusive, hence the +1 in the slice.
    answer_ids = inputs["input_ids"][0][answer_start:answer_end + 1].tolist()
    # Skip special tokens so markers like [CLS]/[SEP] never leak into the answer text.
    answer_tokens = tokenizer.convert_ids_to_tokens(answer_ids, skip_special_tokens=True)
    return tokenizer.convert_tokens_to_string(answer_tokens)
if __name__ == "__main__":
    # Demo run: answer one sample question about a PDF, using its summary as context.
    sample_question = "what is cloud computing ?"
    pdf_path = "C://Users/HP/Downloads/study/cis/CIS Fundamentals.pdf"  # Replace with actual PDF file path

    # Extract the PDF text and summarize it; the summary is what the QA model sees.
    summary = summarize_text(extract_text_from_pdf(pdf_path))

    model, tokenizer = load_qa_model()
    answer = get_answer(sample_question, summary, model, tokenizer)

    print("Summary:", summary)
    print("Answer:", answer)