File size: 1,566 Bytes
ad9331d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from transformers import BertForQuestionAnswering, BertTokenizer
import torch
from summarize import summarize_text,extract_text_from_pdf  # Import summarization function

def load_qa_model(model_path="D:/code/bert_easy/bert-large-uncased-whole-word-masking-finetuned-squad"):
    """Load a BERT question-answering model together with its tokenizer.

    Args:
        model_path: Location passed to ``from_pretrained`` for both the
            model and the tokenizer. Defaults to a local checkpoint directory.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # Tuple elements are evaluated left to right, so the model is loaded
    # before the tokenizer.
    return (
        BertForQuestionAnswering.from_pretrained(model_path),
        BertTokenizer.from_pretrained(model_path),
    )

def get_answer(question, context, model, tokenizer, max_answer_len=30):
    """Extract the answer span for ``question`` from ``context``.

    The question/context pair is tokenized (truncated to 512 tokens), run
    through the model without gradient tracking, and the highest-scoring
    token span is converted back to a string.

    The span is chosen jointly: among all (start, end) pairs with
    ``start <= end`` and at most ``max_answer_len`` tokens, the pair that
    maximizes ``start_logit + end_logit`` wins. Taking the two argmaxes
    independently can put the end before the start, which silently produces
    an empty answer.

    Args:
        question: The question text.
        context: The text to search for the answer.
        model: Callable accepting the tokenizer output as keyword arguments
            and returning an object with ``start_logits`` and ``end_logits``
            (batch size 1).
        tokenizer: Tokenizer providing ``convert_ids_to_tokens`` and
            ``convert_tokens_to_string``.
        max_answer_len: Maximum number of tokens in the returned span.

    Returns:
        The answer text as produced by ``tokenizer.convert_tokens_to_string``.

    Raises:
        ValueError: If ``max_answer_len`` is smaller than 1.
    """
    if max_answer_len < 1:
        raise ValueError("max_answer_len must be at least 1")

    inputs = tokenizer(question, context, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)

    # Only one sequence is encoded, so work on the first batch row.
    start_logits = outputs.start_logits[0]
    end_logits = outputs.end_logits[0]
    seq_len = start_logits.size(0)

    # span_scores[i, j] is the score of the span starting at i and ending at j.
    span_scores = start_logits.unsqueeze(1) + end_logits.unsqueeze(0)

    # Rule out spans that end before they start or exceed the length limit.
    positions = torch.arange(seq_len, device=span_scores.device)
    span_len = positions.unsqueeze(0) - positions.unsqueeze(1) + 1
    valid = (span_len >= 1) & (span_len <= max_answer_len)
    span_scores = span_scores.masked_fill(~valid, float("-inf"))

    # argmax without ``dim`` indexes the flattened matrix; recover (row, col).
    best = int(torch.argmax(span_scores))
    answer_start, last_token = divmod(best, seq_len)
    answer_end = last_token + 1  # slice end is exclusive

    answer_ids = inputs["input_ids"][0][answer_start:answer_end]
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer_ids))

    return answer

if __name__ == "__main__":
    # Demo pipeline: extract text from a PDF, summarize it, then answer a
    # sample question using the summary as the QA context.

    # Replace with actual PDF file path
    pdf_path = "C://Users/HP/Downloads/study/cis/CIS Fundamentals.pdf"
    sample_question = "what is cloud computing ?"

    # The extracted text is only needed as input to the summarizer.
    summary = summarize_text(extract_text_from_pdf(pdf_path))

    model, tokenizer = load_qa_model()
    # Use summary as context
    answer = get_answer(sample_question, summary, model, tokenizer)

    print("Summary:", summary)
    print("Answer:", answer)