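# Gradio demo: extractive question answering with DistilBERT fine-tuned on SQuAD.
# The app takes a context passage and a question, and returns the predicted
# answer span together with a confidence score.
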
import gradio as gr

# Method 1: high-level question-answering pipeline (kept here for reference)
# from transformers import pipeline
# question_answer = pipeline('question-answering',model = 'distilbert/distilbert-base-cased-distilled-squad')

# def question_answering(context, question):

#     output = question_answer({
#         'context': context,
#         'question': question
#     })

#     return output['answer'], str(output['score'] * 100)


# Method 2: use the tokenizer and model directly to access the start/end logits and compute a confidence score
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch
import torch.nn.functional as F

tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-cased-distilled-squad")
model = AutoModelForQuestionAnswering.from_pretrained("distilbert/distilbert-base-cased-distilled-squad")

def question_answering(context, question):
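    """Extract the answer to `question` from `context`; return (answer, confidence as a percentage string)."""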

    inputs = tokenizer(question, context, return_tensors="pt", truncation=True)

    # Keep the input IDs so the predicted answer span can be decoded back to text
    input_ids = inputs["input_ids"].tolist()[0]
    
    # Run inference (no gradient tracking needed) to get the start and end logits
    with torch.no_grad():
        outputs = model(**inputs)
    start_logits = outputs.start_logits
    end_logits = outputs.end_logits
    
    # Get the most likely beginning and end of answer with the argmax of the logits
    start_index = torch.argmax(start_logits)
    end_index = torch.argmax(end_logits) + 1

    # Apply softmax to get probabilities
    start_probs = F.softmax(start_logits, dim=-1)
    end_probs = F.softmax(end_logits, dim=-1)
    
    # Convert token IDs of the answer span back to text
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[start_index:end_index]))   
    
    # Calculate the confidence score
    confidence_score = start_probs[0][start_index].item() * end_probs[0][end_index-1].item()

    return answer, str(confidence_score * 100)


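# Gradio UI: two text inputs (context and question) mapped to two text outputs
# (the predicted answer and its confidence score).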
iface = gr.Interface(
    fn = question_answering,
    inputs = [ 
        gr.Textbox(label = "Context", placeholder = "Enter your context here..", lines = 5),
        gr.Textbox(label = "Question", placeholder = "Enter your question", lines = 2)
    ],
    outputs = [
        gr.Textbox(label = "Answer", lines = 2),
        gr.Textbox(label = "Accuracy")
    ]
)

iface.launch()
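
# launch() starts a local web server and prints the URL at which the demo can be opened in a browser.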