File size: 3,863 Bytes
2b77bf1
 
640fbe7
2b77bf1
 
 
 
cc21bae
2b77bf1
5a91724
2b77bf1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
640fbe7
 
 
 
 
 
2b77bf1
640fbe7
 
 
 
2b77bf1
640fbe7
2b77bf1
 
640fbe7
 
d686330
640fbe7
2b77bf1
640fbe7
 
 
 
2b77bf1
640fbe7
 
2b77bf1
 
 
 
 
 
640fbe7
 
 
 
a1d73f6
640fbe7
 
 
 
 
 
 
 
 
 
cc21bae
 
984eecc
cc21bae
984eecc
640fbe7
5b9ca5f
 
640fbe7
fa9ebfa
 
 
 
640fbe7
fa9ebfa
f900277
640fbe7
 
5b9ca5f
 
640fbe7
fa9ebfa
84a2bc0
fa9ebfa
 
640fbe7
fa9ebfa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import os
import streamlit as st
from streamlit_option_menu import option_menu
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS 
from transformers import TFAutoModelForQuestionAnswering, AutoTokenizer, pipeline
from PIL import Image

# SECURITY: credentials must never be committed to source control. The OpenAI
# key that used to be hard-coded on this line is exposed in the repository
# history and should be revoked. Provide OPENAI_API_KEY through the environment
# (or the hosting platform's secret store) before launching the app.
if not os.environ.get("OPENAI_API_KEY"):
    st.error(
        "OPENAI_API_KEY is not set. Configure it as an environment variable "
        "or secret and restart the app."
    )
    # Halt this script run: nothing below can work without the key.
    st.stop()

# Streamlit re-executes this whole script on every widget interaction, so the
# expensive resources below are wrapped in cached loaders and built only once
# per server process instead of once per question.
# `st.cache_resource` exists on Streamlit >= 1.18; older releases fall back to
# the legacy `st.cache(allow_output_mutation=True)` decorator.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is None:
    _cache_resource = st.cache(allow_output_mutation=True)


@_cache_resource
def _build_docsearch(context_path="./data/full_context.txt"):
    """Read the context text, split it into chunks and index them with FAISS.

    Args:
        context_path: Path of the plain-text file to index.

    Returns:
        The FAISS vector store built from the text chunks.
    """
    # Explicit encoding so the read does not depend on the host's locale.
    with open(context_path, "r", encoding="utf-8") as context_file:
        context = context_file.read()

    # Split the text into smaller chunks for indexing.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,  # striding over the text
        length_function=len,
    )
    chunks = text_splitter.split_text(context)

    # Embeddings come from OpenAI, so OPENAI_API_KEY must be available.
    return FAISS.from_texts(chunks, OpenAIEmbeddings())


@_cache_resource
def _load_question_answerer(model_path, tokenizer_name):
    """Create a question-answering pipeline from a local model directory.

    Args:
        model_path: Directory holding the fine-tuned TensorFlow QA model.
        tokenizer_name: Name of the tokenizer to load with ``AutoTokenizer``.

    Returns:
        A ``transformers`` "question-answering" pipeline using our own model
        and the given tokenizer.
    """
    model = TFAutoModelForQuestionAnswering.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    return pipeline("question-answering", model=model, tokenizer=tokenizer)


# Vector index over the context document
docsearch = _build_docsearch()

# Roberta model
question_answerer0 = _load_question_answerer(
    "./models/roberta_model", "deepset/roberta-base-squad2"
)

# Bert base model
question_answerer1 = _load_question_answerer(
    "./models/bert_finetuned_model", "huggingface-course/bert-finetuned-squad"
)


def QnAfunction(question, QnAmodel, min_score=0.01, search_index=None):
    """Answer *question* using the best-scoring chunk from the vector index.

    The question is used to retrieve similar chunks from the index; the QA
    model is then run on every retrieved chunk and the answer with the
    highest score is kept.

    Args:
        question: The user's question.
        QnAmodel: Callable invoked as ``QnAmodel(question=..., context=...)``
            returning a mapping with ``'score'`` and ``'answer'`` keys.
        min_score: Score an answer must exceed to be accepted at all.
        search_index: Object exposing ``similarity_search(question)`` whose
            results have a ``page_content`` attribute. Defaults to the
            module-level ``docsearch`` index.

    Returns:
        ``(answer, score)`` for the best answer, or a fixed fallback message
        with score ``0`` when no non-empty answer scored above ``min_score``.
    """
    # Resolve the default lazily so callers may supply their own index.
    index = docsearch if search_index is None else search_index

    best_answer = ''
    best_score = min_score
    for doc in index.similarity_search(question):
        result = QnAmodel(question=question, context=doc.page_content)
        # Strict comparison: on a tie the earlier chunk's answer is kept.
        if result['score'] > best_score:
            best_score = result['score']
            best_answer = result['answer']

    if best_answer != '':
        return best_answer, best_score
    return "No Answer found. Please ask question related to Bachelor of Computer Science program at Swinburne.", 0

# GUI with Streamlit
# Inject the CSS class used by the page heading further down.
st.markdown("""
    <style> 
        .big-font {
            margin: 15px 0 10px 0 !important;
            font-size:25px !important;
            font-weight: bold !important;
        }
    </style>
    """, unsafe_allow_html=True)

# Sidebar menu for choosing which QA model answers the question.
with st.sidebar:
    selected = option_menu("Model selection", ["Roberta base squad2", "Bert finetuned squad"], 
        icons=['box-fill', 'box-fill'], menu_icon="cast", default_index=0)

# Logo, page heading (typo "progrom" corrected) and the active model name.
image = Image.open('Swinburne_Logo.png')
st.image(image)
st.markdown('<p class="big-font">QnA for Swinburne\'s Bachelor of Computer Science program</p>', unsafe_allow_html=True)
st.write("- ", selected)

# Prompt label and QA pipeline for each entry of the model menu. The two
# labels differ only by a trailing space, which is kept exactly as-is
# (presumably so Streamlit treats them as separate widgets - confirm).
_MODEL_CHOICES = {
    "Roberta base squad2": (
        "Type question (Eg. What is the duration of the Bachelor of Computer Science program?):",
        question_answerer0,
    ),
    "Bert finetuned squad": (
        "Type question (Eg. What is the duration of the Bachelor of Computer Science program?): ",
        question_answerer1,
    ),
}

if selected in _MODEL_CHOICES:
    prompt_label, qa_pipeline = _MODEL_CHOICES[selected]
    user_question = st.text_area(prompt_label, max_chars=350)
    if user_question:
        # Feed the question to the chosen model.
        best_answer, best_score = QnAfunction(user_question, qa_pipeline)
        if best_score > 0.5:
            st.write("Answer: ", best_answer)
            st.write("Score: ", best_score)
        else:
            # At or below 0.5 only the returned text is shown, without a score.
            st.write(best_answer)