# Streamlit QA app for Swinburne's Bachelor of Computer Science program.
# NOTE(review): removed scraped hosting-page residue ("Spaces:", runtime-error
# banners, commit hashes, and a line-number gutter) that was not valid Python.
import os
import streamlit as st
from streamlit_option_menu import option_menu
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import TFAutoModelForQuestionAnswering, AutoTokenizer, pipeline
# --- Configuration -----------------------------------------------------------
# SECURITY FIX: a literal OpenAI API key was committed here. A key in source
# control must be treated as leaked and revoked. The key must now be supplied
# via the environment (or a secrets manager) before launching the app.
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it before launching the app."
    )

# Read the full context document that serves as the retrieval corpus.
with open("./data/full_context.txt", "r", encoding="utf-8") as file1:
    doc = file1.read()

# Split the text into overlapping chunks so each fits the QA models' input
# limit; the 200-character overlap preserves context across chunk boundaries.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,  # striding over the text
    length_function=len,
)
texts = text_splitter.split_text(doc)

# Embed the chunks with OpenAI and index them in FAISS for similarity search.
embeddings = OpenAIEmbeddings()
docsearch = FAISS.from_texts(texts, embeddings)
def _load_qa_pipeline(model_path, tokenizer_name):
    """Load a fine-tuned TF question-answering model and pair it with its tokenizer.

    Parameters
    ----------
    model_path : str
        Local directory holding the fine-tuned model weights.
    tokenizer_name : str
        Hugging Face hub id of the matching tokenizer.

    Returns
    -------
    transformers.Pipeline
        A ready-to-use "question-answering" pipeline.
    """
    model = TFAutoModelForQuestionAnswering.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    return pipeline("question-answering", model=model, tokenizer=tokenizer)

# RoBERTa fine-tuned on SQuAD2.
question_answerer0 = _load_qa_pipeline(
    "./models/roberta_model", "deepset/roberta-base-squad2"
)
# BERT fine-tuned on SQuAD.
question_answerer1 = _load_qa_pipeline(
    "./models/bert_finetuned_model", "huggingface-course/bert-finetuned-squad"
)
def QnAfunction(question, QnAmodel, score_threshold=0.5):
    """Answer *question* from the indexed corpus using an extractive QA pipeline.

    Retrieves the chunks most similar to the question from the module-level
    ``docsearch`` FAISS index, runs the QA pipeline on each, and keeps the
    highest-scoring answer.

    Parameters
    ----------
    question : str
        The user's natural-language question.
    QnAmodel : transformers.Pipeline
        A "question-answering" pipeline mapping (question, context) to a
        dict with ``answer`` and ``score`` keys.
    score_threshold : float, optional
        Minimum confidence required to accept an answer (default 0.5,
        matching the original hard-coded cutoff).

    Returns
    -------
    tuple[str, float]
        ``(answer, score)`` when a confident answer is found, otherwise a
        fallback message and ``0``.
    """
    best_answer = ""
    best_score = score_threshold
    for chunk in docsearch.similarity_search(question):
        result = QnAmodel(question=question, context=chunk.page_content)
        # Keep the best answer strictly above the running threshold.
        if result["score"] > best_score:
            best_score = result["score"]
            best_answer = result["answer"]

    if best_answer:
        return best_answer, best_score
    return ("No Answer found. Please ask question related to Bachelor of "
            "Computer Science program at Swinburne.", 0)
# --- GUI (Streamlit) ---------------------------------------------------------
st.markdown("""
<style>
.big-font {
    margin: 50px 0 10px 0 !important;
    font-size:25px !important;
    font-weight: bold !important;
}
</style>
""", unsafe_allow_html=True)

with st.sidebar:
    selected = option_menu("Model selection", ["Roberta base squad2", "Bert finetuned squad"],
        icons=['box-fill', 'box-fill'], menu_icon="cast", default_index=0)

def _render_qa_page(model_label, qa_pipeline):
    """Render one model's QA page: heading, question box, and answer/score.

    The two sidebar options previously duplicated this code verbatim; the
    only differences were the label and pipeline, now parameters.
    """
    # Fix: heading typo "progrom" -> "program".
    st.markdown('<p class="big-font">QnA for Swinburne\'s Bachelor of Computer Science program</p>', unsafe_allow_html=True)
    st.write("- ", model_label)
    question = st.text_area("Type question (Eg. What is the duration of the Bachelor of Computer Science program?): ")
    if question:
        # Feed the question to the retrieval + QA pipeline.
        answer, score = QnAfunction(question, qa_pipeline)
        if score > 0.5:
            st.write("Answer: ", answer)
            st.write("Score: ", score)
        else:
            # Below threshold: QnAfunction returned the fallback message.
            st.write(answer)

if selected == "Roberta base squad2":
    _render_qa_page(selected, question_answerer0)
elif selected == "Bert finetuned squad":
    _render_qa_page(selected, question_answerer1)