# Streamlit QA app for Swinburne's Bachelor of Computer Science program.
# NOTE(review): removed scraped hosting-page residue ("Spaces:", runtime-error
# banners, commit hashes, and a line-number gutter) that was not valid Python.
import os
import streamlit as st
from streamlit_option_menu import option_menu
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import TFAutoModelForQuestionAnswering, AutoTokenizer, pipeline
# --- Configuration -----------------------------------------------------------
# SECURITY FIX: a literal OpenAI API key was committed here. A key in source
# control must be treated as leaked and revoked. The key must now be supplied
# via the environment (or a secrets manager) before launching the app.
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it before launching the app."
    )

# Read the full context document that serves as the retrieval corpus.
with open("./data/full_context.txt", "r", encoding="utf-8") as file1:
    doc = file1.read()

# Split the text into overlapping chunks so each fits the QA models' input
# limit; the 200-character overlap preserves context across chunk boundaries.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,  # striding over the text
    length_function=len,
)
texts = text_splitter.split_text(doc)

# Embed the chunks with OpenAI and index them in FAISS for similarity search.
embeddings = OpenAIEmbeddings()
docsearch = FAISS.from_texts(texts, embeddings)
def _load_qa_pipeline(model_path, tokenizer_name):
    """Load a fine-tuned TF question-answering model and pair it with its tokenizer.

    Parameters
    ----------
    model_path : str
        Local directory holding the fine-tuned model weights.
    tokenizer_name : str
        Hugging Face hub id of the matching tokenizer.

    Returns
    -------
    transformers.Pipeline
        A ready-to-use "question-answering" pipeline.
    """
    model = TFAutoModelForQuestionAnswering.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    return pipeline("question-answering", model=model, tokenizer=tokenizer)

# RoBERTa fine-tuned on SQuAD2.
question_answerer0 = _load_qa_pipeline(
    "./models/roberta_model", "deepset/roberta-base-squad2"
)
# BERT fine-tuned on SQuAD.
question_answerer1 = _load_qa_pipeline(
    "./models/bert_finetuned_model", "huggingface-course/bert-finetuned-squad"
)
def QnAfunction(question, QnAmodel, score_threshold=0.5):
    """Answer *question* from the indexed corpus using an extractive QA pipeline.

    Retrieves the chunks most similar to the question from the module-level
    ``docsearch`` FAISS index, runs the QA pipeline on each, and keeps the
    highest-scoring answer.

    Parameters
    ----------
    question : str
        The user's natural-language question.
    QnAmodel : transformers.Pipeline
        A "question-answering" pipeline mapping (question, context) to a
        dict with ``answer`` and ``score`` keys.
    score_threshold : float, optional
        Minimum confidence required to accept an answer (default 0.5,
        matching the original hard-coded cutoff).

    Returns
    -------
    tuple[str, float]
        ``(answer, score)`` when a confident answer is found, otherwise a
        fallback message and ``0``.
    """
    best_answer = ""
    best_score = score_threshold
    for chunk in docsearch.similarity_search(question):
        result = QnAmodel(question=question, context=chunk.page_content)
        # Keep the best answer strictly above the running threshold.
        if result["score"] > best_score:
            best_score = result["score"]
            best_answer = result["answer"]

    if best_answer:
        return best_answer, best_score
    return ("No Answer found. Please ask question related to Bachelor of "
            "Computer Science program at Swinburne.", 0)
# --- GUI (Streamlit) ---------------------------------------------------------
st.markdown("""
<style>
.big-font {
    margin: 50px 0 10px 0 !important;
    font-size:25px !important;
    font-weight: bold !important;
}
</style>
""", unsafe_allow_html=True)

with st.sidebar:
    selected = option_menu("Model selection", ["Roberta base squad2", "Bert finetuned squad"],
        icons=['box-fill', 'box-fill'], menu_icon="cast", default_index=0)

def _render_qa_page(model_label, qa_pipeline):
    """Render one model's QA page: heading, question box, and answer/score.

    The two sidebar options previously duplicated this code verbatim; the
    only differences were the label and pipeline, now parameters.
    """
    # Fix: heading typo "progrom" -> "program".
    st.markdown('<p class="big-font">QnA for Swinburne\'s Bachelor of Computer Science program</p>', unsafe_allow_html=True)
    st.write("- ", model_label)
    question = st.text_area("Type question (Eg. What is the duration of the Bachelor of Computer Science program?): ")
    if question:
        # Feed the question to the retrieval + QA pipeline.
        answer, score = QnAfunction(question, qa_pipeline)
        if score > 0.5:
            st.write("Answer: ", answer)
            st.write("Score: ", score)
        else:
            # Below threshold: QnAfunction returned the fallback message.
            st.write(answer)

if selected == "Roberta base squad2":
    _render_qa_page(selected, question_answerer0)
elif selected == "Bert finetuned squad":
    _render_qa_page(selected, question_answerer1)