Runtime error
Runtime error
File size: 3,863 Bytes
2b77bf1 640fbe7 2b77bf1 cc21bae 2b77bf1 5a91724 2b77bf1 640fbe7 2b77bf1 640fbe7 2b77bf1 640fbe7 2b77bf1 640fbe7 d686330 640fbe7 2b77bf1 640fbe7 2b77bf1 640fbe7 2b77bf1 640fbe7 a1d73f6 640fbe7 cc21bae 984eecc cc21bae 984eecc 640fbe7 5b9ca5f 640fbe7 fa9ebfa 640fbe7 fa9ebfa f900277 640fbe7 5b9ca5f 640fbe7 fa9ebfa 84a2bc0 fa9ebfa 640fbe7 fa9ebfa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import os
import streamlit as st
from streamlit_option_menu import option_menu
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import TFAutoModelForQuestionAnswering, AutoTokenizer, pipeline
from PIL import Image
# SECURITY: the OpenAI API key must never be committed to source control.
# It is expected to be provided by the runtime environment (for example a
# deployment secret or a local shell export). Any key that was previously
# hard-coded here should be treated as leaked and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Provide it through the environment "
        "(e.g. a deployment secret) before starting the app."
    )
# Read data
# The whole context file is loaded into memory as a single string; the
# encoding is stated explicitly so the result does not depend on the
# platform's default locale.
with open("./data/full_context.txt", "r", encoding="utf-8") as file1:
    doc = file1.read()

# Splitting up the text into smaller chunks for indexing
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,  # striding over the text
    length_function=len,
)
texts = text_splitter.split_text(doc)

# Download embeddings from OpenAI
# Each chunk is embedded and stored in a FAISS index; `docsearch` is the
# module-level handle used later for similarity search.
embeddings = OpenAIEmbeddings()
docsearch = FAISS.from_texts(texts, embeddings)
# Build one question-answering pipeline per checkpoint.
# NOTE(review): Streamlit re-executes the script on user interaction, so these
# loads presumably repeat on every rerun — consider a resource cache if the
# installed Streamlit version provides one.
def _load_qa_pipeline(model_dir, tokenizer_name):
    """Load a TF question-answering model and its tokenizer, then wrap both.

    Args:
        model_dir: Path passed to ``TFAutoModelForQuestionAnswering.from_pretrained``.
        tokenizer_name: Identifier passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        A ``(model, tokenizer, pipeline)`` tuple, where the pipeline is a
        "question-answering" pipeline built from the other two.
    """
    qa_model = TFAutoModelForQuestionAnswering.from_pretrained(model_dir)
    qa_tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    qa_pipeline = pipeline("question-answering", model=qa_model, tokenizer=qa_tokenizer)
    return qa_model, qa_tokenizer, qa_pipeline


# Roberta model (weights from model_path0, tokenizer from the named checkpoint)
model_path0 = "./models/roberta_model"
model0, tokenizer0, question_answerer0 = _load_qa_pipeline(
    model_path0, 'deepset/roberta-base-squad2'
)

# Bert base model (weights from model_path1, tokenizer from the named checkpoint)
model_path1 = "./models/bert_finetuned_model"
model1, tokenizer1, question_answerer1 = _load_qa_pipeline(
    model_path1, 'huggingface-course/bert-finetuned-squad'
)
def QnAfunction(question, QnAmodel):
    """Answer *question* using the best-scoring retrieved passage.

    The module-level ``docsearch`` index is queried for passages similar to
    the question. ``QnAmodel`` is called once per passage with ``question=``
    and ``context=`` keyword arguments, and must return a mapping with
    ``'score'`` and ``'answer'`` keys.

    Args:
        question: The user's question text.
        QnAmodel: Callable that performs extractive question answering.

    Returns:
        ``(answer, score)`` for the highest-scoring passage result whose
        score exceeds 0.01, or a fixed "No Answer found" message with a
        score of 0 when no result qualifies or the winning answer is empty.
    """
    # Start from the minimum acceptable score so weaker results are ignored.
    best_answer, best_score = '', 0.01

    for passage in docsearch.similarity_search(question):
        result = QnAmodel(question=question, context=passage.page_content)
        # Strict comparison: on ties the earlier passage keeps its place.
        if result['score'] > best_score:
            best_score, best_answer = result['score'], result['answer']

    if best_answer == '':
        return "No Answer found. Please ask question related to Bachelor of Computer Science program at Swinburne.", 0
    return best_answer, best_score
# GUI with Streamlit
# Inject the CSS class used by the page title further down.
st.markdown("""
<style>
.big-font {
    margin: 15px 0 10px 0 !important;
    font-size:25px !important;
    font-weight: bold !important;
}
</style>
""", unsafe_allow_html=True)

# Sidebar menu: the chosen entry decides which QA pipeline answers.
with st.sidebar:
    selected = option_menu("Model selection", ["Roberta base squad2", "Bert finetuned squad"],
                           icons=['box-fill', 'box-fill'], menu_icon="cast", default_index=0)

# NOTE(review): showing the logo on the main page above the title is assumed —
# confirm the intended placement.
image = Image.open('Swinburne_Logo.png')
st.image(image)
st.markdown('<p class="big-font">QnA for Swinburne\'s Bachelor of Computer Science program</p>', unsafe_allow_html=True)
st.write("- ", selected)

# Answers scoring at or below this value are treated as unreliable.
MIN_DISPLAY_SCORE = 0.5


def _show_answer(question, qa_pipeline):
    """Answer *question* with *qa_pipeline* and render the outcome.

    A result scoring above ``MIN_DISPLAY_SCORE`` is shown with its score.
    Otherwise a "No Answer found" message is shown, so the user always gets
    feedback instead of an empty page.
    """
    answer, score = QnAfunction(question, qa_pipeline)
    if score > MIN_DISPLAY_SCORE:
        st.write("Answer: ", answer)
        st.write("Score: ", score)
    else:
        st.write("No Answer found. Please ask question related to Bachelor of Computer Science program at Swinburne.")


# The two text areas keep their distinct labels (note the trailing space in
# the second) so each model keeps its own input widget.
if selected == "Roberta base squad2":
    text0 = st.text_area("Type question (Eg. What is the duration of the Bachelor of Computer Science program?):", max_chars=350)
    if text0:
        _show_answer(text0, question_answerer0)
elif selected == "Bert finetuned squad":
    text1 = st.text_area("Type question (Eg. What is the duration of the Bachelor of Computer Science program?): ", max_chars=350)
    if text1:
        # Feed the question to the model
        _show_answer(text1, question_answerer1)