File size: 2,312 Bytes
2b77bf1
 
 
 
 
 
 
b91137f
2b77bf1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
791a1d3
2b77bf1
 
 
 
 
 
 
 
 
68b0c79
2b77bf1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9ea747
 
 
 
 
2b77bf1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import os
import streamlit as st
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS 
from transformers import TFAutoModelForQuestionAnswering, AutoTokenizer, pipeline

# The OpenAI credential must be supplied by the deployment environment (shell
# variable, hosting-platform secret, secrets manager). A key literal committed
# to the source is readable by anyone who can read the file and has to be
# treated as compromised and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment before "
        "starting the app instead of hard-coding it in the source."
    )

# Read the full reference text that answers are extracted from.
with open("./data/full_context.txt", "r") as file1:
    doc = file1.read()

# Split the text into smaller, overlapping chunks for indexing.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,  # overlap between consecutive chunks (striding over the text)
    length_function=len,
)
texts = text_splitter.split_text(doc)

# Embed every chunk with OpenAI embeddings and index the vectors in FAISS.
# NOTE(review): this runs at module top level, so it is repeated every time
# the script is executed -- presumably on each Streamlit interaction; confirm
# and consider caching the index and the model below.
embeddings = OpenAIEmbeddings()
docsearch = FAISS.from_texts(texts, embeddings)

# Load the question-answering model from the local directory.
model_path = "./models/roberta_model"

model = TFAutoModelForQuestionAnswering.from_pretrained(model_path)
# The tokenizer is loaded from the 'deepset/roberta-base-squad2' checkpoint,
# not from model_path -- presumably the checkpoint the local model was
# fine-tuned from; confirm the two match.
tokenizer = AutoTokenizer.from_pretrained('deepset/roberta-base-squad2')

# Build the question-answering pipeline from the model and tokenizer above.
question_answerer = pipeline("question-answering", model=model, tokenizer=tokenizer)

def findHighestScore(question):
    """Return the best extracted answer for *question* and its score.

    The chunks returned by ``docsearch.similarity_search(question)`` are each
    passed to ``question_answerer`` as the context. The answer with the
    highest score is kept, provided that score is strictly greater than 0.01.

    Returns:
        An ``(answer, score)`` tuple. When no chunk yields a score above
        0.01 the result is ``('', 0.01)``.
    """
    best_answer, best_score = '', 0.01

    for candidate in docsearch.similarity_search(question):
        prediction = question_answerer(question=question, context=candidate.page_content)
        # Only a strictly better score replaces the current best, so on a
        # tie the earliest chunk wins.
        if not prediction['score'] > best_score:
            continue
        best_answer, best_score = prediction['answer'], prediction['score']

    return best_answer, best_score


def QnAfunction(question):
    """Answer *question*, falling back to a fixed message when nothing is found.

    Returns:
        The ``(answer, score)`` pair from ``findHighestScore`` when its answer
        is non-empty; otherwise the "No Answer found" message with score 0.
    """
    best_answer, best_score = findHighestScore(question)

    # An empty answer string is how findHighestScore signals "nothing found".
    if best_answer == '':
        return "No Answer found. Please ask question related to Bachelor of Computer Science program at Swinburne.", 0

    return best_answer, best_score


# Streamlit front end: take a question, then show the answer that was found.
text = st.text_area("Ask any question about the Bachelor of Computer Science program at Swinburne: ")
if text:
    ans, score = QnAfunction(text)
    confident = score > 0.5
    if not confident:
        # At or below 0.5 only the text is written: either the fallback
        # message (score 0) or a low-scoring answer without labels.
        # NOTE(review): showing low-scoring answers unlabelled may be
        # unintended -- confirm.
        st.write(ans)
    else:
        st.write("Answer: ", ans)
        st.write("Score: ", score)