File size: 3,286 Bytes
d064c90
 
 
 
 
 
 
 
 
 
 
 
 
6581e27
 
adb646f
d064c90
6581e27
d064c90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc58e4f
d064c90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import streamlit as st
from doc_loading import read_pdf_text
from llm_functions import get_conversational_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
import warnings
warnings.filterwarnings('ignore')

from dotenv import load_dotenv
load_dotenv()
from dataclasses import dataclass

import os

api_key = os.getenv("OPENAI_API_KEY")
from openai import OpenAI
client = OpenAI(api_key=api_key)


st.set_page_config(page_title="Resume QA")
st.header("Feel free to ask any question")

st.write("Welcome!!")

# Let the user pick the input method. key="option" makes Streamlit persist
# the selection in st.session_state across reruns — which is what the
# original guard (`if "option" not in st.session_state:`) appeared to
# intend. The guard itself was buggy: it never stored anything under that
# key, and had the key ever been present, `option` would have been unbound
# (NameError) on the lines below.
option = st.selectbox(
    "How are you going to input your document?",
    ("Upload LinkedIn PDF", "Upload xml"),
    key="option",
)

# Conditionally show components based on the user's choice.
file_name = ""  # NOTE(review): appears unused in this file — confirm before removing
main_text = ""

if option == "Upload LinkedIn PDF":
    uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"], key="uploading_pdf")
    if uploaded_file is not None:
        if uploaded_file.type != "application/pdf":
            # Defensive: the uploader already filters on the .pdf extension,
            # but the reported MIME type can still disagree with it.
            # Bug fix: the original showed this error *after* already
            # reading the file, so a rejected upload was still processed.
            st.error("Please choose a PDF file only.")
        else:
            st.write("PDF uploaded successfully!")
            main_text = read_pdf_text(uploaded_file)
else:
    # Typo fix: "in under construction" -> "is under construction".
    st.write("This functionality is under construction.")

if main_text:
    # Chunk the document so each piece fits comfortably in the embedding
    # context window; the overlap keeps sentences that straddle a chunk
    # boundary from being split apart.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        length_function=len,
        # Bug fix: is_separator_regex is a *boolean* flag, not a separator.
        # The original passed '\n' (truthy), silently enabling regex
        # interpretation of the separators. The default separators contain
        # no regex metacharacters, so False preserves the effective
        # splitting behavior while using the API as documented.
        is_separator_regex=False,
    )
    texts = text_splitter.create_documents([main_text])

    embeddings = OpenAIEmbeddings(api_key=api_key)
    # Store the chunks in a local FAISS vector index for similarity search.
    db = FAISS.from_documents(texts, embeddings)

    hash_name = f"{option.replace(' ', '-')}"
    db.save_local(f'faiss_{hash_name}_index')

    # NOTE: the chat transcript is initialized by initialize_session_state()
    # further below. The previous `st.session_state.messages = []` pre-seed
    # was removed here: it created the "messages" key *before*
    # initialize_session_state() ran, which suppressed the assistant's
    # greeting message.

    st.write("What's your question? (try being a bit detailed about the question)")

    @dataclass
    class Message:
        # One entry in the chat transcript rendered by the chat UI below.
        actor: str  # who produced the message: "user" or "ai" (see USER/ASSISTANT below)
        payload: str  # the message text shown in the chat bubble

    # Role tags and the session-state key under which the transcript lives.
    USER = "user"
    ASSISTANT = "ai"
    MESSAGES = "messages"

    def initialize_session_state():
        """Seed the chat transcript and the LLM chain on first run.

        Streamlit re-executes the whole script on every interaction, so
        both values are kept in st.session_state and created only once.
        """
        if MESSAGES not in st.session_state:
            # Bug fix: the greeting was missing a space after "Hi!".
            st.session_state[MESSAGES] = [
                Message(actor=ASSISTANT, payload="Hi! How can I help you?")
            ]
        if "llm_chain" not in st.session_state:
            st.session_state["llm_chain"] = get_conversational_chain()

    def get_llm_chain_from_session():
        """Return the conversational chain cached in st.session_state."""
        return st.session_state["llm_chain"]

    initialize_session_state()

    # Replay the stored transcript so the history survives Streamlit reruns.
    for past in st.session_state[MESSAGES]:
        st.chat_message(past.actor).write(past.payload)

    user_query = st.chat_input("Enter a prompt here")

    if user_query:
        # Pull the chunks most relevant to the question before answering.
        relevant_docs = db.similarity_search(user_query, k=5)

        st.session_state[MESSAGES].append(Message(actor=USER, payload=user_query))
        st.chat_message(USER).write(user_query)

        with st.spinner("Please wait.."):
            chain = get_llm_chain_from_session()
            answer = chain({"context": relevant_docs, "question": user_query})['text']
            st.session_state[MESSAGES].append(Message(actor=ASSISTANT, payload=answer))
            st.chat_message(ASSISTANT).write(answer)