Spaces:
Sleeping
Sleeping
Main. Uploaded all the files required to run this app
Browse files- README.md +5 -4
- app.py +105 -0
- doc_loading.py +24 -0
- llm_functions.py +31 -0
- requirements.txt +11 -0
README.md
CHANGED
@@ -1,12 +1,13 @@
|
|
1 |
---
|
2 |
title: LinkedIn Profile QA
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: streamlit
|
7 |
-
sdk_version: 1.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
10 |
---
|
11 |
|
12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
title: LinkedIn Profile QA
|
3 |
+
emoji: 🚀
|
4 |
+
colorFrom: indigo
|
5 |
+
colorTo: pink
|
6 |
sdk: streamlit
|
7 |
+
sdk_version: 1.32.2
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
license: unknown
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import warnings

warnings.filterwarnings('ignore')

from dataclasses import dataclass

import streamlit as st
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from openai import OpenAI

from doc_loading import read_pdf_text
from llm_functions import get_conversational_chain

load_dotenv()

# Streamlit Spaces secrets; expects an [openai] section with OPENAI_API_KEY.
secrets = st.secrets["openai"]
client = OpenAI(api_key=secrets["OPENAI_API_KEY"])

st.set_page_config(page_title="Resume QA")
st.header("Feel free to ask any question")
st.write("Welcome!!")

# Document-input selector.
# NOTE: the original wrapped this in `if "option" not in st.session_state:`
# but never stored "option" in session_state, so the guard was always true —
# and had it ever been false, `option` (and `uploaded_file`) would have been
# undefined NameErrors below. A keyed selectbox persists the choice across
# reruns without the broken guard.
option = st.selectbox(
    "How are you going to input your document?",
    ("Upload LinkedIn PDF", "Upload xml"),
    key="option",
)

main_text = ""
uploaded_file = None

if option == "Upload LinkedIn PDF":
    uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"], key="uploading_pdf")
    if uploaded_file is not None:
        if uploaded_file.type != "application/pdf":
            st.error("Please choose a PDF file only.")
        else:
            st.write("PDF uploaded successfully!")
            main_text = read_pdf_text(uploaded_file)
else:
    # XML ingestion is not implemented yet.
    st.write("This functionality is under construction.")

db = None  # FAISS index; stays None until a document has been processed.
if main_text:
    # Chunk the resume text. `is_separator_regex` is a *boolean* flag on
    # RecursiveCharacterTextSplitter; the original passed '\n' to it by
    # mistake — the newline separator belongs in `separators`.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        length_function=len,
        separators=["\n"],
        is_separator_regex=False,
    )
    texts = text_splitter.create_documents([main_text])

    embeddings = OpenAIEmbeddings(api_key=secrets['OPENAI_API_KEY'])
    # Store the chunks in a local FAISS vector index, one index per input mode.
    db = FAISS.from_documents(texts, embeddings)
    hash_name = option.replace(' ', '-')
    db.save_local(f'faiss_{hash_name}_index')

# NOTE(review): this pre-seeds "messages" with [], which makes the greeting
# branch in initialize_session_state() a no-op — preserved from the original.
if "messages" not in st.session_state:
    st.session_state.messages = []

st.write("What's your question? (try being a bit detailed about the question)")


@dataclass
class Message:
    # One chat-transcript entry: who spoke ("user" / "ai") and the text.
    actor: str
    payload: str


USER = "user"
ASSISTANT = "ai"
MESSAGES = "messages"


def initialize_session_state():
    """Seed the chat transcript and the QA chain in session state (idempotent)."""
    if MESSAGES not in st.session_state:
        st.session_state[MESSAGES] = [Message(actor=ASSISTANT, payload="Hi!How can I help you?")]
    if "llm_chain" not in st.session_state:
        st.session_state["llm_chain"] = get_conversational_chain()


def get_llm_chain_from_session():
    """Return the QA chain cached in session state."""
    return st.session_state["llm_chain"]


initialize_session_state()

# Replay the transcript so the conversation survives Streamlit reruns.
msg: Message
for msg in st.session_state[MESSAGES]:
    st.chat_message(msg.actor).write(msg.payload)

prompt = st.chat_input("Enter a prompt here")

if prompt:
    if db is None:
        # The original raised a NameError on `db` whenever a question was
        # asked before any document had been uploaded and indexed.
        st.error("Please upload a document before asking a question.")
    else:
        docs = db.similarity_search(prompt, k=5)
        st.session_state[MESSAGES].append(Message(actor=USER, payload=prompt))
        st.chat_message(USER).write(prompt)

        with st.spinner("Please wait.."):
            llm_chain = get_llm_chain_from_session()
            response = llm_chain({"context": docs, "question": prompt})['text']

        st.session_state[MESSAGES].append(Message(actor=ASSISTANT, payload=response))
        st.chat_message(ASSISTANT).write(response)
|
doc_loading.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
+
import re
|
4 |
+
import os
|
5 |
+
from PyPDF2 import PdfReader
|
6 |
+
|
7 |
+
def read_pdf_text(pdf_file):
    """Return the concatenated plain text of every page of *pdf_file*.

    pdf_file: a path or binary file-like object accepted by
    ``PyPDF2.PdfReader`` (e.g. a Streamlit upload buffer).
    """
    pdf_reader = PdfReader(pdf_file)
    # extract_text() may return None for pages with no extractable text;
    # coalesce to "" so one such page doesn't raise a TypeError.
    # "".join avoids the quadratic `all_text +=` build of the original.
    # TODO: consider capping pages (e.g. pdf_reader.pages[:5]) for huge files,
    # as the original's comment suggested.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
|
15 |
+
|
16 |
+
def parse_linkedin_pdf(pdf_text):
    """Split LinkedIn-exported PDF text into a dict of named sections.

    The text is cut at each newline that immediately precedes a known
    section heading (Experience, Contact, Education, Top Skills,
    Languages, Honors-Awards). For each resulting chunk the first line
    becomes the dict key and the remainder becomes the value.
    """
    heading_boundary = r'\n(?=\b(?:Experience|Contact|Education|Top Skills|Languages|Honors-Awards)\b)'
    parsed_data = {}
    for chunk in re.split(heading_boundary, pdf_text):
        # partition('\n') splits off the heading line; for a chunk with no
        # newline the body is simply "".
        heading, _, body = chunk.partition('\n')
        parsed_data[heading] = body
    return parsed_data
|
llm_functions.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_openai import ChatOpenAI
|
2 |
+
from langchain.prompts import PromptTemplate
|
3 |
+
from langchain.chains.question_answering import load_qa_chain
|
4 |
+
from langchain.chains import LLMChain
|
5 |
+
from langchain.memory import ConversationBufferMemory
|
6 |
+
from langchain.chains import RetrievalQA
|
7 |
+
|
8 |
+
import streamlit as st
|
9 |
+
secrets = st.secrets["openai"]
|
10 |
+
|
11 |
+
def get_conversational_chain():
    """Build the resume-QA chain: an LLMChain over ChatOpenAI with a
    conversation-buffer memory, prompted to answer hiring-manager
    questions strictly from the resume supplied as {context}.

    Returns the LLMChain; call it with {"context": docs, "question": q}.
    """
    prompt_template = """You are an expert and polite HR.
    In the context, a candidate's resume will be provided to you. Given a question the hiring manager wants to know about the candidate, i want you to give the answer with the most precision. Feel free to answer in sentences or bullet points whatever you find suitable.
    if there is some "\n" imagine things are writting in separate lines. make your move accordingly
    If the question has no answer present in the resume,
    feel free to say, "try ansking something else, this information is not available", don't provide the wrong answer no matter what is present in the question\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """
    model = ChatOpenAI(temperature=0.7, api_key=secrets["OPENAI_API_KEY"])
    memory = ConversationBufferMemory(llm=model, input_key='question', memory_key="chat_history")

    # The template consumes both {context} and {question}; the original
    # declared only ["context"], which breaks PromptTemplate validation /
    # formatting for the "question" input.
    prompt = PromptTemplate(template=prompt_template,
                            input_variables=["context", "question"])
    chain = LLMChain(llm=model, prompt=prompt,
                     memory=memory)

    return chain
|
31 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain
|
2 |
+
openai
|
3 |
+
langchain_openai
|
4 |
+
PyPDF2
|
5 |
+
python-dotenv
|
6 |
+
requests  # NOTE(review): duplicated below as requests==2.26.0 — keep only one spec
|
7 |
+
beautifulsoup4==4.12.3
|
8 |
+
requests==2.26.0
|
9 |
+
streamlit
|
10 |
+
counter  # NOTE(review): likely unintended — collections.Counter is stdlib; confirm and remove
|
11 |
+
faiss-cpu
|