Atif20024 commited on
Commit
d064c90
·
verified ·
1 Parent(s): 3dd8d0a

Main. Uploaded all the files required to run this app

Browse files
Files changed (5) hide show
  1. README.md +5 -4
  2. app.py +105 -0
  3. doc_loading.py +24 -0
  4. llm_functions.py +31 -0
  5. requirements.txt +11 -0
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
  title: LinkedIn Profile QA
3
- emoji: 😻
4
- colorFrom: yellow
5
- colorTo: green
6
  sdk: streamlit
7
- sdk_version: 1.33.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: LinkedIn Profile QA
3
+ emoji: 🚀
4
+ colorFrom: indigo
5
+ colorTo: pink
6
  sdk: streamlit
7
+ sdk_version: 1.32.2
8
  app_file: app.py
9
  pinned: false
10
+ license: unknown
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import streamlit as st
from doc_loading import read_pdf_text
from llm_functions import get_conversational_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
import warnings
warnings.filterwarnings('ignore')

from dotenv import load_dotenv
load_dotenv()
from dataclasses import dataclass

secrets = st.secrets["openai"]
from openai import OpenAI
client = OpenAI(api_key=secrets["OPENAI_API_KEY"])


st.set_page_config(page_title="Resume QA")
st.header("Feel free to ask any question")

st.write("Welcome!!")

# BUG FIX: the original wrapped the selectbox in `if "option" not in
# st.session_state:` but never wrote "option" to session_state, so the guard
# was always true and served no purpose.  Give the widget a key instead so
# Streamlit itself persists the choice across reruns.
option = st.selectbox(
    "How are you going to input your document?",
    ("Upload LinkedIn PDF", "Upload xml"),
    key="option",
)

main_text = ""
db = None  # FAISS vector store; stays None until a document has been indexed

if option == "Upload LinkedIn PDF":
    uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"], key="uploading_pdf")
    if uploaded_file is not None:
        # `type=["pdf"]` already restricts the picker, but keep the MIME-type
        # check the original performed in a separate branch.
        if uploaded_file.type != "application/pdf":
            st.error("Please choose a PDF file only.")
        else:
            st.write("PDF uploaded successfully!")
            main_text = read_pdf_text(uploaded_file)
else:
    # typo fixed: "in under construction" -> "is under construction"
    st.write("This functionality is under construction.")

if main_text:
    # Chunk the resume so each piece fits comfortably in the embedding model.
    # BUG FIX: is_separator_regex expects a bool; the original passed '\n',
    # which was merely truthy and made the default separators be treated as
    # regular expressions.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        length_function=len,
        is_separator_regex=False,
    )
    texts = text_splitter.create_documents([main_text])

    embeddings = OpenAIEmbeddings(api_key=secrets['OPENAI_API_KEY'])
    # store in vector db
    db = FAISS.from_documents(texts, embeddings)

    hash_name = option.replace(' ', '-')
    db.save_local(f'faiss_{hash_name}_index')

st.write("What's your question? (try being a bit detailed about the question)")


@dataclass
class Message:
    # actor is one of USER / ASSISTANT below; payload is the message text.
    actor: str
    payload: str


USER = "user"
ASSISTANT = "ai"
MESSAGES = "messages"


def initialize_session_state():
    """Seed the chat history with a greeting and build the LLM chain once."""
    # BUG FIX: the original also pre-set `st.session_state.messages = []`
    # earlier in the script, which made this membership check always false and
    # the greeting message unreachable; that redundant init has been removed.
    if MESSAGES not in st.session_state:
        st.session_state[MESSAGES] = [Message(actor=ASSISTANT, payload="Hi!How can I help you?")]
    if "llm_chain" not in st.session_state:
        st.session_state["llm_chain"] = get_conversational_chain()


def get_llm_chain_from_session():
    """Return the per-session conversational chain created at startup."""
    return st.session_state["llm_chain"]


initialize_session_state()

# Replay the conversation so far.
for msg in st.session_state[MESSAGES]:
    st.chat_message(msg.actor).write(msg.payload)

prompt = st.chat_input("Enter a prompt here")

if prompt:
    # BUG FIX: the original called db.similarity_search unconditionally and
    # crashed with a NameError when no document had been uploaded/indexed yet.
    if db is None:
        st.warning("Please upload a document before asking a question.")
    else:
        docs = db.similarity_search(prompt, k=5)
        st.session_state[MESSAGES].append(Message(actor=USER, payload=prompt))
        st.chat_message(USER).write(prompt)

        with st.spinner("Please wait.."):
            llm_chain = get_llm_chain_from_session()
            response = llm_chain({"context": docs, "question": prompt})['text']

        st.session_state[MESSAGES].append(Message(actor=ASSISTANT, payload=response))
        st.chat_message(ASSISTANT).write(response)
doc_loading.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import re
4
+ import os
5
+ from PyPDF2 import PdfReader
6
+
7
def read_pdf_text(pdf_file, max_pages=None):
    """Extract plain text from a PDF.

    Args:
        pdf_file: a path or binary file-like object accepted by PdfReader.
        max_pages: optional cap on how many pages to read (implements the
            original "make it limited" TODO); None reads every page.

    Returns:
        The concatenated text of the pages, '' if nothing is extractable.
    """
    pdf_reader = PdfReader(pdf_file)
    pages = pdf_reader.pages
    if max_pages is not None:
        pages = pages[:max_pages]
    parts = []
    for page in pages:
        # BUG FIX: extract_text() can return None (e.g. image-only pages in
        # some PyPDF2 versions); the original `all_text += page.extract_text()`
        # raised TypeError in that case.  Also use join instead of repeated
        # string concatenation.
        text = page.extract_text()
        if text:
            parts.append(text)
    return "".join(parts)
15
+
16
def parse_linkedin_pdf(pdf_text):
    """Split LinkedIn-export text into a {section_name: section_text} dict.

    A new section starts at a newline immediately followed by one of the known
    section headings; the heading line becomes the key, everything after it
    (up to the next heading) becomes the value.
    """
    heading_boundary = re.compile(
        r'\n(?=\b(?:Experience|Contact|Education|Top Skills|Languages|Honors-Awards)\b)'
    )
    parsed_data = {}
    for chunk in heading_boundary.split(pdf_text):
        # First line is the section name; the remainder (possibly empty) is
        # the section body.  partition('\n') mirrors split('\n')[0] / join.
        name, _, body = chunk.partition('\n')
        parsed_data[name] = body
    return parsed_data
llm_functions.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_openai import ChatOpenAI
2
+ from langchain.prompts import PromptTemplate
3
+ from langchain.chains.question_answering import load_qa_chain
4
+ from langchain.chains import LLMChain
5
+ from langchain.memory import ConversationBufferMemory
6
+ from langchain.chains import RetrievalQA
7
+
8
+ import streamlit as st
9
+ secrets = st.secrets["openai"]
10
+
11
def get_conversational_chain():
    """Build the QA chain used by the chat UI.

    Returns:
        An LLMChain whose prompt takes two inputs: "context" (retrieved resume
        chunks) and "question" (the hiring manager's query).  The caller
        invokes it as chain({"context": docs, "question": prompt})['text'].
    """
    # typo fixed in the fallback sentence the model is told to emit:
    # "ansking" -> "asking" (this text is shown to end users verbatim).
    prompt_template = """You are an expert and polite HR.
    In the context, a candidate's resume will be provided to you. Given a question the hiring manager wants to know about the candidate, i want you to give the answer with the most precision. Feel free to answer in sentences or bullet points whatever you find suitable.
    if there is some "\n" imagine things are writting in separate lines. make your move accordingly
    If the question has no answer present in the resume,
    feel free to say, "try asking something else, this information is not available", don't provide the wrong answer no matter what is present in the question\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """
    model = ChatOpenAI(temperature=0.7, api_key=secrets["OPENAI_API_KEY"])
    memory = ConversationBufferMemory(llm=model, input_key='question', memory_key="chat_history")

    # BUG FIX: the template references both {context} and {question}, but the
    # original declared only input_variables=["context"], which fails template
    # validation / leaves {question} unformatted.
    prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )
    # NOTE(review): memory accumulates "chat_history" but the template never
    # interpolates {chat_history}, so past turns do not reach the model —
    # confirm whether conversation memory is actually intended here.
    chain = LLMChain(llm=model, prompt=prompt, memory=memory)

    return chain
31
+
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ openai
3
+ langchain_openai
4
+ PyPDF2
5
+ python-dotenv
6
+ requests==2.26.0
7
+ beautifulsoup4==4.12.3
8
+ # (duplicate unpinned "requests" entry removed; it conflicted with the ==2.26.0 pin)
9
+ streamlit
10
+ # "counter" removed: no committed module imports it; collections.Counter is stdlib
11
+ faiss-cpu