ak0601 committed
Commit 75d66f3 · verified · 1 Parent(s): 84acedd

Update src/streamlit_app.py

Files changed (1):
  1. src/streamlit_app.py +268 -38

src/streamlit_app.py CHANGED
@@ -1,40 +1,270 @@
- import altair as alt
- import numpy as np
- import pandas as pd
  import streamlit as st
-
- """
- # Welcome to Streamlit!
-
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
- forums](https://discuss.streamlit.io).
-
- In the meantime, below is an example of what you can do with just a few lines of code:
- """
-
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-
- indices = np.linspace(0, 1, num_points)
- theta = 2 * np.pi * num_turns * indices
- radius = indices
-
- x = radius * np.cos(theta)
- y = radius * np.sin(theta)
-
- df = pd.DataFrame({
-     "x": x,
-     "y": y,
-     "idx": indices,
-     "rand": np.random.randn(num_points),
- })
-
- st.altair_chart(alt.Chart(df, height=700, width=700)
-     .mark_point(filled=True)
-     .encode(
-         x=alt.X("x", axis=None),
-         y=alt.Y("y", axis=None),
-         color=alt.Color("idx", legend=None, scale=alt.Scale()),
-         size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-     ))
 
 
 
  import streamlit as st
+ import pandas as pd
+ import PyPDF2
+ import os
+ from google.oauth2 import service_account
+ import gspread
+ from pydantic import BaseModel, Field
+ from typing import List
+ from langchain_openai import ChatOpenAI
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ import time
+ from dotenv import load_dotenv
+ import re
+
+ load_dotenv()
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+
+ class structure(BaseModel):
+     name: str = Field(description="Name of the candidate")
+     location: str = Field(description="The location of the candidate.")
+     skills: List[str] = Field(description="List of individual skills of the candidate")
+     ideal_jobs: str = Field(description="List of ideal jobs for the candidate based on past experience.")
+     yoe: str = Field(description="Years of experience of the candidate.")
+     experience: str = Field(description="A brief summary of the candidate's past experience.")
+
+
+ class Job(BaseModel):
+     job_title: str = Field(description="The title of the job.")
+     company: str = Field(description="The company offering the job.")
+     location: str = Field(description="The location of the job.")
+     skills: List[str] = Field(description="List of skills required for the job.")
+     description: str = Field(description="A brief description of the job.")
+     relevance_score: float = Field(description="Relevance score of the job to the candidate's resume.")
+
+
+ # --- helper to parse a comma-separated tech stack into a set ---
+ def parse_tech_stack(stack):
+     if pd.isna(stack) or stack == "" or stack is None:
+         return set()
+     if isinstance(stack, set):
+         return stack
+     try:
+         if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
+             items = stack.strip("{}").split(",")
+             return set(item.strip().strip("'\"").lower() for item in items if item.strip())
+         return set(s.strip().lower() for s in str(stack).split(",") if s.strip())
+     except Exception as e:
+         st.error(f"Error parsing tech stack: {e}")
+         return set()
+
+
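A quick illustration of what `parse_tech_stack` returns for the two input shapes it handles, a plain comma-separated string and a stringified set literal (example values only, not part of the commit):

    parse_tech_stack("Python, AWS, React")   # {'python', 'aws', 'react'}
    parse_tech_stack("{'Python', 'Go'}")     # {'python', 'go'}
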
+ def initialize_google_sheets():
+     SERVICE_ACCOUNT_FILE = 'src/synapse-recruitment-34e7b48899b4.json'
+     SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
+     if not os.path.exists(SERVICE_ACCOUNT_FILE):
+         st.error(f"Service account file not found at {SERVICE_ACCOUNT_FILE}")
+         return None
+     creds = service_account.Credentials.from_service_account_file(
+         SERVICE_ACCOUNT_FILE, scopes=SCOPES
+     )
+     return gspread.authorize(creds)
+
+
+ def load_jobs_data():
+     gc = initialize_google_sheets()
+     if gc is None:
+         return None
+     try:
+         ws = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k') \
+             .worksheet("paraform_jobs_formatted")
+         data = ws.get_all_values()
+         df = pd.DataFrame(data[1:], columns=data[0]).fillna("")
+         # parse Tech Stack into a set for each row
+         df['parsed_stack'] = df['Tech Stack'].apply(parse_tech_stack)
+         return df
+     except Exception as e:
+         st.error(f"Error loading jobs data: {e}")
+         return None
+
+
+ def extract_text_from_pdf(pdf_file):
+     reader = PyPDF2.PdfReader(pdf_file)
+     return "".join(page.extract_text() or "" for page in reader.pages)
+
+
+ def structure_resume_data(resume_text):
+     llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+     # llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001", temperature=0, api_key=GOOGLE_API_KEY)
+     sum_llm = llm.with_structured_output(structure)
+     prompt = ChatPromptTemplate.from_messages([
+         ("system", "You extract structured data from resumes."),
+         ("human", "Extract: {resume_text}. If missing, return Unknown for each field.")
+     ])
+     return (prompt | sum_llm).invoke({"resume_text": resume_text})
+
+
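Because the chain is built with `with_structured_output(structure)`, `structure_resume_data` returns a populated `structure` instance rather than raw text, so downstream code can use attribute access; roughly (values are hypothetical):

    resume = structure_resume_data(resume_text)
    resume.name                          # e.g. "Jane Doe"
    resume.skills                        # e.g. ["Python", "SQL", "AWS"]
    {s.lower() for s in resume.skills}   # the candidate skill set built in eval_jobs
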
+ def eval_jobs(jobs_df, resume_text):
+     """
+     - Extract structured candidate info
+     - Build candidate skill set
+     - Pre-filter jobs by requiring ≥2 overlapping skills
+     - For the filtered set, run the LLM-evaluation loop
+     - At each iteration, check st.session_state.evaluation_running;
+       if False, break out immediately.
+     """
+     response = structure_resume_data(resume_text)
+     candidate_skills = set(skill.lower() for skill in response.skills)
+
+     # Quick helper to count overlaps
+     def matching_skill_count(tech_stack):
+         job_skills = set(skill.strip().lower() for skill in tech_stack.split(","))
+         return len(candidate_skills & job_skills)
+
+     # Pre-filter: require ≥2 overlapping skills
+     jobs_df['matching_skills'] = jobs_df['Tech Stack'].apply(matching_skill_count)
+     filtered = jobs_df[jobs_df['matching_skills'] >= 2].copy()
+
+     if filtered.empty:
+         st.warning("No jobs passed the tech-stack pre-filter.")
+         return pd.DataFrame()
+
+     candidate_text = (
+         f"{response.name} {response.location} "
+         f"{', '.join(response.skills)} {response.ideal_jobs} "
+         f"{response.yoe} {response.experience}"
+     )
+
+     # LLM setup
+     llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+     # llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001", temperature=0, api_key=GOOGLE_API_KEY)
+
+     eval_llm = llm.with_structured_output(Job)
+     system_msg = """
+     You are an expert recruiter. Filter by location, experience, and skills,
+     then rate relevance out of 10."""
+     prompt = ChatPromptTemplate.from_messages([
+         ("system", system_msg),
+         ("human", "Evaluate Job: {job_text} vs Candidate: {candidate_text}.")
+     ])
+     chain = prompt | eval_llm
+
+     jobs_for_eval = filtered[["Company", "Role", "Locations", "parsed_stack", "YOE", "matching_skills"]]
+     results = []
+
+     progress_bar = st.progress(0)
+     status_text = st.empty()
+     total = len(jobs_for_eval)
+
+     for i, row in enumerate(jobs_for_eval.itertuples(), start=1):
+         # Check the "Stop Evaluation" flag before each iteration
+         if not st.session_state.evaluation_running:
+             # User clicked Stop → break out immediately
+             status_text.text("Evaluation halted by user.")
+             break
+
+         progress_bar.progress(i / total)
+         status_text.text(f"Evaluating job {i}/{total}: {row.Role} at {row.Company}")
+
+         job_text = " ".join([
+             row.Role,
+             row.Company,
+             row.Locations,
+             ", ".join(row.parsed_stack),
+             str(row.YOE)
+         ])
+
+         eval_job = chain.invoke({
+             "job_text": job_text,
+             "candidate_text": candidate_text
+         })
+
+         results.append({
+             "job_title": eval_job.job_title,
+             "company": eval_job.company,
+             "location": eval_job.location,
+             "skills": eval_job.skills,
+             "description": eval_job.description,
+             "relevance_score": eval_job.relevance_score,
+             "matching_skills": row.matching_skills
+         })
+         time.sleep(5)  # Simulate processing delay
+
+     progress_bar.empty()
+     status_text.empty()
+
+     # Build a DataFrame from whatever has been processed so far
+     if results:
+         df_results = pd.DataFrame(results)
+         # Sort by matching_skills first, then relevance_score
+         df_results = df_results.sort_values(
+             by=["matching_skills", "relevance_score"],
+             ascending=[False, False]
+         ).head(10)
+     else:
+         df_results = pd.DataFrame()
+
+     return df_results
+
+
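The pre-filter in `eval_jobs` is a plain set intersection between the candidate's extracted skills and each row's Tech Stack, and only rows with at least two overlaps reach the LLM loop; a minimal standalone sketch of just that step (the Tech Stack column name matches the sheet above, the values are made up for illustration):

    import pandas as pd

    candidate_skills = {"python", "sql", "aws"}
    jobs = pd.DataFrame({"Tech Stack": ["Python, SQL", "Go, Rust", "AWS, Python, Docker"]})

    def matching_skill_count(tech_stack):
        job_skills = {s.strip().lower() for s in tech_stack.split(",")}
        return len(candidate_skills & job_skills)

    jobs["matching_skills"] = jobs["Tech Stack"].apply(matching_skill_count)
    shortlist = jobs[jobs["matching_skills"] >= 2]   # rows 0 and 2 survive the pre-filter
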
+ def preprocess_text(text):
+     return re.sub(r'[^a-zA-Z\s]', '', text.lower())
+
+
+ def main():
+     st.title("Resume Evaluator and Job Recommender")
+
+     # Initialize session state flags
+     if 'evaluation_running' not in st.session_state:
+         st.session_state.evaluation_running = False
+     if 'evaluation_complete' not in st.session_state:
+         st.session_state.evaluation_complete = False
+
+     uploaded_file = st.file_uploader("Upload your resume (PDF)", type=["pdf"])
+
+     # Show "Stop Evaluation" while the loop is running
+     if st.session_state.evaluation_running:
+         if st.button("Stop Evaluation"):
+             # User clicked Stop → flip the flag
+             st.session_state.evaluation_running = False
+             st.warning("User requested to stop evaluation.")
+
+     if uploaded_file is not None:
+         # Only show "Generate Recommendations" if not already running
+         if (not st.session_state.evaluation_running) and st.button("Generate Recommendations"):
+             # Kick off
+             st.session_state.evaluation_running = True
+             st.session_state.evaluation_complete = False
+
+             # 1. Load jobs
+             jobs_df = load_jobs_data()
+             if jobs_df is None:
+                 st.session_state.evaluation_running = False
+                 return
+
+             # 2. Extract text from the PDF
+             resume_text = extract_text_from_pdf(uploaded_file)
+             if not resume_text.strip():
+                 st.error("Uploaded PDF contains no text.")
+                 st.session_state.evaluation_running = False
+                 return
+
+             resume_text = preprocess_text(resume_text)
+             st.success("Resume text extracted successfully!")
+
+             # 3. Run the evaluation (this may take a while)
+             with st.spinner("Evaluating jobs…"):
+                 recs = eval_jobs(jobs_df, resume_text)
+
+             # 4. Display results (or a warning if nothing was returned)
+             if not recs.empty:
+                 st.write("Recommended Jobs:")
+                 st.dataframe(recs)
+                 st.session_state.evaluation_complete = True
+             else:
+                 st.warning("No matching jobs found or evaluation was halted early.")
+
+             # Mark evaluation as done (or halted)
+             st.session_state.evaluation_running = False
+
+     # After evaluation finishes, allow the user to try another resume
+     if st.session_state.evaluation_complete:
+         if st.button("Try Another Resume"):
+             st.session_state.evaluation_complete = False
+             st.rerun()
+
+
+ if __name__ == "__main__":
+     main()
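
To try the updated app locally, the code above expects `OPENAI_API_KEY` (and optionally `GOOGLE_API_KEY`) to be supplied via a `.env` file picked up by `load_dotenv()`, plus the service-account JSON at `src/synapse-recruitment-34e7b48899b4.json` for Sheets access; a minimal setup sketch (key values are placeholders, not part of the commit):

    # .env (read by load_dotenv)
    OPENAI_API_KEY=...
    GOOGLE_API_KEY=...

    # then, from the repo root:
    streamlit run src/streamlit_app.py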