Update src/streamlit_app.py
src/streamlit_app.py (CHANGED): +264 -130
Removed in this update (the old side of the diff): the ChatGoogleGenerativeAI (langchain_google_genai) and load_dotenv (dotenv) imports; the largely uncommented versions of initialize_google_sheets and structure_resume_data, which did not wrap credential loading or the LLM call in try/except; a "Stop Evaluation" button that was not part of the two-column layout used below; and a reset path that called st.rerun() rather than st.experimental_rerun().

The updated src/streamlit_app.py (lines outside the diff hunks are marked "not shown"):
# … lines 1-7 not shown in the diff …

from typing import List
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
import time
import re

# ──────────────────────────────────────────────────────────────────────────────
# 1) ENVIRONMENT VARIABLES / SECRETS
#
# On Hugging Face Spaces:
#   - Go to your Space's Settings → Secrets and add:
#       • OPENAI_API_KEY = your-openai-key
#       • GOOGLE_API_KEY = your-google-key (if you use any Google LLM)
#   - If you also need a Google Service Account JSON, either:
#       a) Commit it (careful: that is public by default, so only do so if it's non-sensitive!),
#       b) Or add it as "Repository Files" via the "Files & versions" tab,
#       c) Or load it from a Secret.
#
# In the code below, we'll assume the service-account JSON is committed under:
#     synapse-recruitment-34e7b48899b4.json
#
# If you instead want to load it from a single-line environment variable, you can do:
#     service_account_info = json.loads(os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON"))
#     creds = service_account.Credentials.from_service_account_info(service_account_info, scopes=SCOPES)
#
# For now, we simply use:
#     SERVICE_ACCOUNT_FILE = "synapse-recruitment-34e7b48899b4.json"
#
# and expect that file to be present at the top level of your repo/Space.
# ──────────────────────────────────────────────────────────────────────────────

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "")

if OPENAI_API_KEY == "":
    st.warning("OPENAI_API_KEY is not set. The LLM calls will fail unless you add it under Secrets.")
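# Note: ChatOpenAI picks up OPENAI_API_KEY from the environment on its own, which is
# why the key is not passed explicitly to the ChatOpenAI(...) constructors below.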

# ──────────────────────────────────────────────────────────────────────────────
# 2) Pydantic models for structured output
# ──────────────────────────────────────────────────────────────────────────────

class structure(BaseModel):
    name: str = Field(description="Name of the candidate")

    # … lines 53-65 not shown in the diff (class Job(BaseModel) begins here) …

    relevance_score: float = Field(description="Relevance score of the job to the candidate's resume.")


# ──────────────────────────────────────────────────────────────────────────────
# 3) Helper: parse a comma-separated "Tech Stack" string into a Python set
# ──────────────────────────────────────────────────────────────────────────────

def parse_tech_stack(stack):
    if pd.isna(stack) or stack == "" or stack is None:
        return set()
    if isinstance(stack, set):
        return stack
    try:
        # If it's literally a Python-set string like "{'python','django'}"
        if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
            items = stack.strip("{}").split(",")
            return set(item.strip().strip("'\"").lower() for item in items if item.strip())
        # Otherwise assume comma-separated values
        return set(s.strip().lower() for s in str(stack).split(",") if s.strip())
    except Exception as e:
        st.error(f"Error parsing tech stack: {e}")
        return set()
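# Illustration (hypothetical inputs), both of which normalize to {'python', 'django'}:
#     parse_tech_stack("{'Python', 'Django'}")
#     parse_tech_stack("Python, Django")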

# ──────────────────────────────────────────────────────────────────────────────
# 4) Google Sheets initialization (Service Account JSON must be present in repo)
# ──────────────────────────────────────────────────────────────────────────────

def initialize_google_sheets():
    SERVICE_ACCOUNT_FILE = "synapse-recruitment-34e7b48899b4.json"
    SCOPES = ["https://www.googleapis.com/auth/spreadsheets"]
    if not os.path.exists(SERVICE_ACCOUNT_FILE):
        st.error(f"Service account file not found at '{SERVICE_ACCOUNT_FILE}'.\n"
                 "Either commit it into the repo or load from a Secret.")
        return None

    try:
        creds = service_account.Credentials.from_service_account_file(
            SERVICE_ACCOUNT_FILE, scopes=SCOPES
        )
        return gspread.authorize(creds)
    except Exception as e:
        st.error(f"Failed to load Google Service Account credentials: {e}")
        return None


def load_jobs_data():
    gc = initialize_google_sheets()
    if gc is None:
        return None

    try:
        # NOTE: Replace this key with your actual spreadsheet key
        SPREADSHEET_KEY = "1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k"
        worksheet = gc.open_by_key(SPREADSHEET_KEY).worksheet("paraform_jobs_formatted")
        all_values = worksheet.get_all_values()
        if not all_values or len(all_values) < 2:
            st.warning("No data found in the Jobs sheet.")
            return None

        df = pd.DataFrame(all_values[1:], columns=all_values[0]).fillna("")
        # Add a "parsed_stack" column so we can pre-filter by skill overlap
        df["parsed_stack"] = df["Tech Stack"].apply(parse_tech_stack)
        return df

    except Exception as e:
        st.error(f"Error loading jobs data from Google Sheets: {e}")
        return None
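# All cells come back from get_all_values() as strings; the evaluation code below
# expects at least a "Tech Stack" column (comma-separated skills) and a "YOE" column.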

# ──────────────────────────────────────────────────────────────────────────────
# 5) PDF → plain text
# ──────────────────────────────────────────────────────────────────────────────

def extract_text_from_pdf(pdf_file):
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        full_text = ""
        for page in reader.pages:
            text = page.extract_text()
            if text:
                full_text += text + "\n"
        return full_text
    except Exception as e:
        st.error(f"Failed to read PDF: {e}")
        return ""

# ──────────────────────────────────────────────────────────────────────────────
# 6) Call GPT-4o-mini to extract structured fields from resume text
# ──────────────────────────────────────────────────────────────────────────────

def structure_resume_data(resume_text: str) -> structure:
    llm = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0.0,
        max_retries=2,
    )
    sum_llm = llm.with_structured_output(structure)

    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a helper that extracts structured data from a resume."),
        ("human", "Extract the following fields from this resume:\n{resume_text}\n"
                  "If any field is missing, return 'Unknown'.")
    ])

    try:
        parsed = (prompt | sum_llm).invoke({"resume_text": resume_text})
        return parsed
    except Exception as e:
        st.error(f"Failed to extract structure from resume: {e}")
        # Return a fallback with "Unknown" fields
        return structure(
            name="Unknown",
            location="Unknown",
            skills=[],
            ideal_jobs="Unknown",
            yoe="Unknown",
            experience="Unknown"
        )


# ──────────────────────────────────────────────────────────────────────────────
# 7) Evaluate jobs: pre-filter by requiring at least two overlapping skills,
#    then run an LLM loop (with a "Stop" check on each iteration)
# ──────────────────────────────────────────────────────────────────────────────

def eval_jobs(jobs_df: pd.DataFrame, resume_text: str) -> pd.DataFrame:
    """
    1) Extract candidate info (list of skills, etc.)
    2) Build a skill set from response.skills
    3) Pre-filter all jobs so that a job's Tech Stack has ≥ 2 skills in common
    4) For that filtered subset, run an LLM evaluation loop;
       on each iteration, check `st.session_state.evaluation_running`:
       if it has become False, break out immediately.
    5) Return a DataFrame of top-10 results (or empty if none).
    """
    response = structure_resume_data(resume_text)
    candidate_skills = set(skill.lower() for skill in response.skills)

    # How many overlapping skills does each job have?
    def matching_skill_count(tech_stack: str) -> int:
        job_skills = set(s.strip().lower() for s in tech_stack.split(",") if s.strip())
        return len(candidate_skills & job_skills)

    jobs_df["matching_skills"] = jobs_df["Tech Stack"].apply(matching_skill_count)
    filtered = jobs_df[jobs_df["matching_skills"] >= 2].copy()

    if filtered.empty:
        st.warning("No jobs passed the 2-skill pre-filter.")
        return pd.DataFrame()

    # Build a candidate_text blob for the LLM to consume
    candidate_text = (
        f"{response.name} {response.location} "
        f"{', '.join(response.skills)} {response.ideal_jobs} "
        f"{response.yoe} {response.experience}"
    )

    # LLM setup for job evaluation
    llm = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0.0,
        max_retries=2,
    )
    eval_llm = llm.with_structured_output(Job)

    system_msg = (
        "You are an expert recruiter. First, filter by location & experience. "
        "Then pick jobs that match the candidate's skills & background. "
        "Finally, assign a relevance score (0-10)."
    )
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_msg),
        ("human", "Evaluate Job: {job_text}\nCandidate: {candidate_text}\n"
                  "Return JSON with job_title, company, location, skills, description, relevance_score.")
    ])
    chain = prompt | eval_llm
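    # Because eval_llm was built with with_structured_output(Job), chain.invoke(...)
    # returns a Job instance, whose fields are read directly in the loop below.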

    # … lines 245-249 not shown in the diff …

    total = len(jobs_for_eval)

    for i, row in enumerate(jobs_for_eval.itertuples(), start=1):
        # If the user clicked "Stop Evaluation", evaluation_running has been set to False
        if not st.session_state.evaluation_running:
            status_text.text("Evaluation halted by user.")
            break

        progress_bar.progress(i / total)

        # … lines 259-265 not shown in the diff …
            str(row.YOE)
        ])

        try:
            eval_job = chain.invoke({
                "job_text": job_text,
                "candidate_text": candidate_text
            })
        except Exception as e:
            st.error(f"LLM failed on job #{i}: {e}")
            # Skip this job and continue
            continue

        results.append({
            "job_title": eval_job.job_title,
            "company": eval_job.company,
            "location": eval_job.location,
            "skills": eval_job.skills,
            "description": eval_job.description,
            "relevance_score": eval_job.relevance_score,
            "matching_skills": row.matching_skills
        })

        # Simulate a delay so you can see the Stop button in action
        time.sleep(0.5)

    progress_bar.empty()
    status_text.empty()

    if not results:
        return pd.DataFrame()

    df_results = pd.DataFrame(results)
    # Sort first by matching_skills desc, then by relevance_score desc, take top 10
    df_results = df_results.sort_values(
        by=["matching_skills", "relevance_score"],
        ascending=[False, False]
    ).head(10)

    return df_results


# ──────────────────────────────────────────────────────────────────────────────
# 8) Clean résumé text (lowercase, strip special chars)
# ──────────────────────────────────────────────────────────────────────────────

def preprocess_text(text: str) -> str:
    return re.sub(r"[^a-zA-Z\s]", "", text.lower())
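# Keeps only ASCII letters and whitespace, lowercased; digits, punctuation, and
# accented characters are stripped before the resume text reaches the LLM.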

# ──────────────────────────────────────────────────────────────────────────────
# 9) Streamlit UI
# ──────────────────────────────────────────────────────────────────────────────

def main():
    st.title("Resume Evaluator & Job Recommender")

    # 9.1) Initialize session state flags
    if "evaluation_running" not in st.session_state:
        st.session_state.evaluation_running = False
    if "evaluation_complete" not in st.session_state:
        st.session_state.evaluation_complete = False

    # 9.2) File uploader
    uploaded_file = st.file_uploader(
        "Upload your resume (PDF)",
        type=["pdf"],
        help="After picking a PDF, click 'Generate Recommendations' below."
    )

    # 9.3) Always show BOTH "Generate Recommendations" and "Stop Evaluation", in two columns
    col1, col2 = st.columns(2)

    with col1:
        if st.session_state.evaluation_running:
            st.button("Generate Recommendations", disabled=True)
        else:
            if st.button("Generate Recommendations"):
                # 9.4) User clicked "Generate": begin
                st.session_state.evaluation_running = True
                st.session_state.evaluation_complete = False

                # 9.5) Ensure a file was actually uploaded
                if uploaded_file is None:
                    st.error("Please upload a PDF before clicking 'Generate Recommendations'.")
                    st.session_state.evaluation_running = False
                else:
                    # Debug: print the basic type of what Streamlit handed us
                    st.write(f"Received file of type: `{type(uploaded_file)}`")

                    # 9.6) Load job sheet
                    jobs_df = load_jobs_data()
                    if jobs_df is None:
                        st.session_state.evaluation_running = False
                        return

                    # 9.7) Extract text from the PDF
                    raw_text = extract_text_from_pdf(uploaded_file)
                    if not raw_text.strip():
                        st.error("The uploaded PDF appears to contain no extractable text.")
                        st.session_state.evaluation_running = False
                        return

                    cleaned = preprocess_text(raw_text)
                    st.success("Resume text extracted successfully!")

                    # 9.8) Run the lengthy eval loop inside a spinner
                    with st.spinner("Evaluating jobs…"):
                        recommendations = eval_jobs(jobs_df, cleaned)

                    # 9.9) Show results (or a warning if there are none)
                    if not recommendations.empty:
                        st.header("Recommended Jobs")
                        st.dataframe(recommendations)
                        st.session_state.evaluation_complete = True
                    else:
                        st.warning("No matching jobs found or evaluation was halted mid-stream.")

                    # 9.10) Done (or halted)
                    st.session_state.evaluation_running = False

    with col2:
        # The "Stop Evaluation" button is only enabled while evaluation_running is True:
        if st.session_state.evaluation_running:
            if st.button("Stop Evaluation"):
                st.session_state.evaluation_running = False
                st.warning("User requested to stop evaluation.")
        else:
            st.button("Stop Evaluation", disabled=True)

    # 9.11) Once complete, allow "Try Another Resume" to reset
    if st.session_state.evaluation_complete:
        if st.button("Try Another Resume"):
            st.session_state.evaluation_complete = False
            st.experimental_rerun()


if __name__ == "__main__":
    main()
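For reference, the structured-output pattern used in structure_resume_data and eval_jobs boils down to the following minimal, self-contained sketch (the model name, the Person schema, and the example text are illustrative, not taken from the app; it assumes OPENAI_API_KEY is set in the environment):

from typing import List
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

class Person(BaseModel):
    name: str = Field(description="Full name")
    skills: List[str] = Field(description="List of skills")

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
structured_llm = llm.with_structured_output(Person)
prompt = ChatPromptTemplate.from_messages([
    ("system", "Extract the requested fields."),
    ("human", "{text}"),
])

# The chain returns a Person instance rather than raw text.
person = (prompt | structured_llm).invoke({"text": "Jane Doe knows Python and SQL."})
print(person.name, person.skills)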