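"""Streamlit app: extract structured data from an uploaded resume PDF,
pre-filter jobs loaded from a Google Sheet by overlapping tech-stack skills,
and rank the remaining jobs with an LLM."""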
import streamlit as st
import pandas as pd
import PyPDF2
import os
from google.oauth2 import service_account
import gspread
from pydantic import BaseModel, Field
from typing import List
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
import time
from dotenv import load_dotenv
import re

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
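# Expected .env layout (illustrative values only):
#   OPENAI_API_KEY=sk-...
#   GOOGLE_API_KEY=...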

class structure(BaseModel):
    name: str = Field(description="Name of the candidate")
    location: str = Field(description="The location of the candidate.")
    skills: List[str] = Field(description="List of individual skills of the candidate")
    ideal_jobs: str = Field(description="List of ideal jobs for the candidate based on past experience.")
    yoe: str = Field(description="Years of experience of the candidate.")
    experience: str = Field(description="A brief summary of the candidate's past experience.")


class Job(BaseModel):
    job_title: str = Field(description="The title of the job.")
    company: str = Field(description="The company offering the job.")
    location: str = Field(description="The location of the job.")
    skills: List[str] = Field(description="List of skills required for the job.")
    description: str = Field(description="A brief description of the job.")
    relevance_score: float = Field(description="Relevance score of the job to the candidate's resume.")

# --- helper to parse a comma-separated tech stack into a set ---
def parse_tech_stack(stack):
    if pd.isna(stack) or stack == "" or stack is None:
        return set()
    if isinstance(stack, set):
        return stack
    try:
        if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
            items = stack.strip("{}").split(",")
            return set(item.strip().strip("'\"").lower() for item in items if item.strip())
        return set(s.strip().lower() for s in str(stack).split(",") if s.strip())
    except Exception as e:
        st.error(f"Error parsing tech stack: {e}")
        return set()
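# For reference, both input styles normalize to the same lowercase set, e.g.:
#   parse_tech_stack("{'Python', 'SQL'}")  -> {'python', 'sql'}
#   parse_tech_stack("Python, SQL")        -> {'python', 'sql'}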

def initialize_google_sheets():
    SERVICE_ACCOUNT_FILE = 'src/synapse-recruitment-34e7b48899b4.json'
    SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
    if not os.path.exists(SERVICE_ACCOUNT_FILE):
        st.error(f"Service account file not found at {SERVICE_ACCOUNT_FILE}")
        return None
    creds = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, scopes=SCOPES
    )
    return gspread.authorize(creds)
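# Note: the target spreadsheet must be accessible to the service account
# (typically by sharing the sheet with the service account's email address).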

def load_jobs_data():
    gc = initialize_google_sheets()
    if gc is None:
        return None
    try:
        ws = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k') \
               .worksheet("paraform_jobs_formatted")
        data = ws.get_all_values()
        df = pd.DataFrame(data[1:], columns=data[0]).fillna("")
        # parse Tech Stack into a set for each row
        df['parsed_stack'] = df['Tech Stack'].apply(parse_tech_stack)
        return df
    except Exception as e:
        st.error(f"Error loading jobs data: {e}")
        return None
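# The worksheet is assumed to contain at least these headers, which the code
# below relies on: "Tech Stack", "Company", "Role", "Locations", "YOE".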

def extract_text_from_pdf(pdf_file):
    reader = PyPDF2.PdfReader(pdf_file)
    return "".join(page.extract_text() or "" for page in reader.pages)

def structure_resume_data(resume_text):
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    # llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001", temperature=0, api_key=GOOGLE_API_KEY)
    sum_llm = llm.with_structured_output(structure)
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You extract structured data from resumes."),
        ("human", "Extract: {resume_text}. If missing, return Unknown for each field.")
    ])
    return (prompt | sum_llm).invoke({"resume_text": resume_text})
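# Returns a `structure` instance; illustrative (not real) values:
#   structure(name="Jane Doe", location="Unknown", skills=["python", "sql"],
#             ideal_jobs="Data Engineer", yoe="5", experience="...")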

def eval_jobs(jobs_df, resume_text):
    """
    - Extract structured candidate info
    - Build candidate skill set
    - Pre-filter jobs by requiring ≥2 overlapping skills
    - For the filtered set, run the LLM-evaluation loop
    - At each iteration, check st.session_state.evaluation_running;
      if False, break out immediately.
    """
    response = structure_resume_data(resume_text)
    candidate_skills = set(skill.lower() for skill in response.skills)

    # Quick helper to count overlaps, using the already-normalized parsed_stack set
    def matching_skill_count(parsed_stack):
        return len(candidate_skills & parsed_stack)

    # Pre-filter: require ≥2 overlapping skills
    jobs_df['matching_skills'] = jobs_df['parsed_stack'].apply(matching_skill_count)
    filtered = jobs_df[jobs_df['matching_skills'] >= 2].copy()
    if filtered.empty:
        st.warning("No jobs passed the tech-stack pre-filter.")
        return pd.DataFrame()
    candidate_text = (
        f"{response.name} {response.location} "
        f"{', '.join(response.skills)} {response.ideal_jobs} "
        f"{response.yoe} {response.experience}"
    )

    # LLM setup
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    # llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001", temperature=0, api_key=GOOGLE_API_KEY)
    eval_llm = llm.with_structured_output(Job)
    system_msg = """
    You are an expert recruiter. Filter by location, experience, and skills,
    then rate relevance out of 10."""
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_msg),
        ("human", "Evaluate Job: {job_text} vs Candidate: {candidate_text}.")
    ])
    chain = prompt | eval_llm
    jobs_for_eval = filtered[["Company", "Role", "Locations", "parsed_stack", "YOE", "matching_skills"]]
    results = []
    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(jobs_for_eval)

    for i, row in enumerate(jobs_for_eval.itertuples(), start=1):
        # Check the "Stop Evaluation" flag before each iteration
        if not st.session_state.evaluation_running:
            # User clicked Stop → break out immediately
            status_text.text("Evaluation halted by user.")
            break
        progress_bar.progress(i / total)
        status_text.text(f"Evaluating job {i}/{total}: {row.Role} at {row.Company}")
        job_text = " ".join([
            row.Role,
            row.Company,
            row.Locations,
            ", ".join(row.parsed_stack),
            str(row.YOE)
        ])
        eval_job = chain.invoke({
            "job_text": job_text,
            "candidate_text": candidate_text
        })
        results.append({
            "job_title": eval_job.job_title,
            "company": eval_job.company,
            "location": eval_job.location,
            "skills": eval_job.skills,
            "description": eval_job.description,
            "relevance_score": eval_job.relevance_score,
            "matching_skills": row.matching_skills
        })
        time.sleep(5)  # Simulate processing delay

    progress_bar.empty()
    status_text.empty()
    # Build a DataFrame from whatever has been processed so far
    if results:
        df_results = pd.DataFrame(results)
        # Sort by matching_skills first, then relevance_score
        df_results = df_results.sort_values(
            by=["matching_skills", "relevance_score"],
            ascending=[False, False]
        ).head(10)
    else:
        df_results = pd.DataFrame()
    return df_results
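# eval_jobs returns at most the top 10 rows, sorted by matching_skills and then
# relevance_score, with columns: job_title, company, location, skills,
# description, relevance_score, matching_skills.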

def preprocess_text(text):
    return re.sub(r'[^a-zA-Z\s]', '', text.lower())
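# e.g. preprocess_text("Python 3.9, SQL!") -> "python  sql"
# (digits and punctuation are dropped; letters and whitespace are kept)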

def main():
    st.title("Resume Evaluator and Job Recommender")

    # Initialize session state flags
    if 'evaluation_running' not in st.session_state:
        st.session_state.evaluation_running = False
    if 'evaluation_complete' not in st.session_state:
        st.session_state.evaluation_complete = False

    uploaded_file = st.file_uploader("Upload your resume (PDF)", type=["pdf"])

    # Show "Stop Evaluation" while the loop is running
    if st.session_state.evaluation_running:
        if st.button("Stop Evaluation"):
            # User clicked "Stop" → flip the flag
            st.session_state.evaluation_running = False
            st.warning("User requested to stop evaluation.")

    if uploaded_file is not None:
        # Only show "Generate Recommendations" if not already running
        if (not st.session_state.evaluation_running) and st.button("Generate Recommendations"):
            # Kick off
            st.session_state.evaluation_running = True
            st.session_state.evaluation_complete = False

            # 1. Load jobs
            jobs_df = load_jobs_data()
            if jobs_df is None:
                st.session_state.evaluation_running = False
                return

            # 2. Extract text from PDF
            resume_text = extract_text_from_pdf(uploaded_file)
            if not resume_text.strip():
                st.error("Uploaded PDF contains no text.")
                st.session_state.evaluation_running = False
                return
            resume_text = preprocess_text(resume_text)
            st.success("Resume text extracted successfully!")

            # 3. Run the evaluation (this may take a while)
            with st.spinner("Evaluating jobs…"):
                recs = eval_jobs(jobs_df, resume_text)

            # 4. Display results (or a warning if nothing returned)
            if not recs.empty:
                st.write("Recommended Jobs:")
                st.dataframe(recs)
                st.session_state.evaluation_complete = True
            else:
                st.warning("No matching jobs found or evaluation was halted early.")

            # Mark evaluation as done (or halted)
            st.session_state.evaluation_running = False

    # After evaluation finishes, allow the user to try another resume
    if st.session_state.evaluation_complete:
        if st.button("Try Another Resume"):
            st.session_state.evaluation_complete = False
            st.rerun()


if __name__ == "__main__":
    main()
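# To launch the app locally (assuming this script is saved as app.py):
#   streamlit run app.py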