"""Streamlit app: evaluate an uploaded resume (PDF) against a Google-Sheets
job list and recommend the top matching jobs using an LLM.

Pipeline: extract PDF text -> LLM-structure the resume -> pre-filter jobs by
skill overlap -> LLM-score each surviving job -> show the top 10.
"""

import streamlit as st
import pandas as pd
import PyPDF2
import os
from google.oauth2 import service_account
import gspread
from pydantic import BaseModel, Field
from typing import List
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
import time
from dotenv import load_dotenv
import re

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")


class structure(BaseModel):
    """Structured fields the LLM extracts from a resume.

    NOTE(review): lowercase class name kept for backward compatibility;
    PEP 8 would prefer ``ResumeStructure``.
    """
    name: str = Field(description="Name of the candidate")
    location: str = Field(description="The location of the candidate.")
    skills: List[str] = Field(description="List of individual skills of the candidate")
    ideal_jobs: str = Field(description="List of ideal jobs for the candidate based on past experience.")
    yoe: str = Field(description="Years of experience of the candidate.")
    experience: str = Field(description="A brief summary of the candidate's past experience.")


class Job(BaseModel):
    """Structured LLM evaluation of a single job against the candidate."""
    job_title: str = Field(description="The title of the job.")
    company: str = Field(description="The company offering the job.")
    location: str = Field(description="The location of the job.")
    skills: List[str] = Field(description="List of skills required for the job.")
    description: str = Field(description="A brief description of the job.")
    relevance_score: float = Field(description="Relevance score of the job to the candidate's resume.")


# ——— helper to parse a comma-separated tech stack into a set ———
def parse_tech_stack(stack):
    """Parse a tech-stack cell into a lowercase set of skill strings.

    Accepts a plain comma-separated string or a ``"{'a', 'b'}"`` set-literal
    string (as stored in the sheet). Returns an empty set for blank/NaN
    input or on any parse error.
    """
    if pd.isna(stack) or stack == "" or stack is None:
        return set()
    if isinstance(stack, set):
        return stack
    try:
        # Set-literal form: strip braces, then quotes around each item.
        if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
            items = stack.strip("{}").split(",")
            return set(item.strip().strip("'\"").lower() for item in items if item.strip())
        # Plain comma-separated form.
        return set(s.strip().lower() for s in str(stack).split(",") if s.strip())
    except Exception as e:
        st.error(f"Error parsing tech stack: {e}")
        return set()


def initialize_google_sheets():
    """Build an authorized gspread client, or None if credentials are missing.

    The service-account file path may be overridden with the
    GOOGLE_SERVICE_ACCOUNT_FILE environment variable (defaults to the
    original hard-coded path).
    """
    SERVICE_ACCOUNT_FILE = os.getenv(
        "GOOGLE_SERVICE_ACCOUNT_FILE",
        'src/synapse-recruitment-34e7b48899b4.json',
    )
    SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
    if not os.path.exists(SERVICE_ACCOUNT_FILE):
        st.error(f"Service account file not found at {SERVICE_ACCOUNT_FILE}")
        return None
    creds = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, scopes=SCOPES
    )
    return gspread.authorize(creds)


def load_jobs_data():
    """Load the jobs worksheet into a DataFrame, or None on any failure.

    Adds a 'parsed_stack' column holding each row's Tech Stack as a
    lowercase set (see parse_tech_stack) for fast overlap checks later.
    """
    gc = initialize_google_sheets()
    if gc is None:
        return None
    try:
        ws = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k') \
               .worksheet("paraform_jobs_formatted")
        data = ws.get_all_values()
        # First row is the header; remaining rows are records.
        df = pd.DataFrame(data[1:], columns=data[0]).fillna("")
        # parse Tech Stack into a set for each row
        df['parsed_stack'] = df['Tech Stack'].apply(parse_tech_stack)
        return df
    except Exception as e:
        st.error(f"Error loading jobs data: {e}")
        return None


def extract_text_from_pdf(pdf_file):
    """Concatenate the text of every page of the uploaded PDF.

    Pages with no extractable text contribute an empty string rather
    than None (hence the ``or ""``).
    """
    reader = PyPDF2.PdfReader(pdf_file)
    return "".join(page.extract_text() or "" for page in reader.pages)


def structure_resume_data(resume_text):
    """Run the LLM over raw resume text and return a `structure` instance."""
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    sum_llm = llm.with_structured_output(structure)
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You extract structured data from resumes."),
        ("human", "Extract: {resume_text}. If missing, return Unknown for each field.")
    ])
    return (prompt | sum_llm).invoke({"resume_text": resume_text})


def eval_jobs(jobs_df, resume_text):
    """
    - Extract structured candidate info
    - Build candidate skill set
    - Pre‐filter jobs by requiring ≥2 overlapping skills
    - For the filtered set, run the LLM‐evaluation loop
    - At each iteration, check st.session_state.evaluation_running;
      if False, break out immediately.

    Returns a DataFrame of up to 10 jobs sorted by (matching_skills,
    relevance_score) descending, or an empty DataFrame if nothing passed
    the pre-filter.
    """
    response = structure_resume_data(resume_text)
    candidate_skills = set(skill.lower() for skill in response.skills)

    # Pre‐filter: require ≥2 overlapping skills.
    # FIX: reuse the normalised 'parsed_stack' sets instead of naively
    # re-splitting the raw 'Tech Stack' string — the raw value may be a
    # "{...}" set literal whose braces/quotes a plain split(",") mishandles.
    jobs_df['matching_skills'] = jobs_df['parsed_stack'].apply(
        lambda job_skills: len(candidate_skills & job_skills)
    )
    filtered = jobs_df[jobs_df['matching_skills'] >= 2].copy()
    if filtered.empty:
        st.warning("No jobs passed the tech‐stack pre‐filter.")
        return pd.DataFrame()

    # Flatten the structured resume into one text blob for the prompt.
    candidate_text = (
        f"{response.name} {response.location} "
        f"{', '.join(response.skills)} {response.ideal_jobs} "
        f"{response.yoe} {response.experience}"
    )

    # LLM setup
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    eval_llm = llm.with_structured_output(Job)
    system_msg = """
    You are an expert recruiter. Filter by location, experience, and skills, then rate relevance out of 10."""
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_msg),
        ("human", "Evaluate Job: {job_text} vs Candidate: {candidate_text}.")
    ])
    chain = prompt | eval_llm

    jobs_for_eval = filtered[["Company", "Role", "Locations", "parsed_stack",
                              "YOE", "matching_skills"]]
    results = []
    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(jobs_for_eval)

    for i, row in enumerate(jobs_for_eval.itertuples(), start=1):
        # Check the "Stop Evaluation" flag before each iteration.
        if not st.session_state.evaluation_running:
            # User clicked Stop → break out immediately
            status_text.text("Evaluation halted by user.")
            break

        progress_bar.progress(i / total)
        status_text.text(f"Evaluating job {i}/{total}: {row.Role} at {row.Company}")

        job_text = " ".join([
            row.Role, row.Company, row.Locations,
            ", ".join(row.parsed_stack), str(row.YOE)
        ])

        # FIX: one failed/timed-out LLM call no longer aborts the whole run;
        # the job is skipped with a visible warning.
        try:
            eval_job = chain.invoke({
                "job_text": job_text,
                "candidate_text": candidate_text
            })
        except Exception as e:
            st.warning(f"Skipping {row.Role} at {row.Company}: {e}")
            continue

        results.append({
            "job_title": eval_job.job_title,
            "company": eval_job.company,
            "location": eval_job.location,
            "skills": eval_job.skills,
            "description": eval_job.description,
            "relevance_score": eval_job.relevance_score,
            "matching_skills": row.matching_skills
        })
        time.sleep(5)  # throttle successive LLM calls

    progress_bar.empty()
    status_text.empty()

    # Build a DataFrame from whatever has been processed so far
    if results:
        df_results = pd.DataFrame(results)
        # Sort by matching_skills first, then relevance_score
        df_results = df_results.sort_values(
            by=["matching_skills", "relevance_score"],
            ascending=[False, False]
        ).head(10)
    else:
        df_results = pd.DataFrame()
    return df_results


def preprocess_text(text):
    """Lowercase and strip punctuation from resume text.

    FIX: digits are now kept — the old pattern [^a-zA-Z\\s] deleted them,
    destroying years-of-experience and date information the LLM extractor
    relies on.
    """
    return re.sub(r'[^a-zA-Z0-9\s]', '', text.lower())


def main():
    """Streamlit entry point: upload → evaluate → display, with a stop flag."""
    st.title("Resume Evaluator and Job Recommender")

    # Initialize session state flags
    if 'evaluation_running' not in st.session_state:
        st.session_state.evaluation_running = False
    if 'evaluation_complete' not in st.session_state:
        st.session_state.evaluation_complete = False

    uploaded_file = st.file_uploader("Upload your resume (PDF)", type=["pdf"])

    # Show "Stop Evaluation" while the loop is running
    if st.session_state.evaluation_running:
        if st.button("Stop Evaluation"):
            # User clicked "Stop" → flip the flag
            st.session_state.evaluation_running = False
            st.warning("User requested to stop evaluation.")

    if uploaded_file is not None:
        # Only show "Generate Recommendations" if not already running
        if (not st.session_state.evaluation_running) and st.button("Generate Recommendations"):
            # Kick off
            st.session_state.evaluation_running = True
            st.session_state.evaluation_complete = False

            # 1. Load jobs
            jobs_df = load_jobs_data()
            if jobs_df is None:
                st.session_state.evaluation_running = False
                return

            # 2. Extract text from PDF
            resume_text = extract_text_from_pdf(uploaded_file)
            if not resume_text.strip():
                st.error("Uploaded PDF contains no text.")
                st.session_state.evaluation_running = False
                return
            resume_text = preprocess_text(resume_text)
            st.success("Resume text extracted successfully!")

            # 3. Run the evaluation (this may take a while)
            with st.spinner("Evaluating jobs…"):
                recs = eval_jobs(jobs_df, resume_text)

            # 4. Display results (or a warning if nothing returned)
            if not recs.empty:
                st.write("Recommended Jobs:")
                st.dataframe(recs)
                st.session_state.evaluation_complete = True
            else:
                st.warning("No matching jobs found or evaluation was halted early.")

            # Mark evaluation as done (or halted)
            st.session_state.evaluation_running = False

    # After evaluation finishes, allow the user to try another resume
    if st.session_state.evaluation_complete:
        if st.button("Try Another Resume"):
            st.session_state.evaluation_complete = False
            st.rerun()


if __name__ == "__main__":
    main()