Spaces:

ak0601
/

Job_reccomendation

Sleeping

App Files Files Community

Job_reccomendation / src /streamlit_app.py

ak0601

Update src/streamlit_app.py

75d66f3 verified 3 months ago

raw

history blame

10.1 kB

	import streamlit as st
	import pandas as pd
	import PyPDF2
	import os
	from google.oauth2 import service_account
	import gspread
	from pydantic import BaseModel, Field
	from typing import List
	from langchain_openai import ChatOpenAI
	from langchain_core.prompts import ChatPromptTemplate
	from langchain_google_genai import ChatGoogleGenerativeAI
	import time
	from dotenv import load_dotenv
	import re

	# Load variables from a .env file (via python-dotenv) before reading the keys.
	# Each key is None when the corresponding environment variable is not set.
	load_dotenv()
	OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
	GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

	# Candidate profile extracted from a resume by the LLM (see structure_resume_data).
	# Documented with comments rather than a class docstring on purpose: a docstring
	# may be picked up as the schema description handed to the model -- TODO confirm.
	class structure(BaseModel):
	    # Who the candidate is and where they are based.
	    name: str = Field(
	        description="Name of the candidate",
	    )
	    location: str = Field(
	        description="The location of the candidate.",
	    )
	    # One entry per skill; eval_jobs lower-cases these for overlap counting.
	    skills: List[str] = Field(
	        description="List of individual skills of the candidate",
	    )
	    # Free-text fields (plain strings, even where the description says "list").
	    ideal_jobs: str = Field(
	        description="List of ideal jobs for the candidate based on past experience.",
	    )
	    yoe: str = Field(
	        description="Years of experience of the candidate.",
	    )
	    experience: str = Field(
	        description="A brief summary of the candidate's past experience.",
	    )


	# Structured verdict the LLM returns for one job (used by eval_jobs).
	# Documented with comments rather than a class docstring on purpose: a docstring
	# may be picked up as the schema description handed to the model -- TODO confirm.
	class Job(BaseModel):
	    # Identification of the posting.
	    job_title: str = Field(
	        description="The title of the job.",
	    )
	    company: str = Field(
	        description="The company offering the job.",
	    )
	    location: str = Field(
	        description="The location of the job.",
	    )
	    # What the job asks for.
	    skills: List[str] = Field(
	        description="List of skills required for the job.",
	    )
	    description: str = Field(
	        description="A brief description of the job.",
	    )
	    # Numeric rating; the evaluation prompt in eval_jobs asks for a score out of 10.
	    relevance_score: float = Field(
	        description="Relevance score of the job to the candidate's resume.",
	    )


	# ——— helper to parse a comma-separated tech stack into a set ———
	def parse_tech_stack(stack):
	    """Normalise a tech-stack cell into a set of lower-cased skill names.

	    Accepted inputs:
	      * ``None``, NaN/NA or an empty string -> empty set
	      * a ``set`` -> returned unchanged (the very same object)
	      * a ``list``, ``tuple`` or ``frozenset`` -> every item is stringified,
	        trimmed and lower-cased
	      * a string that looks like a printed set, e.g. ``"{'python', 'sql'}"``
	        -> braces and surrounding quotes are removed from each item
	      * anything else -> ``str(stack)`` split on commas

	    Returns:
	        set: trimmed, lower-cased, non-empty entries.

	    Any unexpected error while parsing is reported through ``st.error`` and
	    an empty set is returned instead of raising.
	    """
	    if stack is None:
	        return set()
	    if isinstance(stack, set):
	        return stack
	    # Handle other containers before pd.isna(): for a list pd.isna() returns an
	    # array, and using that array in an ``if`` raises ValueError.
	    if isinstance(stack, (list, tuple, frozenset)):
	        cleaned = (str(item).strip().lower() for item in stack)
	        return {item for item in cleaned if item}
	    if pd.isna(stack) or stack == "":
	        return set()
	    try:
	        text = stack.strip() if isinstance(stack, str) else str(stack)
	        if isinstance(stack, str) and text.startswith("{") and text.endswith("}"):
	            # Stringified set literal: drop the braces, then the quotes around each item.
	            pieces = (
	                item.strip().strip("'\"").strip().lower()
	                for item in text.strip("{}").split(",")
	            )
	        else:
	            pieces = (part.strip().lower() for part in text.split(","))
	        return {piece for piece in pieces if piece}
	    except Exception as e:
	        st.error(f"Error parsing tech stack: {e}")
	        return set()


	def initialize_google_sheets():
	    """Build an authorised gspread client from the service-account key file.

	    Returns:
	        The object returned by ``gspread.authorize``, or ``None`` (after
	        showing a Streamlit error) when the key file does not exist.
	    """
	    key_path = 'src/synapse-recruitment-34e7b48899b4.json'
	    scopes = ['https://www.googleapis.com/auth/spreadsheets']

	    if os.path.exists(key_path):
	        credentials = service_account.Credentials.from_service_account_file(
	            key_path, scopes=scopes
	        )
	        return gspread.authorize(credentials)

	    # Key file missing: tell the user and let the caller handle the None.
	    st.error(f"Service account file not found at {key_path}")
	    return None


	def load_jobs_data():
	    """Fetch the job listings worksheet and return it as a DataFrame.

	    The first row of the sheet supplies the column names. A ``parsed_stack``
	    column is added by running ``parse_tech_stack`` over ``Tech Stack``.

	    Returns:
	        pandas.DataFrame, or ``None`` when no Sheets client is available or
	        when anything fails while reading (the failure is shown via ``st.error``).
	    """
	    client = initialize_google_sheets()
	    if client is None:
	        return None

	    try:
	        spreadsheet = client.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
	        sheet = spreadsheet.worksheet("paraform_jobs_formatted")
	        rows = sheet.get_all_values()
	        jobs = pd.DataFrame(rows[1:], columns=rows[0])
	        jobs = jobs.fillna("")
	        # Pre-compute the skill set of every job once, at load time.
	        jobs['parsed_stack'] = jobs['Tech Stack'].apply(parse_tech_stack)
	    except Exception as exc:
	        st.error(f"Error loading jobs data: {exc}")
	        return None
	    return jobs


	def extract_text_from_pdf(pdf_file):
	    """Return the text of every page of ``pdf_file`` joined with no separator.

	    Pages whose ``extract_text()`` result is empty or ``None`` contribute an
	    empty string.
	    """
	    chunks = []
	    for page in PyPDF2.PdfReader(pdf_file).pages:
	        page_text = page.extract_text()
	        chunks.append(page_text if page_text else "")
	    return "".join(chunks)


	def structure_resume_data(resume_text):
	    """Ask the LLM to turn raw resume text into a ``structure`` record.

	    Args:
	        resume_text: Text of the resume; substituted into the prompt's
	            ``{resume_text}`` placeholder.

	    Returns:
	        The result of invoking the prompt piped into the model configured with
	        ``with_structured_output(structure)``; callers read fields such as
	        ``.skills`` and ``.name`` from it.
	    """
	    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
	    # Alternative backend, kept for reference:
	    # llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001",temperature = 0, api_key=GOOGLE_API_KEY)
	    sum_llm = llm.with_structured_output(structure)
	    prompt = ChatPromptTemplate.from_messages([
	        ("system", "You extract structured data from resumes."),
	        ("human", "Extract: {resume_text}. If missing, return Unknown for each field."),
	    ])
	    # Compose prompt and model with a plain "|"; the escaped "\|" form is not valid Python.
	    chain = prompt | sum_llm
	    return chain.invoke({"resume_text": resume_text})


	def eval_jobs(jobs_df, resume_text, *, min_matching_skills=2, top_n=10, delay_seconds=5):
	    """Rate pre-filtered jobs against a resume with an LLM and return the best ones.

	    Steps:
	      1. Extract a structured candidate profile from ``resume_text``.
	      2. For every job, count how many candidate skills appear in its
	         ``parsed_stack``.
	      3. Keep only jobs with at least ``min_matching_skills`` overlapping skills.
	      4. Ask the LLM to evaluate each remaining job. Before every job the
	         ``st.session_state.evaluation_running`` flag is checked; the loop
	         stops as soon as it is False.
	      5. Sort whatever was processed by overlap count, then relevance score.

	    Args:
	        jobs_df: DataFrame with the columns "Company", "Role", "Locations",
	            "YOE" and "parsed_stack" (an iterable of lower-cased skills per
	            row). It is not modified.
	        resume_text: Resume text handed to ``structure_resume_data``.
	        min_matching_skills: Minimum skill overlap a job needs to be evaluated.
	        top_n: Maximum number of rows in the returned DataFrame.
	        delay_seconds: Pause between consecutive LLM calls; a falsy value
	            disables it.

	    Returns:
	        pandas.DataFrame with the columns job_title, company, location, skills,
	        description, relevance_score and matching_skills; an empty DataFrame
	        when no job passes the pre-filter or none was evaluated.
	    """
	    response = structure_resume_data(resume_text)
	    candidate_skills = {
	        skill.strip().lower() for skill in response.skills if skill.strip()
	    }

	    # Count overlaps on the already-normalised parsed_stack (not the raw
	    # "Tech Stack" text) so both sides are cleaned the same way, and work on a
	    # copy so the caller's DataFrame does not gain a column.
	    jobs = jobs_df.copy()
	    jobs['matching_skills'] = jobs['parsed_stack'].apply(
	        lambda stack: len(candidate_skills & set(stack))
	    )
	    filtered = jobs[jobs['matching_skills'] >= min_matching_skills]

	    if filtered.empty:
	        st.warning("No jobs passed the tech‐stack pre‐filter.")
	        return pd.DataFrame()

	    candidate_text = (
	        f"{response.name} {response.location} "
	        f"{', '.join(response.skills)} {response.ideal_jobs} "
	        f"{response.yoe} {response.experience}"
	    )

	    # LLM setup
	    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
	    # Alternative backend, kept for reference:
	    # llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001",temperature = 0, api_key=GOOGLE_API_KEY)
	    eval_llm = llm.with_structured_output(Job)
	    system_msg = (
	        "\nYou are an expert recruiter. Filter by location, experience, and skills,\n"
	        "then rate relevance out of 10."
	    )
	    prompt = ChatPromptTemplate.from_messages([
	        ("system", system_msg),
	        ("human", "Evaluate Job: {job_text} vs Candidate: {candidate_text}."),
	    ])
	    # Plain "|" composition; the escaped "\|" form is not valid Python.
	    chain = prompt | eval_llm

	    jobs_for_eval = filtered[
	        ["Company", "Role", "Locations", "parsed_stack", "YOE", "matching_skills"]
	    ]
	    total = len(jobs_for_eval)
	    results = []

	    progress_bar = st.progress(0)
	    status_text = st.empty()

	    try:
	        for i, row in enumerate(jobs_for_eval.itertuples(index=False), start=1):
	            # Honour the "Stop Evaluation" flag before each job.
	            if not st.session_state.evaluation_running:
	                status_text.text("Evaluation halted by user.")
	                break

	            progress_bar.progress(i / total)
	            status_text.text(f"Evaluating job {i}/{total}: {row.Role} at {row.Company}")

	            # sorted() keeps the prompt text stable; set iteration order is arbitrary.
	            job_text = " ".join([
	                str(row.Role),
	                str(row.Company),
	                str(row.Locations),
	                ", ".join(sorted(row.parsed_stack)),
	                str(row.YOE),
	            ])

	            eval_job = chain.invoke({
	                "job_text": job_text,
	                "candidate_text": candidate_text,
	            })

	            results.append({
	                "job_title": eval_job.job_title,
	                "company": eval_job.company,
	                "location": eval_job.location,
	                "skills": eval_job.skills,
	                "description": eval_job.description,
	                "relevance_score": eval_job.relevance_score,
	                "matching_skills": row.matching_skills,
	            })

	            # Pause between calls only; waiting after the final job gains nothing.
	            if delay_seconds and i < total:
	                time.sleep(delay_seconds)
	    finally:
	        # Always remove the progress widgets, even if an LLM call raised.
	        progress_bar.empty()
	        status_text.empty()

	    # Build the result from whatever has been processed so far.
	    if not results:
	        return pd.DataFrame()

	    # Rank by skill overlap first, then by the LLM's relevance score.
	    return (
	        pd.DataFrame(results)
	        .sort_values(by=["matching_skills", "relevance_score"], ascending=[False, False])
	        .head(top_n)
	    )


	def preprocess_text(text):
	    """Lower-case ``text`` and drop characters that carry no resume information.

	    Word characters (letters, digits, underscore), whitespace and the
	    punctuation ``+ # . , / @ & : ( ) ' -`` are kept; every other character is
	    removed. Digits and these symbols are preserved so that details such as
	    "5 years", "c++", "c#", "node.js", date ranges and e-mail addresses are
	    still present when the text is sent on for structured extraction.

	    Args:
	        text: Raw text, e.g. as extracted from a PDF.

	    Returns:
	        str: the cleaned, lower-cased text.
	    """
	    return re.sub(r"[^\w\s+#.,/@&:()'-]", "", text.lower())


	def main():
	    """Render the Streamlit page: upload a resume, evaluate jobs, show results.

	    Two ``st.session_state`` flags drive the UI:
	      * ``evaluation_running`` -- True while an evaluation is in progress; it
	        hides "Generate Recommendations", shows "Stop Evaluation", and is
	        polled by ``eval_jobs`` between jobs.
	      * ``evaluation_complete`` -- True once results have been displayed; it
	        enables the "Try Another Resume" button.

	    ``evaluation_running`` is always reset when the evaluation branch exits,
	    whether it finishes, returns early or raises.
	    """
	    st.title("Resume Evaluator and Job Recommender")

	    # Initialize session state flags
	    if 'evaluation_running' not in st.session_state:
	        st.session_state.evaluation_running = False
	    if 'evaluation_complete' not in st.session_state:
	        st.session_state.evaluation_complete = False

	    uploaded_file = st.file_uploader("Upload your resume (PDF)", type=["pdf"])

	    # Show "Stop Evaluation" while the flag says an evaluation is running.
	    # NOTE(review): the flag is set and cleared inside the same script run
	    # below, so this button may never be rendered during an evaluation --
	    # verify against Streamlit's rerun behaviour.
	    if st.session_state.evaluation_running:
	        if st.button("Stop Evaluation"):
	            st.session_state.evaluation_running = False
	            st.warning("User requested to stop evaluation.")

	    if uploaded_file is not None:
	        # Only offer "Generate Recommendations" if not already running
	        if (not st.session_state.evaluation_running) and st.button("Generate Recommendations"):
	            st.session_state.evaluation_running = True
	            st.session_state.evaluation_complete = False

	            try:
	                # 1. Load jobs
	                jobs_df = load_jobs_data()
	                if jobs_df is None:
	                    return

	                # 2. Extract text from PDF
	                resume_text = extract_text_from_pdf(uploaded_file)
	                if not resume_text.strip():
	                    st.error("Uploaded PDF contains no text.")
	                    return

	                resume_text = preprocess_text(resume_text)
	                st.success("Resume text extracted successfully!")

	                # 3. Run the evaluation (this may take a while)
	                with st.spinner("Evaluating jobs…"):
	                    recs = eval_jobs(jobs_df, resume_text)

	                # 4. Display results (or a warning if nothing returned)
	                if not recs.empty:
	                    st.write("Recommended Jobs:")
	                    st.dataframe(recs)
	                    st.session_state.evaluation_complete = True
	                else:
	                    st.warning("No matching jobs found or evaluation was halted early.")
	            finally:
	                # Without this reset, any exception above would leave the flag
	                # True and hide "Generate Recommendations" for the whole session.
	                st.session_state.evaluation_running = False

	    # After evaluation finishes, allow the user to try another resume
	    if st.session_state.evaluation_complete:
	        if st.button("Try Another Resume"):
	            st.session_state.evaluation_complete = False
	            st.rerun()


	# Script entry point: build the page only when this file is executed as the
	# main module, not when it is imported.
	if __name__ == "__main__":
	    main()