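"""Streamlit app: upload a resume PDF, extract a structured candidate
profile with an LLM, and recommend the best-matching jobs from a
Google Sheet of postings."""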
import streamlit as st
import pandas as pd
import PyPDF2
import os
from google.oauth2 import service_account
import gspread
from pydantic import BaseModel, Field
from typing import List
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
import time
from dotenv import load_dotenv
import re

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

class Candidate(BaseModel):
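    """Structured candidate profile extracted from a resume by the LLM."""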
    name: str = Field(description="Name of the candidate")
    location: str = Field(description="The location of the candidate.")
    skills: List[str] = Field(description="List of individual skills of the candidate")
    ideal_jobs: str = Field(description="Ideal jobs for the candidate based on past experience.")
    yoe: str = Field(description="Years of experience of the candidate.")
    experience: str = Field(description="A brief summary of the candidate's past experience.")


class Job(BaseModel):
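    """Schema for the LLM's evaluation of one job posting against the candidate."""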
    job_title: str = Field(description="The title of the job.")
    company: str = Field(description="The company offering the job.")
    location: str = Field(description="The location of the job.")
    skills: List[str] = Field(description="List of skills required for the job.")
    description: str = Field(description="A brief description of the job.")
    relevance_score: float = Field(description="Relevance score (out of 10) of the job to the candidate's resume.")


# ——— helper to parse a comma-separated tech stack into a set ———
def parse_tech_stack(stack):
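    """Normalize a tech-stack cell into a lowercase set of skill strings.

    Handles both set-literal strings and plain comma-separated values, e.g.
        "{'Python', 'SQL'}" -> {'python', 'sql'}
        "Python, SQL"       -> {'python', 'sql'}
    """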
    if pd.isna(stack) or stack == "" or stack is None:
        return set()
    if isinstance(stack, set):
        return stack
    try:
        if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
            items = stack.strip("{}").split(",")
            return set(item.strip().strip("'\"").lower() for item in items if item.strip())
        return set(s.strip().lower() for s in str(stack).split(",") if s.strip())
    except Exception as e:
        st.error(f"Error parsing tech stack: {e}")
        return set()


def initialize_google_sheets():
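    """Build an authorized gspread client from the local service-account file,
    or return None (with a visible error) if the file is missing."""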
    SERVICE_ACCOUNT_FILE = 'src/synapse-recruitment-34e7b48899b4.json'
    SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
    if not os.path.exists(SERVICE_ACCOUNT_FILE):
        st.error(f"Service account file not found at {SERVICE_ACCOUNT_FILE}")
        return None
    creds = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, scopes=SCOPES
    )
    return gspread.authorize(creds)


def load_jobs_data():
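    """Load the jobs worksheet into a DataFrame and add a normalized
    'parsed_stack' set column for skill matching."""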
    gc = initialize_google_sheets()
    if gc is None:
        return None
    try:
        ws = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k') \
               .worksheet("paraform_jobs_formatted")
        data = ws.get_all_values()
        df = pd.DataFrame(data[1:], columns=data[0]).fillna("")
        # parse Tech Stack into a set for each row
        df['parsed_stack'] = df['Tech Stack'].apply(parse_tech_stack)
        return df
    except Exception as e:
        st.error(f"Error loading jobs data: {e}")
        return None


def extract_text_from_pdf(pdf_file):
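    """Concatenate the text of every PDF page; pages with no extractable
    text contribute an empty string."""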
    reader = PyPDF2.PdfReader(pdf_file)
    return "".join(page.extract_text() or "" for page in reader.pages)


def structure_resume_data(resume_text):
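    """Ask the LLM to fill the Candidate schema from raw resume text."""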
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    # llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001",temperature = 0, api_key=GOOGLE_API_KEY)
    sum_llm = llm.with_structured_output(Candidate)
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You extract structured data from resumes."),
        ("human", "Extract the candidate's details from this resume: {resume_text}. "
                  "Return Unknown for any field that is missing.")
    ])
    return (prompt | sum_llm).invoke({"resume_text": resume_text})


def eval_jobs(jobs_df, resume_text):
    """
    - Extract structured candidate info
    - Build candidate skill set
    - Pre‐filter jobs by requiring ≥2 overlapping skills
    - For the filtered set, run the LLM‐evaluation loop
    - At each iteration, check st.session_state.evaluation_running;
      if False, break out immediately.
    """
    response = structure_resume_data(resume_text)
    candidate_skills = set(skill.lower() for skill in response.skills)

    # Pre-filter: require at least 2 overlapping skills, using the
    # already-normalized parsed_stack sets built in load_jobs_data
    # (a raw comma-split would mishandle set-literal strings).
    jobs_df['matching_skills'] = jobs_df['parsed_stack'].apply(
        lambda job_skills: len(candidate_skills & job_skills)
    )
    filtered = jobs_df[jobs_df['matching_skills'] >= 2].copy()

    if filtered.empty:
        st.warning("No jobs passed the tech‐stack pre‐filter.")
        return pd.DataFrame()

    candidate_text = (
        f"{response.name} {response.location} "
        f"{', '.join(response.skills)} {response.ideal_jobs} "
        f"{response.yoe} {response.experience}"
    )

    # LLM setup
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    # llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001",temperature = 0, api_key=GOOGLE_API_KEY)

    eval_llm = llm.with_structured_output(Job)
    system_msg = (
        "You are an expert recruiter. Filter by location, experience, and "
        "skills, then rate relevance out of 10."
    )
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_msg),
        ("human", "Evaluate Job: {job_text} vs Candidate: {candidate_text}.")
    ])
    chain = prompt | eval_llm

    jobs_for_eval = filtered[["Company", "Role", "Locations", "parsed_stack", "YOE", "matching_skills"]]
    results = []

    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(jobs_for_eval)

    for i, row in enumerate(jobs_for_eval.itertuples(), start=1):
        # Check the "Stop Evaluation" flag before each iteration
        if not st.session_state.evaluation_running:
            # User clicked Stop → break out immediately
            status_text.text("Evaluation halted by user.")
            break

        progress_bar.progress(i / total)
        status_text.text(f"Evaluating job {i}/{total}: {row.Role} at {row.Company}")

        job_text = " ".join([
            row.Role,
            row.Company,
            row.Locations,
            ", ".join(row.parsed_stack),
            str(row.YOE)
        ])

        eval_job = chain.invoke({
            "job_text": job_text,
            "candidate_text": candidate_text
        })

        results.append({
            "job_title":      eval_job.job_title,
            "company":        eval_job.company,
            "location":       eval_job.location,
            "skills":         eval_job.skills,
            "description":    eval_job.description,
            "relevance_score": eval_job.relevance_score,
            "matching_skills": row.matching_skills
        })
        time.sleep(5)  # Pause between LLM calls to avoid hitting rate limits

    progress_bar.empty()
    status_text.empty()

    # Build a DataFrame from whatever has been processed so far
    if results:
        df_results = pd.DataFrame(results)
        # Sort by matching_skills first, then relevance_score
        df_results = df_results.sort_values(
            by=["matching_skills", "relevance_score"],
            ascending=[False, False]
        ).head(10)
    else:
        df_results = pd.DataFrame()

    return df_results


def preprocess_text(text):
    # Lowercase and keep only letters, digits, and whitespace; digits must
    # survive so years-of-experience can be extracted downstream.
    return re.sub(r'[^a-zA-Z0-9\s]', '', text.lower())


def main():
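    """Streamlit entry point: wires together the upload widget, the
    start/stop controls, and the results display."""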
    st.title("Resume Evaluator and Job Recommender")

    # Initialize session state flags
    if 'evaluation_running' not in st.session_state:
        st.session_state.evaluation_running = False
    if 'evaluation_complete' not in st.session_state:
        st.session_state.evaluation_complete = False

    uploaded_file = st.file_uploader("Upload your resume (PDF)", type=["pdf"])

    # Show “Stop Evaluation” while the loop is running
    if st.session_state.evaluation_running:
        if st.button("Stop Evaluation"):
            # User clicked “Stop” → flip the flag
            st.session_state.evaluation_running = False
            st.warning("User requested to stop evaluation.")

    if uploaded_file is not None:
        # Only show “Generate Recommendations” if not already running
        if (not st.session_state.evaluation_running) and st.button("Generate Recommendations"):
            # Kick off
            st.session_state.evaluation_running = True
            st.session_state.evaluation_complete = False

            # 1. Load jobs
            jobs_df = load_jobs_data()
            if jobs_df is None:
                st.session_state.evaluation_running = False
                return

            # 2. Extract text from PDF
            resume_text = extract_text_from_pdf(uploaded_file)
            if not resume_text.strip():
                st.error("Uploaded PDF contains no text.")
                st.session_state.evaluation_running = False
                return

            resume_text = preprocess_text(resume_text)
            st.success("Resume text extracted successfully!")

            # 3. Run the evaluation (this may take a while)
            with st.spinner("Evaluating jobs…"):
                recs = eval_jobs(jobs_df, resume_text)

            # 4. Display results (or a warning if nothing returned)
            if not recs.empty:
                st.write("Recommended Jobs:")
                st.dataframe(recs)
                st.session_state.evaluation_complete = True
            else:
                st.warning("No matching jobs found or evaluation was halted early.")

            # Mark evaluation as done (or halted)
            st.session_state.evaluation_running = False

        # After evaluation finishes, allow the user to try another resume
        if st.session_state.evaluation_complete:
            if st.button("Try Another Resume"):
                st.session_state.evaluation_complete = False
                st.rerun()


if __name__ == "__main__":
    main()