ak0601 committed
Commit 75d66f3 · verified · 1 Parent(s): 84acedd

Update src/streamlit_app.py

Files changed (1):
  1. src/streamlit_app.py +268 -38

src/streamlit_app.py CHANGED
@@ -1,40 +1,270 @@
- import altair as alt
- import numpy as np
- import pandas as pd
  import streamlit as st
-
- """
- # Welcome to Streamlit!
-
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
- forums](https://discuss.streamlit.io).
-
- In the meantime, below is an example of what you can do with just a few lines of code:
- """
-
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-
- indices = np.linspace(0, 1, num_points)
- theta = 2 * np.pi * num_turns * indices
- radius = indices
-
- x = radius * np.cos(theta)
- y = radius * np.sin(theta)
-
- df = pd.DataFrame({
-     "x": x,
-     "y": y,
-     "idx": indices,
-     "rand": np.random.randn(num_points),
- })
-
- st.altair_chart(alt.Chart(df, height=700, width=700)
-     .mark_point(filled=True)
-     .encode(
-         x=alt.X("x", axis=None),
-         y=alt.Y("y", axis=None),
-         color=alt.Color("idx", legend=None, scale=alt.Scale()),
-         size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-     ))
 
 
 
  import streamlit as st
+ import pandas as pd
+ import PyPDF2
+ import os
+ from google.oauth2 import service_account
+ import gspread
+ from pydantic import BaseModel, Field
+ from typing import List
+ from langchain_openai import ChatOpenAI
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ import time
+ from dotenv import load_dotenv
+ import re
+
+ load_dotenv()
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+
+ class structure(BaseModel):
+     name: str = Field(description="Name of the candidate")
+     location: str = Field(description="The location of the candidate.")
+     skills: List[str] = Field(description="List of individual skills of the candidate")
+     ideal_jobs: str = Field(description="List of ideal jobs for the candidate based on past experience.")
+     yoe: str = Field(description="Years of experience of the candidate.")
+     experience: str = Field(description="A brief summary of the candidate's past experience.")
+
+
+ class Job(BaseModel):
+     job_title: str = Field(description="The title of the job.")
+     company: str = Field(description="The company offering the job.")
+     location: str = Field(description="The location of the job.")
+     skills: List[str] = Field(description="List of skills required for the job.")
+     description: str = Field(description="A brief description of the job.")
+     relevance_score: float = Field(description="Relevance score of the job to the candidate's resume.")
+
+
+ # --- helper to parse a comma-separated tech stack into a set ---
+ def parse_tech_stack(stack):
+     if pd.isna(stack) or stack == "" or stack is None:
+         return set()
+     if isinstance(stack, set):
+         return stack
+     try:
+         if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
+             items = stack.strip("{}").split(",")
+             return set(item.strip().strip("'\"").lower() for item in items if item.strip())
+         return set(s.strip().lower() for s in str(stack).split(",") if s.strip())
+     except Exception as e:
+         st.error(f"Error parsing tech stack: {e}")
+         return set()
+
+
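A quick illustration of what `parse_tech_stack` returns for the two input shapes it handles, a plain comma-separated string and a stringified set literal (example values only, not part of the commit):

    parse_tech_stack("Python, AWS, React")   # {'python', 'aws', 'react'}
    parse_tech_stack("{'Python', 'Go'}")     # {'python', 'go'}
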
+ def initialize_google_sheets():
+     SERVICE_ACCOUNT_FILE = 'src/synapse-recruitment-34e7b48899b4.json'
+     SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
+     if not os.path.exists(SERVICE_ACCOUNT_FILE):
+         st.error(f"Service account file not found at {SERVICE_ACCOUNT_FILE}")
+         return None
+     creds = service_account.Credentials.from_service_account_file(
+         SERVICE_ACCOUNT_FILE, scopes=SCOPES
+     )
+     return gspread.authorize(creds)
+
+
+ def load_jobs_data():
+     gc = initialize_google_sheets()
+     if gc is None:
+         return None
+     try:
+         ws = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k') \
+             .worksheet("paraform_jobs_formatted")
+         data = ws.get_all_values()
+         df = pd.DataFrame(data[1:], columns=data[0]).fillna("")
+         # parse Tech Stack into a set for each row
+         df['parsed_stack'] = df['Tech Stack'].apply(parse_tech_stack)
+         return df
+     except Exception as e:
+         st.error(f"Error loading jobs data: {e}")
+         return None
+
+
+ def extract_text_from_pdf(pdf_file):
+     reader = PyPDF2.PdfReader(pdf_file)
+     return "".join(page.extract_text() or "" for page in reader.pages)
+
+
+ def structure_resume_data(resume_text):
+     llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+     # llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001", temperature=0, api_key=GOOGLE_API_KEY)
+     sum_llm = llm.with_structured_output(structure)
+     prompt = ChatPromptTemplate.from_messages([
+         ("system", "You extract structured data from resumes."),
+         ("human", "Extract: {resume_text}. If missing, return Unknown for each field.")
+     ])
+     return (prompt | sum_llm).invoke({"resume_text": resume_text})
+
+
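Because the chain is built with `with_structured_output(structure)`, `structure_resume_data` returns a populated `structure` instance rather than raw text, so downstream code can use attribute access; roughly (values are hypothetical):

    resume = structure_resume_data(resume_text)
    resume.name                          # e.g. "Jane Doe"
    resume.skills                        # e.g. ["Python", "SQL", "AWS"]
    {s.lower() for s in resume.skills}   # the candidate skill set built in eval_jobs
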
+ def eval_jobs(jobs_df, resume_text):
+     """
+     - Extract structured candidate info
+     - Build candidate skill set
+     - Pre-filter jobs by requiring ≥2 overlapping skills
+     - For the filtered set, run the LLM-evaluation loop
+     - At each iteration, check st.session_state.evaluation_running;
+       if False, break out immediately.
+     """
+     response = structure_resume_data(resume_text)
+     candidate_skills = set(skill.lower() for skill in response.skills)
+
+     # Quick helper to count overlaps
+     def matching_skill_count(tech_stack):
+         job_skills = set(skill.strip().lower() for skill in tech_stack.split(","))
+         return len(candidate_skills & job_skills)
+
+     # Pre-filter: require ≥2 overlapping skills
+     jobs_df['matching_skills'] = jobs_df['Tech Stack'].apply(matching_skill_count)
+     filtered = jobs_df[jobs_df['matching_skills'] >= 2].copy()
+
+     if filtered.empty:
+         st.warning("No jobs passed the tech-stack pre-filter.")
+         return pd.DataFrame()
+
+     candidate_text = (
+         f"{response.name} {response.location} "
+         f"{', '.join(response.skills)} {response.ideal_jobs} "
+         f"{response.yoe} {response.experience}"
+     )
+
+     # LLM setup
+     llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+     # llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-001", temperature=0, api_key=GOOGLE_API_KEY)
+
+     eval_llm = llm.with_structured_output(Job)
+     system_msg = """
+     You are an expert recruiter. Filter by location, experience, and skills,
+     then rate relevance out of 10."""
+     prompt = ChatPromptTemplate.from_messages([
+         ("system", system_msg),
+         ("human", "Evaluate Job: {job_text} vs Candidate: {candidate_text}.")
+     ])
+     chain = prompt | eval_llm
+
+     jobs_for_eval = filtered[["Company", "Role", "Locations", "parsed_stack", "YOE", "matching_skills"]]
+     results = []
+
+     progress_bar = st.progress(0)
+     status_text = st.empty()
+     total = len(jobs_for_eval)
+
+     for i, row in enumerate(jobs_for_eval.itertuples(), start=1):
+         # Check the "Stop Evaluation" flag before each iteration
+         if not st.session_state.evaluation_running:
+             # User clicked Stop → break out immediately
+             status_text.text("Evaluation halted by user.")
+             break
+
+         progress_bar.progress(i / total)
+         status_text.text(f"Evaluating job {i}/{total}: {row.Role} at {row.Company}")
+
+         job_text = " ".join([
+             row.Role,
+             row.Company,
+             row.Locations,
+             ", ".join(row.parsed_stack),
+             str(row.YOE)
+         ])
+
+         eval_job = chain.invoke({
+             "job_text": job_text,
+             "candidate_text": candidate_text
+         })
+
+         results.append({
+             "job_title": eval_job.job_title,
+             "company": eval_job.company,
+             "location": eval_job.location,
+             "skills": eval_job.skills,
+             "description": eval_job.description,
+             "relevance_score": eval_job.relevance_score,
+             "matching_skills": row.matching_skills
+         })
+         time.sleep(5)  # Simulate processing delay
+
+     progress_bar.empty()
+     status_text.empty()
+
+     # Build a DataFrame from whatever has been processed so far
+     if results:
+         df_results = pd.DataFrame(results)
+         # Sort by matching_skills first, then relevance_score
+         df_results = df_results.sort_values(
+             by=["matching_skills", "relevance_score"],
+             ascending=[False, False]
+         ).head(10)
+     else:
+         df_results = pd.DataFrame()
+
+     return df_results
+
+
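The pre-filter in `eval_jobs` is a plain set intersection between the candidate's extracted skills and each row's Tech Stack, and only rows with at least two overlaps reach the LLM loop; a minimal standalone sketch of just that step (the Tech Stack column name matches the sheet above, the values are made up for illustration):

    import pandas as pd

    candidate_skills = {"python", "sql", "aws"}
    jobs = pd.DataFrame({"Tech Stack": ["Python, SQL", "Go, Rust", "AWS, Python, Docker"]})

    def matching_skill_count(tech_stack):
        job_skills = {s.strip().lower() for s in tech_stack.split(",")}
        return len(candidate_skills & job_skills)

    jobs["matching_skills"] = jobs["Tech Stack"].apply(matching_skill_count)
    shortlist = jobs[jobs["matching_skills"] >= 2]   # rows 0 and 2 survive the pre-filter
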
+ def preprocess_text(text):
+     return re.sub(r'[^a-zA-Z\s]', '', text.lower())
+
+
+ def main():
+     st.title("Resume Evaluator and Job Recommender")
+
+     # Initialize session state flags
+     if 'evaluation_running' not in st.session_state:
+         st.session_state.evaluation_running = False
+     if 'evaluation_complete' not in st.session_state:
+         st.session_state.evaluation_complete = False
+
+     uploaded_file = st.file_uploader("Upload your resume (PDF)", type=["pdf"])
+
+     # Show "Stop Evaluation" while the loop is running
+     if st.session_state.evaluation_running:
+         if st.button("Stop Evaluation"):
+             # User clicked Stop → flip the flag
+             st.session_state.evaluation_running = False
+             st.warning("User requested to stop evaluation.")
+
+     if uploaded_file is not None:
+         # Only show "Generate Recommendations" if not already running
+         if (not st.session_state.evaluation_running) and st.button("Generate Recommendations"):
+             # Kick off
+             st.session_state.evaluation_running = True
+             st.session_state.evaluation_complete = False
+
+             # 1. Load jobs
+             jobs_df = load_jobs_data()
+             if jobs_df is None:
+                 st.session_state.evaluation_running = False
+                 return
+
+             # 2. Extract text from the PDF
+             resume_text = extract_text_from_pdf(uploaded_file)
+             if not resume_text.strip():
+                 st.error("Uploaded PDF contains no text.")
+                 st.session_state.evaluation_running = False
+                 return
+
+             resume_text = preprocess_text(resume_text)
+             st.success("Resume text extracted successfully!")
+
+             # 3. Run the evaluation (this may take a while)
+             with st.spinner("Evaluating jobs…"):
+                 recs = eval_jobs(jobs_df, resume_text)
+
+             # 4. Display results (or a warning if nothing was returned)
+             if not recs.empty:
+                 st.write("Recommended Jobs:")
+                 st.dataframe(recs)
+                 st.session_state.evaluation_complete = True
+             else:
+                 st.warning("No matching jobs found or evaluation was halted early.")
+
+             # Mark evaluation as done (or halted)
+             st.session_state.evaluation_running = False
+
+     # After evaluation finishes, allow the user to try another resume
+     if st.session_state.evaluation_complete:
+         if st.button("Try Another Resume"):
+             st.session_state.evaluation_complete = False
+             st.rerun()
+
+
+ if __name__ == "__main__":
+     main()
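
To try the updated app locally, the code above expects `OPENAI_API_KEY` (and optionally `GOOGLE_API_KEY`) to be supplied via a `.env` file picked up by `load_dotenv()`, plus the service-account JSON at `src/synapse-recruitment-34e7b48899b4.json` for Sheets access; a minimal setup sketch (key values are placeholders, not part of the commit):

    # .env (read by load_dotenv)
    OPENAI_API_KEY=...
    GOOGLE_API_KEY=...

    # then, from the repo root:
    streamlit run src/streamlit_app.py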