Update src/streamlit_app.py
src/streamlit_app.py (CHANGED): +264 -130
Removed in this update (the old side of the diff): the ChatGoogleGenerativeAI (langchain_google_genai) and load_dotenv (dotenv) imports; the largely uncommented versions of initialize_google_sheets and structure_resume_data, which did not wrap credential loading or the LLM call in try/except; a "Stop Evaluation" button that was not part of the two-column layout used below; and a reset path that called st.rerun() rather than st.experimental_rerun().

The updated src/streamlit_app.py (lines outside the diff hunks are marked "not shown"):
# … lines 1-7 not shown in the diff …

from typing import List
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
import time
import re

# ──────────────────────────────────────────────────────────────────────────────
# 1) ENVIRONMENT VARIABLES / SECRETS
#
# On Hugging Face Spaces:
#   - Go to your Space's Settings → Secrets and add:
#       • OPENAI_API_KEY = your-openai-key
#       • GOOGLE_API_KEY = your-google-key (if you use any Google LLM)
#   - If you also need a Google Service Account JSON, either:
#       a) Commit it (careful: that is public by default, so only do so if it's non-sensitive!),
#       b) Or add it as "Repository Files" via the "Files & versions" tab,
#       c) Or load it from a Secret.
#
# In the code below, we'll assume the service-account JSON is committed under:
#     synapse-recruitment-34e7b48899b4.json
#
# If you instead want to load it from a single-line environment variable, you can do:
#     service_account_info = json.loads(os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON"))
#     creds = service_account.Credentials.from_service_account_info(service_account_info, scopes=SCOPES)
#
# For now, we simply use:
#     SERVICE_ACCOUNT_FILE = "synapse-recruitment-34e7b48899b4.json"
#
# and expect that file to be present at the top level of your repo/Space.
# ──────────────────────────────────────────────────────────────────────────────

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "")

if OPENAI_API_KEY == "":
    st.warning("OPENAI_API_KEY is not set. The LLM calls will fail unless you add it under Secrets.")
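# Note: ChatOpenAI picks up OPENAI_API_KEY from the environment on its own, which is
# why the key is not passed explicitly to the ChatOpenAI(...) constructors below.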

# ──────────────────────────────────────────────────────────────────────────────
# 2) Pydantic models for structured output
# ──────────────────────────────────────────────────────────────────────────────

class structure(BaseModel):
    name: str = Field(description="Name of the candidate")

    # … lines 53-65 not shown in the diff (class Job(BaseModel) begins here) …

    relevance_score: float = Field(description="Relevance score of the job to the candidate's resume.")


# ──────────────────────────────────────────────────────────────────────────────
# 3) Helper: parse a comma-separated "Tech Stack" string into a Python set
# ──────────────────────────────────────────────────────────────────────────────

def parse_tech_stack(stack):
    if pd.isna(stack) or stack == "" or stack is None:
        return set()
    if isinstance(stack, set):
        return stack
    try:
        # If it's literally a Python-set string like "{'python','django'}"
        if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
            items = stack.strip("{}").split(",")
            return set(item.strip().strip("'\"").lower() for item in items if item.strip())
        # Otherwise assume comma-separated values
        return set(s.strip().lower() for s in str(stack).split(",") if s.strip())
    except Exception as e:
        st.error(f"Error parsing tech stack: {e}")
        return set()
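# Illustration (hypothetical inputs), both of which normalize to {'python', 'django'}:
#     parse_tech_stack("{'Python', 'Django'}")
#     parse_tech_stack("Python, Django")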

# ──────────────────────────────────────────────────────────────────────────────
# 4) Google Sheets initialization (Service Account JSON must be present in repo)
# ──────────────────────────────────────────────────────────────────────────────

def initialize_google_sheets():
    SERVICE_ACCOUNT_FILE = "synapse-recruitment-34e7b48899b4.json"
    SCOPES = ["https://www.googleapis.com/auth/spreadsheets"]
    if not os.path.exists(SERVICE_ACCOUNT_FILE):
        st.error(f"Service account file not found at '{SERVICE_ACCOUNT_FILE}'.\n"
                 "Either commit it into the repo or load from a Secret.")
        return None

    try:
        creds = service_account.Credentials.from_service_account_file(
            SERVICE_ACCOUNT_FILE, scopes=SCOPES
        )
        return gspread.authorize(creds)
    except Exception as e:
        st.error(f"Failed to load Google Service Account credentials: {e}")
        return None


def load_jobs_data():
    gc = initialize_google_sheets()
    if gc is None:
        return None

    try:
        # NOTE: Replace this key with your actual spreadsheet key
        SPREADSHEET_KEY = "1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k"
        worksheet = gc.open_by_key(SPREADSHEET_KEY).worksheet("paraform_jobs_formatted")
        all_values = worksheet.get_all_values()
        if not all_values or len(all_values) < 2:
            st.warning("No data found in the Jobs sheet.")
            return None

        df = pd.DataFrame(all_values[1:], columns=all_values[0]).fillna("")
        # Add a "parsed_stack" column so we can pre-filter by skill overlap
        df["parsed_stack"] = df["Tech Stack"].apply(parse_tech_stack)
        return df

    except Exception as e:
        st.error(f"Error loading jobs data from Google Sheets: {e}")
        return None
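# All cells come back from get_all_values() as strings; the evaluation code below
# expects at least a "Tech Stack" column (comma-separated skills) and a "YOE" column.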

# ──────────────────────────────────────────────────────────────────────────────
# 5) PDF → plain text
# ──────────────────────────────────────────────────────────────────────────────

def extract_text_from_pdf(pdf_file):
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        full_text = ""
        for page in reader.pages:
            text = page.extract_text()
            if text:
                full_text += text + "\n"
        return full_text
    except Exception as e:
        st.error(f"Failed to read PDF: {e}")
        return ""

# ──────────────────────────────────────────────────────────────────────────────
# 6) Call GPT-4o-mini to extract structured fields from resume text
# ──────────────────────────────────────────────────────────────────────────────

def structure_resume_data(resume_text: str) -> structure:
    llm = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0.0,
        max_retries=2,
    )
    sum_llm = llm.with_structured_output(structure)

    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a helper that extracts structured data from a resume."),
        ("human", "Extract the following fields from this resume:\n{resume_text}\n"
                  "If any field is missing, return 'Unknown'.")
    ])

    try:
        parsed = (prompt | sum_llm).invoke({"resume_text": resume_text})
        return parsed
    except Exception as e:
        st.error(f"Failed to extract structure from resume: {e}")
        # Return a fallback with "Unknown" fields
        return structure(
            name="Unknown",
            location="Unknown",
            skills=[],
            ideal_jobs="Unknown",
            yoe="Unknown",
            experience="Unknown"
        )


# ──────────────────────────────────────────────────────────────────────────────
# 7) Evaluate jobs: pre-filter by requiring at least two overlapping skills,
#    then run an LLM loop (with a "Stop" check on each iteration)
# ──────────────────────────────────────────────────────────────────────────────

def eval_jobs(jobs_df: pd.DataFrame, resume_text: str) -> pd.DataFrame:
    """
    1) Extract candidate info (list of skills, etc.)
    2) Build a skill set from response.skills
    3) Pre-filter all jobs so that a job's Tech Stack has ≥ 2 skills in common
    4) For that filtered subset, run an LLM evaluation loop;
       on each iteration, check `st.session_state.evaluation_running`:
       if it has become False, break out immediately.
    5) Return a DataFrame of top-10 results (or empty if none).
    """
    response = structure_resume_data(resume_text)
    candidate_skills = set(skill.lower() for skill in response.skills)

    # How many overlapping skills does each job have?
    def matching_skill_count(tech_stack: str) -> int:
        job_skills = set(s.strip().lower() for s in tech_stack.split(",") if s.strip())
        return len(candidate_skills & job_skills)

    jobs_df["matching_skills"] = jobs_df["Tech Stack"].apply(matching_skill_count)
    filtered = jobs_df[jobs_df["matching_skills"] >= 2].copy()

    if filtered.empty:
        st.warning("No jobs passed the 2-skill pre-filter.")
        return pd.DataFrame()

    # Build a candidate_text blob for the LLM to consume
    candidate_text = (
        f"{response.name} {response.location} "
        f"{', '.join(response.skills)} {response.ideal_jobs} "
        f"{response.yoe} {response.experience}"
    )

    # LLM setup for job evaluation
    llm = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0.0,
        max_retries=2,
    )
    eval_llm = llm.with_structured_output(Job)

    system_msg = (
        "You are an expert recruiter. First, filter by location & experience. "
        "Then pick jobs that match the candidate's skills & background. "
        "Finally, assign a relevance score (0-10)."
    )
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_msg),
        ("human", "Evaluate Job: {job_text}\nCandidate: {candidate_text}\n"
                  "Return JSON with job_title, company, location, skills, description, relevance_score.")
    ])
    chain = prompt | eval_llm
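    # Because eval_llm was built with with_structured_output(Job), chain.invoke(...)
    # returns a Job instance, whose fields are read directly in the loop below.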

    # … lines 245-249 not shown in the diff …

    total = len(jobs_for_eval)

    for i, row in enumerate(jobs_for_eval.itertuples(), start=1):
        # If the user clicked "Stop Evaluation", evaluation_running has been set to False
        if not st.session_state.evaluation_running:
            status_text.text("Evaluation halted by user.")
            break

        progress_bar.progress(i / total)

        # … lines 259-265 not shown in the diff …
            str(row.YOE)
        ])

        try:
            eval_job = chain.invoke({
                "job_text": job_text,
                "candidate_text": candidate_text
            })
        except Exception as e:
            st.error(f"LLM failed on job #{i}: {e}")
            # Skip this job and continue
            continue

        results.append({
            "job_title": eval_job.job_title,
            "company": eval_job.company,
            "location": eval_job.location,
            "skills": eval_job.skills,
            "description": eval_job.description,
            "relevance_score": eval_job.relevance_score,
            "matching_skills": row.matching_skills
        })

        # Simulate a delay so you can see the Stop button in action
        time.sleep(0.5)

    progress_bar.empty()
    status_text.empty()

    if not results:
        return pd.DataFrame()

    df_results = pd.DataFrame(results)
    # Sort first by matching_skills desc, then by relevance_score desc, take top 10
    df_results = df_results.sort_values(
        by=["matching_skills", "relevance_score"],
        ascending=[False, False]
    ).head(10)

    return df_results


# ──────────────────────────────────────────────────────────────────────────────
# 8) Clean résumé text (lowercase, strip special chars)
# ──────────────────────────────────────────────────────────────────────────────

def preprocess_text(text: str) -> str:
    return re.sub(r"[^a-zA-Z\s]", "", text.lower())
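# Keeps only ASCII letters and whitespace, lowercased; digits, punctuation, and
# accented characters are stripped before the resume text reaches the LLM.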

# ──────────────────────────────────────────────────────────────────────────────
# 9) Streamlit UI
# ──────────────────────────────────────────────────────────────────────────────

def main():
    st.title("Resume Evaluator & Job Recommender")

    # 9.1) Initialize session state flags
    if "evaluation_running" not in st.session_state:
        st.session_state.evaluation_running = False
    if "evaluation_complete" not in st.session_state:
        st.session_state.evaluation_complete = False

    # 9.2) File uploader
    uploaded_file = st.file_uploader(
        "Upload your resume (PDF)",
        type=["pdf"],
        help="After picking a PDF, click 'Generate Recommendations' below."
    )

    # 9.3) Always show BOTH "Generate Recommendations" and "Stop Evaluation", in two columns
    col1, col2 = st.columns(2)

    with col1:
        if st.session_state.evaluation_running:
            st.button("Generate Recommendations", disabled=True)
        else:
            if st.button("Generate Recommendations"):
                # 9.4) User clicked "Generate": begin
                st.session_state.evaluation_running = True
                st.session_state.evaluation_complete = False

                # 9.5) Ensure a file was actually uploaded
                if uploaded_file is None:
                    st.error("Please upload a PDF before clicking 'Generate Recommendations'.")
                    st.session_state.evaluation_running = False
                else:
                    # Debug: print the basic type of what Streamlit handed us
                    st.write(f"Received file of type: `{type(uploaded_file)}`")

                    # 9.6) Load job sheet
                    jobs_df = load_jobs_data()
                    if jobs_df is None:
                        st.session_state.evaluation_running = False
                        return

                    # 9.7) Extract text from the PDF
                    raw_text = extract_text_from_pdf(uploaded_file)
                    if not raw_text.strip():
                        st.error("The uploaded PDF appears to contain no extractable text.")
                        st.session_state.evaluation_running = False
                        return

                    cleaned = preprocess_text(raw_text)
                    st.success("Resume text extracted successfully!")

                    # 9.8) Run the lengthy eval loop inside a spinner
                    with st.spinner("Evaluating jobs…"):
                        recommendations = eval_jobs(jobs_df, cleaned)

                    # 9.9) Show results (or a warning if there are none)
                    if not recommendations.empty:
                        st.header("Recommended Jobs")
                        st.dataframe(recommendations)
                        st.session_state.evaluation_complete = True
                    else:
                        st.warning("No matching jobs found or evaluation was halted mid-stream.")

                    # 9.10) Done (or halted)
                    st.session_state.evaluation_running = False

    with col2:
        # The "Stop Evaluation" button is only enabled while evaluation_running is True:
        if st.session_state.evaluation_running:
            if st.button("Stop Evaluation"):
                st.session_state.evaluation_running = False
                st.warning("User requested to stop evaluation.")
        else:
            st.button("Stop Evaluation", disabled=True)

    # 9.11) Once complete, allow "Try Another Resume" to reset
    if st.session_state.evaluation_complete:
        if st.button("Try Another Resume"):
            st.session_state.evaluation_complete = False
            st.experimental_rerun()


if __name__ == "__main__":
    main()
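For reference, the structured-output pattern used in structure_resume_data and eval_jobs boils down to the following minimal, self-contained sketch (the model name, the Person schema, and the example text are illustrative, not taken from the app; it assumes OPENAI_API_KEY is set in the environment):

from typing import List
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

class Person(BaseModel):
    name: str = Field(description="Full name")
    skills: List[str] = Field(description="List of skills")

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
structured_llm = llm.with_structured_output(Person)
prompt = ChatPromptTemplate.from_messages([
    ("system", "Extract the requested fields."),
    ("human", "{text}"),
])

# The chain returns a Person instance rather than raw text.
person = (prompt | structured_llm).invoke({"text": "Jane Doe knows Python and SQL."})
print(person.name, person.skills)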