import streamlit as st
import pandas as pd
import PyPDF2
import os
from google.oauth2 import service_account
import gspread
from pydantic import BaseModel, Field
from typing import List
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
import time
import re

# ──────────────────────────────────────────────────────────────────────────────
# 1) ENVIRONMENT VARIABLES / SECRETS
#
# On Huggingface Spaces:
#   - Go to your Space’s Settings → Secrets and add:
#       • OPENAI_API_KEY = your‐openai‐key
#       • GOOGLE_API_KEY = your‐google‐key   (if you use any Google LLM)
#   - If you also need a Google Service Account JSON, either:
#       a) Commit it (careful: that is public by default – only do so if it’s non‐sensitive!), 
#       b) Or add it as “Repository Files” via the “Files & versions” tab,
#       c) Or load it from a Secret.
#
# In code below, we’ll assume the service‐account JSON is committed under:
#   └─ synapse-recruitment-34e7b48899b4.json
#
# If you instead want to load it from a single‐line environment variable, you can do:
#   service_account_info = json.loads(os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON"))
#   creds = service_account.Credentials.from_service_account_info(service_account_info, scopes=SCOPES)
#
# For now, we’ll simply use:
#   SERVICE_ACCOUNT_FILE = "synapse-recruitment-34e7b48899b4.json"
#
# And expect that file to be present in the top‐level of your repo/Space.
#
# ──────────────────────────────────────────────────────────────────────────────


OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "")

if OPENAI_API_KEY == "":
    st.warning("⚠️ OPENAI_API_KEY is not set. The LLM calls will fail unless you add it under Secrets.")

# ──────────────────────────────────────────────────────────────────────────────
# 2) Pydantic models for structured output
# ──────────────────────────────────────────────────────────────────────────────

class structure(BaseModel):
    name: str = Field(description="Name of the candidate")
    location: str = Field(description="The location of the candidate.")
    skills: List[str] = Field(description="List of individual skills of the candidate")
    ideal_jobs: str = Field(description="List of ideal jobs for the candidate based on past experience.")
    yoe: str = Field(description="Years of experience of the candidate.")
    experience: str = Field(description="A brief summary of the candidate's past experience.")


class Job(BaseModel):
    job_title: str = Field(description="The title of the job.")
    company: str = Field(description="The company offering the job.")
    location: str = Field(description="The location of the job.")
    skills: List[str] = Field(description="List of skills required for the job.")
    description: str = Field(description="A brief description of the job.")
    relevance_score: float = Field(description="Relevance score of the job to the candidate's resume.")
    justification: str = Field(description = "Reason for giving this relevance score and what all areas need to be improved by the candidate")


# ──────────────────────────────────────────────────────────────────────────────
# 3) Helper: parse a comma‐separated “Tech Stack” string into a Python set
# ──────────────────────────────────────────────────────────────────────────────

def parse_tech_stack(stack):
    if pd.isna(stack) or stack == "" or stack is None:
        return set()
    if isinstance(stack, set):
        return stack
    try:
        # If it's literally a Python‐set string like "{'python','django'}"
        if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
            items = stack.strip("{}").split(",")
            return set(item.strip().strip("'\"").lower() for item in items if item.strip())
        # Otherwise assume comma‐separated values
        return set(s.strip().lower() for s in str(stack).split(",") if s.strip())
    except Exception as e:
        st.error(f"Error parsing tech stack: {e}")
        return set()


# ──────────────────────────────────────────────────────────────────────────────
# 4) Google Sheets initialization (Service Account JSON must be present in repo)
# ──────────────────────────────────────────────────────────────────────────────

def initialize_google_sheets():
    SERVICE_ACCOUNT_FILE = "synapse-recruitment-34e7b48899b4.json"
    SCOPES = ["https://www.googleapis.com/auth/spreadsheets"]
    if not os.path.exists(SERVICE_ACCOUNT_FILE):
        st.error(f"Service account file not found at '{SERVICE_ACCOUNT_FILE}'.\n"
                 "Either commit it into the repo or load from a Secret.")
        return None

    try:
        creds = service_account.Credentials.from_service_account_file(
            SERVICE_ACCOUNT_FILE, scopes=SCOPES
        )
        return gspread.authorize(creds)
    except Exception as e:
        st.error(f"Failed to load Google Service Account credentials: {e}")
        return None


def load_jobs_data():
    gc = initialize_google_sheets()
    if gc is None:
        return None

    try:
        # NOTE: Replace this key with your actual spreadsheet key
        SPREADSHEET_KEY = "1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k"
        worksheet = gc.open_by_key(SPREADSHEET_KEY).worksheet("paraform_jobs_formatted")
        all_values = worksheet.get_all_values()
        if not all_values or len(all_values) < 2:
            st.warning("No data found in the Jobs sheet.")
            return None

        df = pd.DataFrame(all_values[1:], columns=all_values[0]).fillna("")
        # Add a “parsed_stack” column so we can pre‐filter by skill overlap
        df["parsed_stack"] = df["Tech Stack"].apply(parse_tech_stack)
        return df

    except Exception as e:
        st.error(f"Error loading jobs data from Google Sheets: {e}")
        return None


# ──────────────────────────────────────────────────────────────────────────────
# 5) PDF → plain text
# ──────────────────────────────────────────────────────────────────────────────

def extract_text_from_pdf(pdf_file):
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        full_text = ""
        for page in reader.pages:
            text = page.extract_text()
            if text:
                full_text += text + "\n"
        return full_text
    except Exception as e:
        st.error(f"Failed to read PDF: {e}")
        return ""


# ──────────────────────────────────────────────────────────────────────────────
# 6) Call GPT‐4o‐mini to extract structured fields from resume text
# ──────────────────────────────────────────────────────────────────────────────

def structure_resume_data(resume_text: str) -> structure:
    llm = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0.0,
        max_retries=2,
    )
    sum_llm = llm.with_structured_output(structure)

    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a helper that extracts structured data from a resume."),
        ("human", "Extract the following fields from this resume:\n{resume_text}\n"
                  "If any field is missing, return ‘Unknown’.")
    ])

    try:
        parsed = (prompt | sum_llm).invoke({"resume_text": resume_text})
        return parsed
    except Exception as e:
        st.error(f"Failed to extract structure from resume: {e}")
        # Return a fallback with “Unknown” fields
        return structure(
            name="Unknown",
            location="Unknown",
            skills=[],
            ideal_jobs="Unknown",
            yoe="Unknown",
            experience="Unknown"
        )


# ──────────────────────────────────────────────────────────────────────────────
# 7) Evaluate jobs: Pre‐filter by requiring at least two overlapping skills,
#    then run an LLM loop (with a “Stop” check on each iteration)
# ──────────────────────────────────────────────────────────────────────────────

def eval_jobs(jobs_df: pd.DataFrame, resume_text: str) -> pd.DataFrame:
    """
    1) Extract candidate info (list of skills, etc.)
    2) Build a skill‐set from response.skills
    3) Pre‐filter all jobs so that job’s Tech Stack has ≥2 skills in common
    4) For that filtered subset, run an LLM evaluation loop
       – on each iteration, check `st.session_state.evaluation_running`:
         if it has become False, break out immediately.
    5) Return a DataFrame of top‐10 results (or empty if none).
    """
    response = structure_resume_data(resume_text)
    candidate_skills = set(skill.lower() for skill in response.skills)

    # How many overlapping skills does each job have?
    def matching_skill_count(tech_stack: str) -> int:
        job_skills = set(s.strip().lower() for s in tech_stack.split(",") if s.strip())
        return len(candidate_skills & job_skills)

    jobs_df["matching_skills"] = jobs_df["Tech Stack"].apply(matching_skill_count)
    filtered = jobs_df[jobs_df["matching_skills"] >= 2].copy()

    if filtered.empty:
        st.warning("No jobs passed the 2-skill pre-filter.")
        return pd.DataFrame()

    # Build a candidate_text blob for the LLM to consume
    candidate_text = (
        f"{response.name} {response.location} "
        f"{', '.join(response.skills)} {response.ideal_jobs} "
        f"{response.yoe} {response.experience}"
    )

    # LLM setup for job‐evaluation
    llm = ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0.0,
        max_retries=2,
    )
    eval_llm = llm.with_structured_output(Job)

    system_msg = (
        "You are an expert recruiter. First, filter by location & experience. "
        "Then pick jobs that match the candidate’s skills & background. "
        "Finally, assign a relevance score (0–10)."
    )
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_msg),
        ("human", "Evaluate Job: {job_text}\nCandidate: {candidate_text}\n"
                  "Return JSON with job_title, company, location, skills, description, relevance_score.")
    ])
    chain = prompt | eval_llm

    jobs_for_eval = filtered[["Company", "Role", "Locations", "parsed_stack", "YOE", "matching_skills"]]
    results = []

    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(jobs_for_eval)

    for i, row in enumerate(jobs_for_eval.itertuples(), start=1):
        # If the user clicked “Stop Evaluation” → evaluation_running = False
        if not st.session_state.evaluation_running:
            status_text.text("⏸️ Evaluation halted by user.")
            break

        progress_bar.progress(i / total)
        status_text.text(f"Evaluating job {i}/{total}: {row.Role} at {row.Company}")

        job_text = " ".join([
            row.Role,
            row.Company,
            row.Locations,
            ", ".join(row.parsed_stack),
            str(row.YOE)
        ])

        try:
            eval_job = chain.invoke({
                "job_text": job_text,
                "candidate_text": candidate_text
            })
        except Exception as e:
            st.error(f"LLM failed on job #{i}: {e}")
            # Skip this job and continue
            continue

        results.append({
            "job_title":        eval_job.job_title,
            "company":          eval_job.company,
            "location":         eval_job.location,
            "skills":           eval_job.skills,
            "description":      eval_job.description,
            "relevance_score":  eval_job.relevance_score,
            "matching_skills":  row.matching_skills
        })

        # Simulate a delay so you can see the Stop button in action
        time.sleep(0.5)

    progress_bar.empty()
    status_text.empty()

    if not results:
        return pd.DataFrame()

    df_results = pd.DataFrame(results)
    # Sort first by matching_skills desc, then by relevance_score desc, take top 10
    df_results = df_results.sort_values(
        by=["matching_skills", "relevance_score"],
        ascending=[False, False]
    ).head(10)

    return df_results


# ──────────────────────────────────────────────────────────────────────────────
# 8) Clean résumé text (lowercase, strip special chars)
# ──────────────────────────────────────────────────────────────────────────────

def preprocess_text(text: str) -> str:
    return re.sub(r"[^a-zA-Z\s]", "", text.lower())


# ──────────────────────────────────────────────────────────────────────────────
# 9) Streamlit UI
# ──────────────────────────────────────────────────────────────────────────────

def main():
    st.title("📝 Resume Evaluator & Job Recommender")

    # 9.1) Initialize session state flags
    if "evaluation_running" not in st.session_state:
        st.session_state.evaluation_running = False
    if "evaluation_complete" not in st.session_state:
        st.session_state.evaluation_complete = False

    # 9.2) File uploader
    uploaded_file = st.file_uploader(
        "Upload your resume (PDF)",
        type=["pdf"],
        help="After picking a PDF, click ‘Generate Recommendations’ below."
    )

    # 9.3) Always show BOTH “Generate Recommendations” and “Stop Evaluation” in two columns
    col1, col2 = st.columns(2)

    with col1:
        if st.session_state.evaluation_running:
            st.button("Generate Recommendations", disabled=True)
        else:
            if st.button("Generate Recommendations"):
                # 9.4) User clicked “Generate” → begin
                st.session_state.evaluation_running = True
                st.session_state.evaluation_complete = False

                # 9.5) Ensure a file was actually uploaded
                if uploaded_file is None:
                    st.error("❗ Please upload a PDF before clicking ‘Generate Recommendations’.")
                    st.session_state.evaluation_running = False
                else:
                    # Debug: print basic type of what streamlit handed us
                    st.write(f"▶️ Received file of type: `{type(uploaded_file)}`")

                    # 9.6) Load job sheet
                    jobs_df = load_jobs_data()
                    if jobs_df is None:
                        st.session_state.evaluation_running = False
                        return

                    # 9.7) Extract text from the PDF
                    raw_text = extract_text_from_pdf(uploaded_file)
                    if not raw_text.strip():
                        st.error("⚠️ The uploaded PDF appears to contain no extractable text.")
                        st.session_state.evaluation_running = False
                        return

                    cleaned = preprocess_text(raw_text)
                    st.success("✅ Resume text extracted successfully!")

                    # 9.8) Run the lengthy eval loop inside a spinner
                    with st.spinner("Evaluating jobs…"):
                        recommendations = eval_jobs(jobs_df, cleaned)

                    # 9.9) Show results (or warning if none)
                    if not recommendations.empty:
                        st.header("Recommended Jobs")
                        st.dataframe(recommendations)
                        st.session_state.evaluation_complete = True
                    else:
                        st.warning("No matching jobs found or evaluation was halted mid‐stream.")

                    # 9.10) Done (or halted)
                    st.session_state.evaluation_running = False

    with col2:
        # The “Stop Evaluation” button is only enabled while evaluation_running is True:
        if st.session_state.evaluation_running:
            if st.button("Stop Evaluation"):
                st.session_state.evaluation_running = False
                st.warning("⏸️ User requested to stop evaluation.")
        else:
            st.button("Stop Evaluation", disabled=True)

    # 9.11) Once complete, allow “Try Another Resume” to reset
    if st.session_state.evaluation_complete:
        if st.button("Try Another Resume"):
            st.session_state.evaluation_complete = False
            st.experimental_rerun()


if __name__ == "__main__":
    main()