"""Resume Analyzer Gradio app.

Extracts text from an uploaded resume (PDF/DOCX/TXT), pulls out skills,
education, experience and contact details, scores the resume on four
axes, and renders recommendations in a Gradio UI.
"""

import gradio as gr
from transformers import pipeline
import pandas as pd
import spacy
import re
from pathlib import Path
import PyPDF2
import docx
import json

# Load the spaCy English model, downloading it on first run if missing.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

keyword_extractor = pipeline(
    "token-classification",
    model="jean-baptiste/roberta-large-ner-english",
)
# NOTE(review): `classifier` is loaded but never used below. Kept so any
# importer referencing it keeps working; consider removing to save memory.
classifier = pipeline(
    "text-classification",
    model="microsoft/MiniLM-L12-H384-uncased",
)

# Pre-compiled patterns (hoisted out of the functions that use them).
# Fixed: the original email class `[A-Z|a-z]` wrongly matched a literal '|'.
_EMAIL_RE = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
_PHONE_RE = re.compile(r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b')
_YEAR_RE = re.compile(r'\b\d{4}\b')
_SENTENCE_PUNCT_RE = re.compile(r'[.!?]')


def extract_text_from_resume(file):
    """Return the plain text of an uploaded resume file.

    Supports .pdf, .docx and .txt; the extension match is case-insensitive
    (fix: the original compared with `str.endswith` case-sensitively).
    Unknown extensions yield an empty string.
    """
    file_path = file.name
    suffix = Path(file_path).suffix.lower()
    text = ""
    if suffix == '.pdf':
        with open(file_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            for page in pdf_reader.pages:
                # extract_text() may return None for image-only pages;
                # the original would raise TypeError on `text += None`.
                text += page.extract_text() or ""
    elif suffix == '.docx':
        doc = docx.Document(file_path)
        for paragraph in doc.paragraphs:
            text += paragraph.text + '\n'
    elif suffix == '.txt':
        with open(file_path, 'r', encoding='utf-8') as txt_file:
            text = txt_file.read()
    return text.strip()


def extract_information(text):
    """Extract skills, education, experience and contact info from resume text.

    Returns a dict with keys "skills", "education", "experience", "contact",
    each mapping to a list of strings.
    """
    doc = nlp(text)
    entities = {
        "skills": [],
        "education": [],
        "experience": [],
        "contact": []
    }

    # Skills: substring match against a predefined list of common skills.
    common_skills = ["python", "java", "javascript", "sql",
                     "machine learning", "data analysis"]
    text_lower = text.lower()
    entities["skills"] = [skill for skill in common_skills if skill in text_lower]

    # Education / experience: keyword spotting over sentences.
    # Single pass over doc.sents (the original iterated twice and
    # lowercased every sentence twice).
    education_keywords = ["university", "college", "bachelor", "master", "phd", "degree"]
    experience_keywords = ["experience", "work", "job", "position", "role"]
    for sent in doc.sents:
        sent_lower = sent.text.lower()
        if any(keyword in sent_lower for keyword in education_keywords):
            entities["education"].append(sent.text.strip())
        if any(keyword in sent_lower for keyword in experience_keywords):
            entities["experience"].append(sent.text.strip())

    # Contact information: all emails first, then all phone numbers.
    entities["contact"] = _EMAIL_RE.findall(text) + _PHONE_RE.findall(text)
    return entities


def analyze_resume(text, entities):
    """Score the resume on four 0-100 axes.

    Returns a dict with keys "completeness", "skills_match", "formatting"
    and "keyword_optimization".
    """
    scores = {
        "completeness": 0,
        "skills_match": 0,
        "formatting": 0,
        "keyword_optimization": 0
    }

    # Completeness: 25 points per non-empty section (4 sections).
    score_components = sum(
        1 for section in ("skills", "education", "experience", "contact")
        if entities[section]
    )
    scores["completeness"] = (score_components / 4) * 100

    # Skills match: fraction of the desired skills actually found.
    desired_skills = ["python", "java", "javascript", "sql", "machine learning"]
    matched_skills = sum(1 for skill in entities["skills"] if skill in desired_skills)
    scores["skills_match"] = (matched_skills / len(desired_skills)) * 100

    # Formatting: five cheap 20-point heuristics.
    formatting_score = 0
    if len(text.split('\n')) > 5:
        formatting_score += 20          # has multiple lines/sections
    if len(text) > 200:
        formatting_score += 20          # not trivially short
    if any(char.isupper() for char in text):
        formatting_score += 20          # uses capitalization
    if _YEAR_RE.search(text):
        formatting_score += 20          # contains a 4-digit year (dates)
    if len(_SENTENCE_PUNCT_RE.findall(text)) > 3:
        formatting_score += 20          # has full sentences
    scores["formatting"] = formatting_score

    # Keyword optimization: count NER hits on the first 512 characters.
    # NOTE(review): 512 is a character slice, not a token limit — it only
    # approximates the model's context window; verify if precision matters.
    keywords = keyword_extractor(text[:512])
    scores["keyword_optimization"] = min(len(keywords) * 10, 100)

    return scores


def generate_recommendations(scores, entities):
    """Build a human-readable recommendation string from the scores.

    Each low-scoring axis contributes a themed group of bullet points.
    """
    recommendations = []

    if scores["completeness"] < 75:
        recommendations.append("šŸ“‹ Add more sections to your resume to improve completeness.")
        if not entities["skills"]:
            recommendations.append("- Add a skills section")
        if not entities["education"]:
            recommendations.append("- Add education details")
        if not entities["experience"]:
            recommendations.append("- Add work experience")
        if not entities["contact"]:
            recommendations.append("- Add contact information")

    if scores["skills_match"] < 60:
        recommendations.append("\nšŸ’” Consider adding more relevant skills:")
        recommendations.append("- Focus on technical skills like Python, Java, SQL")
        recommendations.append("- Include both hard and soft skills")

    if scores["formatting"] < 80:
        recommendations.append("\nšŸ“‘ Improve resume formatting:")
        recommendations.append("- Use clear section headings")
        recommendations.append("- Include dates for experiences")
        recommendations.append("- Use bullet points for better readability")

    if scores["keyword_optimization"] < 70:
        recommendations.append("\nšŸ” Optimize keywords usage:")
        recommendations.append("- Use more industry-specific terms")
        recommendations.append("- Include action verbs")
        recommendations.append("- Mention specific technologies and tools")

    return "\n".join(recommendations)


def process_resume(file):
    """Gradio callback: analyze the uploaded file.

    Returns (scores dict, recommendations string). Handles the no-upload
    case gracefully (fix: the original raised AttributeError on None).
    """
    if file is None:
        return {}, "Please upload a resume file first."
    text = extract_text_from_resume(file)
    entities = extract_information(text)
    scores = analyze_resume(text, entities)
    recommendations = generate_recommendations(scores, entities)
    return scores, recommendations


def create_interface():
    """Build and return the Gradio Blocks UI for the analyzer."""
    with gr.Blocks() as app:
        gr.Markdown("""
        # Resume Analyzer and Optimizer
        Upload your resume to get personalized analysis and recommendations.
        """)

        with gr.Row():
            file_input = gr.File(
                label="Upload Resume (PDF, DOCX, or TXT)",
                # Fix: gradio expects extensions with a leading dot.
                file_types=[".pdf", ".docx", ".txt"]
            )

        with gr.Row():
            analyze_button = gr.Button("Analyze Resume", variant="primary")

        with gr.Row():
            with gr.Column():
                score_output = gr.JSON(label="Analysis Scores")
            with gr.Column():
                recommendations_output = gr.Textbox(
                    label="Recommendations",
                    lines=10
                )

        analyze_button.click(
            fn=process_resume,
            inputs=[file_input],
            outputs=[score_output, recommendations_output]
        )

    return app


if __name__ == "__main__":
    app = create_interface()
    app.launch()