"""Resume Analyzer Gradio app.

Extracts text from an uploaded resume (PDF/DOCX/TXT), pulls out skills,
education, experience and contact details, scores the resume on four
axes, and renders recommendations in a Gradio UI.
"""

import gradio as gr
from transformers import pipeline
import pandas as pd
import spacy
import re
from pathlib import Path
import PyPDF2
import docx
import json

# Load the spaCy English model, downloading it on first run if missing.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

keyword_extractor = pipeline(
    "token-classification",
    model="jean-baptiste/roberta-large-ner-english",
)
# NOTE(review): `classifier` is loaded but never used below. Kept so any
# importer referencing it keeps working; consider removing to save memory.
classifier = pipeline(
    "text-classification",
    model="microsoft/MiniLM-L12-H384-uncased",
)

# Pre-compiled patterns (hoisted out of the functions that use them).
# Fixed: the original email class `[A-Z|a-z]` wrongly matched a literal '|'.
_EMAIL_RE = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
_PHONE_RE = re.compile(r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b')
_YEAR_RE = re.compile(r'\b\d{4}\b')
_SENTENCE_PUNCT_RE = re.compile(r'[.!?]')


def extract_text_from_resume(file):
    """Return the plain text of an uploaded resume file.

    Supports .pdf, .docx and .txt; the extension match is case-insensitive
    (fix: the original compared with `str.endswith` case-sensitively).
    Unknown extensions yield an empty string.
    """
    file_path = file.name
    suffix = Path(file_path).suffix.lower()
    text = ""
    if suffix == '.pdf':
        with open(file_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            for page in pdf_reader.pages:
                # extract_text() may return None for image-only pages;
                # the original would raise TypeError on `text += None`.
                text += page.extract_text() or ""
    elif suffix == '.docx':
        doc = docx.Document(file_path)
        for paragraph in doc.paragraphs:
            text += paragraph.text + '\n'
    elif suffix == '.txt':
        with open(file_path, 'r', encoding='utf-8') as txt_file:
            text = txt_file.read()
    return text.strip()


def extract_information(text):
    """Extract skills, education, experience and contact info from resume text.

    Returns a dict with keys "skills", "education", "experience", "contact",
    each mapping to a list of strings.
    """
    doc = nlp(text)
    entities = {
        "skills": [],
        "education": [],
        "experience": [],
        "contact": []
    }

    # Skills: substring match against a predefined list of common skills.
    common_skills = ["python", "java", "javascript", "sql",
                     "machine learning", "data analysis"]
    text_lower = text.lower()
    entities["skills"] = [skill for skill in common_skills if skill in text_lower]

    # Education / experience: keyword spotting over sentences.
    # Single pass over doc.sents (the original iterated twice and
    # lowercased every sentence twice).
    education_keywords = ["university", "college", "bachelor", "master", "phd", "degree"]
    experience_keywords = ["experience", "work", "job", "position", "role"]
    for sent in doc.sents:
        sent_lower = sent.text.lower()
        if any(keyword in sent_lower for keyword in education_keywords):
            entities["education"].append(sent.text.strip())
        if any(keyword in sent_lower for keyword in experience_keywords):
            entities["experience"].append(sent.text.strip())

    # Contact information: all emails first, then all phone numbers.
    entities["contact"] = _EMAIL_RE.findall(text) + _PHONE_RE.findall(text)
    return entities


def analyze_resume(text, entities):
    """Score the resume on four 0-100 axes.

    Returns a dict with keys "completeness", "skills_match", "formatting"
    and "keyword_optimization".
    """
    scores = {
        "completeness": 0,
        "skills_match": 0,
        "formatting": 0,
        "keyword_optimization": 0
    }

    # Completeness: 25 points per non-empty section (4 sections).
    score_components = sum(
        1 for section in ("skills", "education", "experience", "contact")
        if entities[section]
    )
    scores["completeness"] = (score_components / 4) * 100

    # Skills match: fraction of the desired skills actually found.
    desired_skills = ["python", "java", "javascript", "sql", "machine learning"]
    matched_skills = sum(1 for skill in entities["skills"] if skill in desired_skills)
    scores["skills_match"] = (matched_skills / len(desired_skills)) * 100

    # Formatting: five cheap 20-point heuristics.
    formatting_score = 0
    if len(text.split('\n')) > 5:
        formatting_score += 20          # has multiple lines/sections
    if len(text) > 200:
        formatting_score += 20          # not trivially short
    if any(char.isupper() for char in text):
        formatting_score += 20          # uses capitalization
    if _YEAR_RE.search(text):
        formatting_score += 20          # contains a 4-digit year (dates)
    if len(_SENTENCE_PUNCT_RE.findall(text)) > 3:
        formatting_score += 20          # has full sentences
    scores["formatting"] = formatting_score

    # Keyword optimization: count NER hits on the first 512 characters.
    # NOTE(review): 512 is a character slice, not a token limit — it only
    # approximates the model's context window; verify if precision matters.
    keywords = keyword_extractor(text[:512])
    scores["keyword_optimization"] = min(len(keywords) * 10, 100)

    return scores


def generate_recommendations(scores, entities):
    """Build a human-readable recommendation string from the scores.

    Each low-scoring axis contributes a themed group of bullet points.
    """
    recommendations = []

    if scores["completeness"] < 75:
        recommendations.append("šŸ“‹ Add more sections to your resume to improve completeness.")
        if not entities["skills"]:
            recommendations.append("- Add a skills section")
        if not entities["education"]:
            recommendations.append("- Add education details")
        if not entities["experience"]:
            recommendations.append("- Add work experience")
        if not entities["contact"]:
            recommendations.append("- Add contact information")

    if scores["skills_match"] < 60:
        recommendations.append("\nšŸ’” Consider adding more relevant skills:")
        recommendations.append("- Focus on technical skills like Python, Java, SQL")
        recommendations.append("- Include both hard and soft skills")

    if scores["formatting"] < 80:
        recommendations.append("\nšŸ“‘ Improve resume formatting:")
        recommendations.append("- Use clear section headings")
        recommendations.append("- Include dates for experiences")
        recommendations.append("- Use bullet points for better readability")

    if scores["keyword_optimization"] < 70:
        recommendations.append("\nšŸ” Optimize keywords usage:")
        recommendations.append("- Use more industry-specific terms")
        recommendations.append("- Include action verbs")
        recommendations.append("- Mention specific technologies and tools")

    return "\n".join(recommendations)


def process_resume(file):
    """Gradio callback: analyze the uploaded file.

    Returns (scores dict, recommendations string). Handles the no-upload
    case gracefully (fix: the original raised AttributeError on None).
    """
    if file is None:
        return {}, "Please upload a resume file first."
    text = extract_text_from_resume(file)
    entities = extract_information(text)
    scores = analyze_resume(text, entities)
    recommendations = generate_recommendations(scores, entities)
    return scores, recommendations


def create_interface():
    """Build and return the Gradio Blocks UI for the analyzer."""
    with gr.Blocks() as app:
        gr.Markdown("""
        # Resume Analyzer and Optimizer
        Upload your resume to get personalized analysis and recommendations.
        """)

        with gr.Row():
            file_input = gr.File(
                label="Upload Resume (PDF, DOCX, or TXT)",
                # Fix: gradio expects extensions with a leading dot.
                file_types=[".pdf", ".docx", ".txt"]
            )

        with gr.Row():
            analyze_button = gr.Button("Analyze Resume", variant="primary")

        with gr.Row():
            with gr.Column():
                score_output = gr.JSON(label="Analysis Scores")
            with gr.Column():
                recommendations_output = gr.Textbox(
                    label="Recommendations",
                    lines=10
                )

        analyze_button.click(
            fn=process_resume,
            inputs=[file_input],
            outputs=[score_output, recommendations_output]
        )

    return app


if __name__ == "__main__":
    app = create_interface()
    app.launch()