import os
import re

import numpy as np
from docx import Document
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Initialize model for semantic similarity.
# Loaded once at import time so every evaluate_cv() call reuses it.
model = SentenceTransformer('all-MiniLM-L6-v2')


def extract_text_from_file(file_path):
    """Return the whitespace-normalized text of a PDF or DOCX file.

    Args:
        file_path: Path string of the file to read. The format is chosen
            from the file extension, compared case-insensitively.

    Returns:
        The extracted text with every run of whitespace collapsed to a
        single space and leading/trailing whitespace removed.

    Raises:
        ValueError: If the path ends with neither ``.pdf`` nor ``.docx``.
    """
    # Compare on a lowercased copy so "CV.PDF" / "cv.Docx" are accepted too.
    lowered_path = file_path.lower()

    if lowered_path.endswith('.pdf'):
        with open(file_path, 'rb') as f:
            reader = PdfReader(f)
            # Defensive: treat a falsy extraction result as an empty page so
            # the join below never receives a non-string.
            text = " ".join(
                (page.extract_text() or "") for page in reader.pages
            )
    elif lowered_path.endswith('.docx'):
        doc = Document(file_path)
        text = " ".join(para.text for para in doc.paragraphs)
    else:
        raise ValueError("Unsupported file format")

    # Clean text
    return re.sub(r'\s+', ' ', text).strip()


def evaluate_cv(cv_path, job_role, similarity_threshold=0.4):
    """Screen a CV against the stored requirements of a job role.

    Three checks are applied: minimum years of experience, presence of at
    least one required education keyword, and semantic similarity between
    the CV text and the role's required-skills text.

    Args:
        cv_path: Path to the CV file (``.pdf`` or ``.docx``).
        job_role: Role name passed to ``load_job_requirements``.
        similarity_threshold: Minimum cosine similarity between the CV
            embedding and the required-skills embedding. Defaults to 0.4.

    Returns:
        A dict with keys ``"is_qualified"`` (bool), ``"rejection_reasons"``
        (list of str, empty when qualified) and ``"cv_summary"`` (dict with
        ``"text"``, ``"experience"``, ``"skills_similarity"`` and
        ``"education"``).

    Raises:
        ValueError: Propagated from ``extract_text_from_file`` for an
            unsupported file extension.
    """
    # Load job requirements (you would have these stored for each role)
    job_requirements = load_job_requirements(job_role)

    # Extract text from CV
    cv_text = extract_text_from_file(cv_path)
    cv_text_lower = cv_text.lower()  # reused by the keyword checks below

    # Create embeddings
    cv_embedding = model.encode(cv_text)
    requirements_embedding = model.encode(job_requirements["required_skills"])

    # Calculate similarity
    similarity = cosine_similarity(
        cv_embedding.reshape(1, -1),
        requirements_embedding.reshape(1, -1),
    )[0][0]

    # Check minimum requirements
    rejection_reasons = []

    # Check for minimum experience.
    # NOTE: every "<number> year(s)" mention in the CV is added together, so
    # this is a rough heuristic rather than a parsed employment history.
    experience_pattern = r"(\d+)\s+years?"
    experience_matches = re.findall(experience_pattern, cv_text_lower)
    total_experience = sum(int(match) for match in experience_matches)

    if total_experience < job_requirements["min_experience"]:
        rejection_reasons.append(
            f"Requires {job_requirements['min_experience']} years experience, "
            f"found {total_experience}"
        )

    # Check education: a single matching keyword is enough.
    education_keywords = job_requirements["required_education"]
    has_education = any(
        keyword.lower() in cv_text_lower for keyword in education_keywords
    )

    if not has_education:
        rejection_reasons.append(
            f"Required education not found: {', '.join(education_keywords)}"
        )

    # Check similarity threshold
    if similarity < similarity_threshold:
        rejection_reasons.append(
            "CV content doesn't sufficiently match the required skills"
        )

    # Prepare CV summary for interview
    cv_summary = {
        "text": cv_text,
        "experience": total_experience,
        # Convert the numpy scalar to a plain Python float.
        "skills_similarity": float(similarity),
        "education": has_education,
    }

    return {
        # Qualified exactly when no check recorded a rejection reason.
        "is_qualified": not rejection_reasons,
        "rejection_reasons": rejection_reasons,
        "cv_summary": cv_summary,
    }


def load_job_requirements(job_role):
    """Return the requirements dict for ``job_role``.

    Args:
        job_role: Role name, e.g. ``"Software Engineer"`` or
            ``"Data Scientist"``.

    Returns:
        A dict with keys ``"min_experience"`` (int), ``"required_education"``
        (list of keyword strings) and ``"required_skills"`` (str). An
        unknown role name falls back to the ``"Software Engineer"`` entry
        rather than raising.
    """
    # In a real app, these would be stored in a database or files
    requirements = {
        "Software Engineer": {
            "min_experience": 2,
            "required_education": [
                "Bachelor in Computer Science",
                "BSc CS",
                "Engineering",
            ],
            "required_skills": (
                " programming, algorithms, data structures, "
                "software development, testing, debugging, "
                "version control, agile methodologies "
            ),
        },
        "Data Scientist": {
            "min_experience": 3,
            "required_education": [
                "Master",
                "PhD",
                "Statistics",
                "Data Science",
            ],
            "required_skills": (
                " machine learning, statistics, python, R, data analysis, "
                "data visualization, SQL, predictive modeling "
            ),
        },
    }

    return requirements.get(job_role, requirements["Software Engineer"])