"""CV screening helper: extract text from a CV file and score it for a job role."""

import os
import re

import numpy as np
from docx import Document
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


class CVProcessor:
    """Score a CV against the built-in requirements of a job role.

    The score combines two signals: the cosine similarity between the
    embedding of the CV text and the embedding of the role's
    ``required_skills`` string, and a regex-based estimate of the years of
    experience mentioned in the CV.
    """

    # A CV qualifies only when its similarity is strictly greater than this.
    SIMILARITY_THRESHOLD = 0.4
    # Maximum number of CV characters echoed back in the summary.
    SUMMARY_MAX_CHARS = 2000
    # Matches "<digits> year" / "<digits> years"; applied to lower-cased text.
    _YEARS_PATTERN = re.compile(r"(\d+)\s+years?")

    def __init__(self):
        """Load the sentence-embedding model and the job requirement table."""
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.job_reqs = self._load_job_requirements()

    def extract_text(self, file_path):
        """Return the text of a ``.pdf`` or ``.docx`` file as one string.

        Args:
            file_path: Path to the CV, as a ``str`` or any ``os.PathLike``.
                The extension is matched case-insensitively.

        Returns:
            The page texts (PDF) or paragraph texts (DOCX) joined with single
            spaces, or ``None`` when the extension is neither ``.pdf`` nor
            ``.docx``.
        """
        path = os.fspath(file_path)
        lowered = path.lower()

        if lowered.endswith(".pdf"):
            reader = PdfReader(path)
            # Treat a falsy extraction result as empty text so that join
            # never receives a non-string item.
            return " ".join(page.extract_text() or "" for page in reader.pages)

        if lowered.endswith(".docx"):
            doc = Document(path)
            return " ".join(para.text for para in doc.paragraphs)

        # Unsupported type: keep the historical "no text" signal explicit.
        return None

    def evaluate(self, cv_path, job_role):
        """Evaluate the CV at *cv_path* against the requirements of *job_role*.

        Args:
            cv_path: Path to a ``.pdf`` or ``.docx`` CV.
            job_role: A key of ``self.job_reqs``.

        Returns:
            A dict with ``"is_qualified"`` (``bool``) and ``"cv_summary"``,
            itself a dict with ``"text"`` (the CV text, cut to
            ``SUMMARY_MAX_CHARS`` characters plus ``"..."`` when longer),
            ``"experience"`` (``int``) and ``"skills_match"`` (``float``).

        Raises:
            ValueError: If the file type of *cv_path* is not supported.
            KeyError: If *job_role* is not a known role.
        """
        cv_text = self.extract_text(cv_path)
        if cv_text is None:
            raise ValueError(
                f"Unsupported CV file type: {cv_path!r} (expected .pdf or .docx)"
            )

        try:
            reqs = self.job_reqs[job_role]
        except KeyError:
            known_roles = ", ".join(sorted(self.job_reqs))
            raise KeyError(
                f"Unknown job role {job_role!r}; known roles: {known_roles}"
            ) from None

        # Semantic similarity between the whole CV and the skills string.
        cv_embed = self.model.encode(cv_text)
        req_embed = self.model.encode(reqs["required_skills"])
        # Convert to a built-in float up front so that everything derived
        # from it (including the qualification flag) is a plain Python value.
        similarity = float(cosine_similarity([cv_embed], [req_embed])[0][0])

        # Experience check.
        # NOTE(review): this adds up every "<n> years" mention, so repeated or
        # overlapping periods are counted more than once, and forms such as
        # "5+ years" are not matched. Kept as-is; confirm this is intended.
        total_exp = sum(
            int(years) for years in self._YEARS_PATTERN.findall(cv_text.lower())
        )

        is_qualified = (
            similarity > self.SIMILARITY_THRESHOLD
            and total_exp >= reqs["min_experience"]
        )

        if len(cv_text) > self.SUMMARY_MAX_CHARS:
            summary_text = cv_text[:self.SUMMARY_MAX_CHARS] + "..."
        else:
            summary_text = cv_text

        return {
            "is_qualified": is_qualified,
            "cv_summary": {
                "text": summary_text,
                "experience": total_exp,
                "skills_match": similarity,
            },
        }

    def _load_job_requirements(self):
        """Return the hard-coded requirement table, keyed by job role name."""
        return {
            "Software Engineer": {
                "min_experience": 2,
                "required_skills": "programming, algorithms, software development, testing, debugging",
            },
            "Data Scientist": {
                "min_experience": 3,
                "required_skills": "machine learning, statistics, python, data analysis, SQL",
            },
        }