# Spaces: Running
import os
import re
from pathlib import Path

import numpy as np
from docx import Document
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
class CVProcessor:
    """Screen a CV file against the requirements of a named job role.

    The CV text is extracted from a PDF or DOCX file, embedded with a
    SentenceTransformer model and compared (cosine similarity) with the
    role's required-skills string.  Year counts mentioned in the text are
    summed and compared with the role's minimum experience.
    """

    # Cosine similarity the CV must exceed to count as a skills match.
    SIMILARITY_THRESHOLD = 0.4
    # Maximum number of characters of CV text echoed back in the summary.
    SUMMARY_MAX_CHARS = 2000
    # Matches "3 years", "1 year", "5+ years", "2yrs" in lower-cased text.
    _EXPERIENCE_RE = re.compile(r"(\d+)\s*\+?\s*(?:years?|yrs?)\b")

    def __init__(self):
        """Load the embedding model and the built-in job requirements."""
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.job_reqs = self._load_job_requirements()

    def extract_text(self, file_path):
        """Return the text of a PDF or DOCX file as a single string.

        Args:
            file_path: Path (``str`` or ``os.PathLike``) to a ``.pdf`` or
                ``.docx`` file.  The extension is matched
                case-insensitively.

        Returns:
            The text of every page (PDF) or paragraph (DOCX), joined with
            single spaces.

        Raises:
            ValueError: If the extension is neither ``.pdf`` nor ``.docx``.
        """
        path_str = os.fspath(file_path)
        suffix = Path(path_str).suffix.lower()

        if suffix == ".pdf":
            reader = PdfReader(path_str)
            # Defensive: treat a page that yields no text as empty so the
            # join below cannot fail on a non-string value.
            return " ".join(page.extract_text() or "" for page in reader.pages)

        if suffix == ".docx":
            doc = Document(path_str)
            return " ".join(para.text for para in doc.paragraphs)

        # Previously this fell through and returned None, which only
        # surfaced later as an unrelated error inside evaluate().
        raise ValueError(
            f"Unsupported file type {suffix!r}; expected '.pdf' or '.docx'"
        )

    @staticmethod
    def _total_experience_years(text):
        """Sum every "<N> year(s)" style mention found in *text*.

        This is a heuristic: each mention is counted, so overlapping
        periods described twice are added twice.  Returns 0 when nothing
        matches.
        """
        matches = CVProcessor._EXPERIENCE_RE.findall(text.lower())
        return sum(int(years) for years in matches)

    def evaluate(self, cv_path, job_role):
        """Score the CV at *cv_path* against *job_role*.

        Args:
            cv_path: Path to the candidate's ``.pdf`` or ``.docx`` CV.
            job_role: Key into ``self.job_reqs`` (e.g. "Data Scientist").

        Returns:
            A dict with ``"is_qualified"`` (plain ``bool``) and
            ``"cv_summary"``, itself a dict holding the (possibly
            truncated) ``"text"``, the summed ``"experience"`` in years and
            the ``"skills_match"`` similarity as a plain ``float``.

        Raises:
            ValueError: If the CV file type is unsupported.
            KeyError: If *job_role* is not a known role.
        """
        cv_text = self.extract_text(cv_path)
        reqs = self.job_reqs[job_role]

        # Semantic similarity between the whole CV and the skills string.
        cv_embed = self.model.encode(cv_text)
        req_embed = self.model.encode(reqs["required_skills"])
        similarity = cosine_similarity([cv_embed], [req_embed])[0][0]

        # Experience check.
        total_exp = self._total_experience_years(cv_text)

        # bool(): the comparison on a NumPy scalar yields numpy.bool_, and
        # `and` would return it unchanged when False; a plain bool keeps the
        # result serializable alongside the float() conversion below.
        is_qualified = bool(
            similarity > self.SIMILARITY_THRESHOLD
            and total_exp >= reqs["min_experience"]
        )

        if len(cv_text) > self.SUMMARY_MAX_CHARS:
            summary_text = cv_text[:self.SUMMARY_MAX_CHARS] + "..."
        else:
            summary_text = cv_text

        return {
            "is_qualified": is_qualified,
            "cv_summary": {
                "text": summary_text,
                "experience": total_exp,
                "skills_match": float(similarity),
            },
        }

    def _load_job_requirements(self):
        """Return the built-in mapping of job role to its requirements.

        Each role maps to a dict with ``"min_experience"`` (years) and
        ``"required_skills"`` (a comma-separated string that is embedded
        as-is by :meth:`evaluate`).
        """
        return {
            "Software Engineer": {
                "min_experience": 2,
                "required_skills": "programming, algorithms, software development, testing, debugging",
            },
            "Data Scientist": {
                "min_experience": 3,
                "required_skills": "machine learning, statistics, python, data analysis, SQL",
            },
        }