# Page-extraction residue kept for provenance (not part of the program):
# Spaces: Running
# File size: 3,859 Bytes
# 15f9017
from PyPDF2 import PdfReader
from docx import Document
import re
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import os
# Sentence-embedding model used for semantic similarity scoring.
# Created once at import time (module-level statement) and shared by
# evaluate_cv, which calls model.encode on the CV text and the role's skills.
model = SentenceTransformer('all-MiniLM-L6-v2')
def extract_text_from_file(file_path):
    """Extract whitespace-normalized plain text from a PDF or DOCX file.

    Args:
        file_path: Location of the document, as a ``str`` or ``os.PathLike``.
            The format is chosen from the file extension, compared
            case-insensitively (``.pdf`` or ``.docx``).

    Returns:
        The document text with every run of whitespace collapsed to a single
        space and leading/trailing whitespace removed.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.docx``.
    """
    # Accept pathlib.Path and friends as well as plain strings.
    path = os.fspath(file_path)
    # Compare on a lowered copy so "CV.PDF" / "Resume.Docx" are recognised.
    lowered = path.lower()

    if lowered.endswith('.pdf'):
        with open(path, 'rb') as f:
            reader = PdfReader(f)
            # Defensive: treat a page that yields no text (None or empty)
            # as an empty string so the join cannot fail on a non-str item.
            text = " ".join(page.extract_text() or "" for page in reader.pages)
    elif lowered.endswith('.docx'):
        doc = Document(path)
        text = " ".join(para.text for para in doc.paragraphs)
    else:
        raise ValueError("Unsupported file format")

    # Collapse newlines, tabs and repeated spaces into single spaces.
    return re.sub(r'\s+', ' ', text).strip()
def evaluate_cv(cv_path, job_role, similarity_threshold=0.4):
    """Screen a CV against the stored requirements for a job role.

    Three checks are applied, and each failed check adds one entry to the
    rejection reasons (in this order): minimum years of experience,
    required education keywords, and semantic similarity between the CV
    text and the role's required-skills description.

    Args:
        cv_path: Path of the CV file; passed to ``extract_text_from_file``.
        job_role: Role name; passed to ``load_job_requirements``.
        similarity_threshold: Minimum cosine similarity between the CV
            embedding and the required-skills embedding. Defaults to 0.4.

    Returns:
        A dict with:
            ``is_qualified``: ``True`` when no check failed.
            ``rejection_reasons``: list of human-readable failure messages.
            ``cv_summary``: dict with the extracted ``text``, the summed
                ``experience`` in years, the ``skills_similarity`` as a
                float, and the ``education`` flag.

    Raises:
        ValueError: Propagated from ``extract_text_from_file`` for
            unsupported file formats.
    """
    job_requirements = load_job_requirements(job_role)
    cv_text = extract_text_from_file(cv_path)
    # Lower-case once; both the experience and education checks need it.
    cv_text_lower = cv_text.lower()

    # Embed the CV and the skills description, then compare them.
    cv_embedding = model.encode(cv_text)
    requirements_embedding = model.encode(job_requirements["required_skills"])
    similarity = float(
        cosine_similarity(
            cv_embedding.reshape(1, -1),
            requirements_embedding.reshape(1, -1),
        )[0][0]
    )

    rejection_reasons = []

    # Experience: every "<N> year(s)" mention is summed. The optional "+"
    # lets the common "5+ years" phrasing count instead of being skipped.
    # NOTE(review): summing all mentions is a rough heuristic and can
    # overcount when the same period is mentioned more than once.
    experience_matches = re.findall(r"(\d+)\+?\s+years?", cv_text_lower)
    total_experience = sum(int(match) for match in experience_matches)
    min_experience = job_requirements["min_experience"]
    if total_experience < min_experience:
        rejection_reasons.append(
            f"Requires {min_experience} years experience, found {total_experience}"
        )

    # Education: any one keyword appearing in the CV (case-insensitive) passes.
    education_keywords = job_requirements["required_education"]
    has_education = any(
        keyword.lower() in cv_text_lower for keyword in education_keywords
    )
    if not has_education:
        rejection_reasons.append(
            f"Required education not found: {', '.join(education_keywords)}"
        )

    # Skills: semantic similarity must reach the configured threshold.
    if similarity < similarity_threshold:
        rejection_reasons.append(
            "CV content doesn't sufficiently match the required skills"
        )

    # Summary handed on to the interview stage.
    cv_summary = {
        "text": cv_text,
        "experience": total_experience,
        "skills_similarity": similarity,
        "education": has_education,
    }

    return {
        # Every failed check appended a reason, so an empty list means pass.
        "is_qualified": not rejection_reasons,
        "rejection_reasons": rejection_reasons,
        "cv_summary": cv_summary,
    }
def load_job_requirements(job_role):
    """Return the screening requirements for a job role.

    The role name is matched ignoring case and surrounding whitespace.
    Unknown roles (and non-string values such as ``None``) fall back to the
    "Software Engineer" requirements.

    Args:
        job_role: Name of the role, e.g. ``"Software Engineer"`` or
            ``"Data Scientist"``.

    Returns:
        A dict with ``min_experience`` (int, years), ``required_education``
        (list of keyword strings) and ``required_skills`` (free-text skills
        description).
    """
    # In a real app, these would be stored in a database or files.
    requirements = {
        "Software Engineer": {
            "min_experience": 2,
            "required_education": [
                "Bachelor in Computer Science",
                "BSc CS",
                "Engineering",
            ],
            "required_skills": (
                "\nprogramming, algorithms, data structures, software development,\n"
                "testing, debugging, version control, agile methodologies\n"
            ),
        },
        "Data Scientist": {
            "min_experience": 3,
            "required_education": ["Master", "PhD", "Statistics", "Data Science"],
            "required_skills": (
                "\nmachine learning, statistics, python, R, data analysis,\n"
                "data visualization, SQL, predictive modeling\n"
            ),
        },
    }

    # Tolerate differences in case/padding so "data scientist" does not
    # silently receive the Software Engineer fallback.
    if isinstance(job_role, str):
        wanted = job_role.strip().casefold()
        for role_name, role_requirements in requirements.items():
            if role_name.casefold() == wanted:
                return role_requirements

    return requirements.get(job_role, requirements["Software Engineer"])