Spaces:
Running
Running
Create utils/cv_processor.py
Browse files- utils/cv_processor.py +57 -0
utils/cv_processor.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from PyPDF2 import PdfReader
|
2 |
+
from docx import Document
|
3 |
+
from sentence_transformers import SentenceTransformer
|
4 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
5 |
+
import re
|
6 |
+
import numpy as np
|
7 |
+
import os
|
8 |
+
|
9 |
+
class CVProcessor:
    """Score a CV file against a built-in table of job requirements.

    The CV text is embedded with a sentence-transformer model and compared
    (cosine similarity) with the role's required-skills string. A candidate
    is qualified when the similarity exceeds ``SIMILARITY_THRESHOLD`` and
    the years of experience found in the text meet the role's minimum.
    """

    # Cosine similarity must be strictly greater than this to qualify.
    SIMILARITY_THRESHOLD = 0.4
    # Maximum number of CV characters echoed back in the summary.
    SUMMARY_MAX_CHARS = 2000

    def __init__(self):
        """Load the embedding model and the job-requirements table."""
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.job_reqs = self._load_job_requirements()

    def extract_text(self, file_path):
        """Return the text of a ``.pdf`` or ``.docx`` file as one string.

        Args:
            file_path: Path to the CV; the extension is matched
                case-insensitively.

        Returns:
            The page texts (PDF) or paragraph texts (DOCX) joined with
            single spaces, or ``None`` when the extension is neither
            ``.pdf`` nor ``.docx``.
        """
        lowered = file_path.lower()
        if lowered.endswith('.pdf'):
            reader = PdfReader(file_path)
            # Defensive: substitute "" if a page yields no text so the join
            # cannot fail on a None entry.
            return " ".join(page.extract_text() or "" for page in reader.pages)
        if lowered.endswith('.docx'):
            doc = Document(file_path)
            return " ".join(para.text for para in doc.paragraphs)
        return None

    def evaluate(self, cv_path, job_role):
        """Evaluate the CV at ``cv_path`` against ``job_role``.

        Args:
            cv_path: Path to a ``.pdf`` or ``.docx`` CV.
            job_role: A key of ``self.job_reqs``.

        Returns:
            A dict with ``"is_qualified"`` (a plain ``bool``) and
            ``"cv_summary"`` holding the (possibly truncated) CV text, the
            summed years of experience, and the similarity as a ``float``.

        Raises:
            ValueError: If the file type is not supported.
            KeyError: If ``job_role`` is not in ``self.job_reqs``.
        """
        cv_text = self.extract_text(cv_path)
        if cv_text is None:
            # Fail early with a clear message instead of an unrelated error
            # from the embedding or regex steps below.
            raise ValueError(
                f"Unsupported CV file type: {cv_path!r} "
                "(expected .pdf or .docx)"
            )
        reqs = self.job_reqs[job_role]

        # Semantic similarity
        cv_embed = self.model.encode(cv_text)
        req_embed = self.model.encode(reqs["required_skills"])
        similarity = cosine_similarity([cv_embed], [req_embed])[0][0]

        # Experience check: every "<N> year(s)" mention is added together.
        exp_matches = re.findall(r"(\d+)\s+years?", cv_text.lower())
        total_exp = sum(int(m) for m in exp_matches)

        # bool() keeps the result a built-in bool even when the comparison
        # yields a NumPy scalar, so the dict stays JSON-serializable.
        is_qualified = bool(
            similarity > self.SIMILARITY_THRESHOLD
            and total_exp >= reqs["min_experience"]
        )

        if len(cv_text) > self.SUMMARY_MAX_CHARS:
            summary_text = cv_text[:self.SUMMARY_MAX_CHARS] + "..."
        else:
            summary_text = cv_text

        return {
            "is_qualified": is_qualified,
            "cv_summary": {
                "text": summary_text,
                "experience": total_exp,
                "skills_match": float(similarity),
            },
        }

    def _load_job_requirements(self):
        """Return the hard-coded requirements table keyed by role name."""
        return {
            "Software Engineer": {
                "min_experience": 2,
                "required_skills": "programming, algorithms, software development, testing, debugging"
            },
            "Data Scientist": {
                "min_experience": 3,
                "required_skills": "machine learning, statistics, python, data analysis, SQL"
            },
        }
|