Jekyll2000 committed on
Commit
dd8e37c
·
verified ·
1 Parent(s): 640099a

Create utils/cv_processor.py

Browse files
Files changed (1) hide show
  1. utils/cv_processor.py +57 -0
utils/cv_processor.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PyPDF2 import PdfReader
2
+ from docx import Document
3
+ from sentence_transformers import SentenceTransformer
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+ import re
6
+ import numpy as np
7
+ import os
8
+
9
class CVProcessor:
    """Score a CV file (PDF or DOCX) against the requirements of a job role.

    The score combines two signals computed in ``evaluate``:

    * the cosine similarity between the sentence-embedding of the CV text
      and the embedding of the role's ``required_skills`` string, and
    * the sum of every "<N> years" figure mentioned in the CV text.
    """

    # A CV must score strictly above this cosine similarity to qualify.
    SIMILARITY_THRESHOLD = 0.4
    # Longer CV texts are cut to this many characters (plus "...") in the
    # summary returned by ``evaluate``.
    SUMMARY_MAX_CHARS = 2000
    # "<N> years", "<N> year" and "<N>+ years"; applied to lower-cased text.
    _EXPERIENCE_PATTERN = re.compile(r"(\d+)\+?\s+years?")

    def __init__(self):
        """Load the sentence-embedding model and the job requirement table."""
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.job_reqs = self._load_job_requirements()

    def extract_text(self, file_path):
        """Return the text of a ``.pdf`` or ``.docx`` file as one string.

        Page texts (PDF) or paragraph texts (DOCX) are joined with a single
        space. The extension check is case-insensitive.

        Args:
            file_path: Path of the CV file, as a string.

        Returns:
            The extracted text, or ``None`` when the extension is neither
            ``.pdf`` nor ``.docx``.
        """
        lowered = file_path.lower()
        if lowered.endswith('.pdf'):
            reader = PdfReader(file_path)
            # ``or ""`` keeps the join safe if a page yields no text at all.
            return " ".join(page.extract_text() or "" for page in reader.pages)
        if lowered.endswith('.docx'):
            doc = Document(file_path)
            return " ".join(para.text for para in doc.paragraphs)
        return None

    @classmethod
    def _total_experience(cls, text):
        """Return the sum of all year counts ("5 years", "3+ years") in *text*."""
        return sum(int(m) for m in cls._EXPERIENCE_PATTERN.findall(text.lower()))

    def evaluate(self, cv_path, job_role):
        """Evaluate the CV at *cv_path* against *job_role*.

        Args:
            cv_path: Path of a ``.pdf`` or ``.docx`` CV file.
            job_role: A key of ``self.job_reqs``.

        Returns:
            A dict with ``"is_qualified"`` (bool) and ``"cv_summary"``, the
            latter holding ``"text"`` (CV text, truncated with ``"..."`` when
            longer than ``SUMMARY_MAX_CHARS``), ``"experience"`` (int) and
            ``"skills_match"`` (float cosine similarity).

        Raises:
            ValueError: If the file type of *cv_path* is not supported.
            KeyError: If *job_role* is not in ``self.job_reqs``.
        """
        cv_text = self.extract_text(cv_path)
        if cv_text is None:
            # Fail with a clear message instead of an unrelated error later.
            raise ValueError(
                f"Unsupported CV file type: {cv_path!r} (expected .pdf or .docx)"
            )
        reqs = self.job_reqs[job_role]

        # Semantic similarity
        cv_embed = self.model.encode(cv_text)
        req_embed = self.model.encode(reqs["required_skills"])
        similarity = cosine_similarity([cv_embed], [req_embed])[0][0]

        # Experience check
        total_exp = self._total_experience(cv_text)

        # bool() so the result is always a plain Python bool: the comparison
        # on the similarity score alone yields a NumPy boolean, which would
        # leak out through ``and`` short-circuiting and is not JSON-friendly.
        is_qualified = bool(
            similarity > self.SIMILARITY_THRESHOLD
            and total_exp >= reqs["min_experience"]
        )

        if len(cv_text) > self.SUMMARY_MAX_CHARS:
            summary_text = cv_text[:self.SUMMARY_MAX_CHARS] + "..."
        else:
            summary_text = cv_text

        return {
            "is_qualified": is_qualified,
            "cv_summary": {
                "text": summary_text,
                "experience": total_exp,
                "skills_match": float(similarity),
            },
        }

    def _load_job_requirements(self):
        """Return the built-in mapping of job role -> requirement dict.

        Each requirement dict has ``"min_experience"`` (int, compared against
        the summed year counts) and ``"required_skills"`` (a comma-separated
        string that is embedded as a whole).
        """
        return {
            "Software Engineer": {
                "min_experience": 2,
                "required_skills": "programming, algorithms, software development, testing, debugging"
            },
            "Data Scientist": {
                "min_experience": 3,
                "required_skills": "machine learning, statistics, python, data analysis, SQL"
            }
        }