Jekyll2000 committed on
Commit
15f9017
·
verified ·
1 Parent(s): f1e44b7

Create utils/cv_processor.py

Browse files
Files changed (1) hide show
  1. utils/cv_processor.py +111 -0
utils/cv_processor.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PyPDF2 import PdfReader
2
+ from docx import Document
3
+ import re
4
+ from sentence_transformers import SentenceTransformer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ import numpy as np
7
+ import os
8
+
9
# Sentence-embedding model used for the CV / job-requirements similarity score.
# It is instantiated once, at import time, and shared by every call in this
# module.
_EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(_EMBEDDING_MODEL_NAME)
11
+
12
def extract_text_from_file(file_path):
    """Extract whitespace-normalized plain text from a PDF or DOCX file.

    The file format is selected from the file extension, compared
    case-insensitively so that ``CV.PDF`` and ``cv.pdf`` are handled alike.

    Args:
        file_path: Location of the document, as a ``str`` or any
            ``os.PathLike`` object (for example ``pathlib.Path``).

    Returns:
        The document text with every run of whitespace collapsed to a single
        space and leading/trailing whitespace removed.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.docx``.
    """
    path = os.fspath(file_path)
    lowered_path = path.lower()

    if lowered_path.endswith('.pdf'):
        with open(path, 'rb') as f:
            reader = PdfReader(f)
            # Defensive: treat a falsy per-page result (None or "") as "no
            # text" so that join() can never fail on a non-string item.
            text = " ".join(page.extract_text() or "" for page in reader.pages)
    elif lowered_path.endswith('.docx'):
        doc = Document(path)
        # Only body paragraphs are read here.
        text = " ".join(para.text for para in doc.paragraphs)
    else:
        raise ValueError("Unsupported file format")

    # Collapse newlines, tabs and repeated spaces into single spaces.
    return re.sub(r'\s+', ' ', text).strip()
26
+
27
def evaluate_cv(cv_path, job_role, similarity_threshold=0.4):
    """Screen a CV against the stored requirements of a job role.

    Three independent checks are applied; failing any of them marks the
    candidate as not qualified and records a human-readable reason:

    1. Experience: every "<N> years" / "<N>+ years" mention in the CV is
       summed and compared with the role's ``min_experience``. This is a
       text heuristic, not a parsed employment history.
    2. Education: at least one of the role's ``required_education`` keywords
       must occur in the CV (case-insensitive substring match).
    3. Skills: cosine similarity between the embedding of the CV text and the
       embedding of the role's ``required_skills`` must reach
       ``similarity_threshold``.

    Args:
        cv_path: Path to the CV file, passed to ``extract_text_from_file``.
        job_role: Role name, passed to ``load_job_requirements``.
        similarity_threshold: Minimum cosine similarity for the skills check.
            Defaults to 0.4, the value that was previously hard-coded.

    Returns:
        A dict with keys ``"is_qualified"`` (bool), ``"rejection_reasons"``
        (list of str, empty when qualified) and ``"cv_summary"`` (dict with
        ``"text"``, ``"experience"``, ``"skills_similarity"`` and
        ``"education"``).

    Raises:
        ValueError: Propagated from ``extract_text_from_file`` for an
            unsupported file format.
    """
    job_requirements = load_job_requirements(job_role)
    cv_text = extract_text_from_file(cv_path)
    # Lower-case once; both the experience and the education checks need it.
    cv_text_lower = cv_text.lower()

    # NOTE(review): embedding models usually truncate input beyond their
    # maximum sequence length, so a long CV may be only partially reflected
    # in this score -- confirm against the model's documentation.
    cv_embedding = model.encode(cv_text)
    requirements_embedding = model.encode(job_requirements["required_skills"])

    # cosine_similarity expects 2-D inputs, hence the reshape to one row each.
    similarity = cosine_similarity(
        cv_embedding.reshape(1, -1),
        requirements_embedding.reshape(1, -1),
    )[0][0]

    rejection_reasons = []

    # Experience: accept "5 years", "5+ years", "5 + years" and "5years".
    # The optional "+" matters because "N+ years" is a very common phrasing
    # that a plain r"(\d+)\s+years?" silently skips.
    experience_matches = re.findall(r"(\d+)\s*\+?\s*years?", cv_text_lower)
    total_experience = sum(int(match) for match in experience_matches)

    min_experience = job_requirements["min_experience"]
    if total_experience < min_experience:
        rejection_reasons.append(
            f"Requires {min_experience} years experience, found {total_experience}"
        )

    # Education: any one keyword appearing anywhere in the CV is enough.
    education_keywords = job_requirements["required_education"]
    has_education = any(
        keyword.lower() in cv_text_lower for keyword in education_keywords
    )
    if not has_education:
        rejection_reasons.append(
            f"Required education not found: {', '.join(education_keywords)}"
        )

    # Skills: semantic closeness of the whole CV to the skills description.
    if similarity < similarity_threshold:
        rejection_reasons.append(
            "CV content doesn't sufficiently match the required skills"
        )

    # Summary handed on to the interview stage.
    cv_summary = {
        "text": cv_text,
        "experience": total_experience,
        # Convert the NumPy scalar to a plain float for serialisation.
        "skills_similarity": float(similarity),
        "education": has_education,
    }

    return {
        # Every failed check appends exactly one reason, so "qualified" is
        # the same as "no reasons recorded".
        "is_qualified": not rejection_reasons,
        "rejection_reasons": rejection_reasons,
        "cv_summary": cv_summary,
    }
89
+
90
def load_job_requirements(job_role):
    """Return the screening requirements for ``job_role``.

    The requirements are hard-coded in this function (a real deployment would
    read them from a database or from files). A fresh dictionary is built on
    every call.

    Args:
        job_role: Role name; ``"Software Engineer"`` and ``"Data Scientist"``
            are known.

    Returns:
        A dict with ``"min_experience"`` (int, years), ``"required_education"``
        (list of keyword strings) and ``"required_skills"`` (free-text skills
        description). Any unknown role falls back to the
        ``"Software Engineer"`` entry.
    """
    known_roles = {
        "Software Engineer": {
            "min_experience": 2,
            "required_education": ["Bachelor in Computer Science", "BSc CS", "Engineering"],
            "required_skills": """
                programming, algorithms, data structures, software development,
                testing, debugging, version control, agile methodologies
            """
        },
        "Data Scientist": {
            "min_experience": 3,
            "required_education": ["Master", "PhD", "Statistics", "Data Science"],
            "required_skills": """
                machine learning, statistics, python, R, data analysis,
                data visualization, SQL, predictive modeling
            """
        }
    }

    try:
        return known_roles[job_role]
    except KeyError:
        # Unknown role: use the Software Engineer profile as the default.
        return known_roles["Software Engineer"]