Jekyll2000 committed on
Commit ae77d36 · verified · 1 Parent(s): b004661

Delete utils

utils/cv_processor.py DELETED
@@ -1,111 +0,0 @@
- from PyPDF2 import PdfReader
- from docx import Document
- import re
- from sentence_transformers import SentenceTransformer
- from sklearn.metrics.pairwise import cosine_similarity
- import numpy as np
- import os
-
- # Initialize model for semantic similarity
- model = SentenceTransformer('all-MiniLM-L6-v2')
-
- def extract_text_from_file(file_path):
-     if file_path.endswith('.pdf'):
-         with open(file_path, 'rb') as f:
-             reader = PdfReader(f)
-             # extract_text() can return None for image-only pages, so guard with ""
-             text = " ".join([(page.extract_text() or "") for page in reader.pages])
-     elif file_path.endswith('.docx'):
-         doc = Document(file_path)
-         text = " ".join([para.text for para in doc.paragraphs])
-     else:
-         raise ValueError("Unsupported file format")
-
-     # Clean text
-     text = re.sub(r'\s+', ' ', text).strip()
-     return text
-
- def evaluate_cv(cv_path, job_role):
-     # Load job requirements (you would have these stored for each role)
-     job_requirements = load_job_requirements(job_role)
-
-     # Extract text from CV
-     cv_text = extract_text_from_file(cv_path)
-
-     # Create embeddings
-     cv_embedding = model.encode(cv_text)
-     requirements_embedding = model.encode(job_requirements["required_skills"])
-
-     # Calculate similarity
-     similarity = cosine_similarity(
-         cv_embedding.reshape(1, -1),
-         requirements_embedding.reshape(1, -1)
-     )[0][0]
-
-     # Check minimum requirements
-     rejection_reasons = []
-     meets_requirements = True
-
-     # Check for minimum experience
-     experience_pattern = r"(\d+)\s+years?"
-     experience_matches = re.findall(experience_pattern, cv_text.lower())
-     total_experience = sum(int(match) for match in experience_matches) if experience_matches else 0
-
-     if total_experience < job_requirements["min_experience"]:
-         meets_requirements = False
-         rejection_reasons.append(
-             f"Requires {job_requirements['min_experience']} years experience, found {total_experience}"
-         )
-
-     # Check education
-     education_keywords = job_requirements["required_education"]
-     has_education = any(keyword.lower() in cv_text.lower() for keyword in education_keywords)
-
-     if not has_education:
-         meets_requirements = False
-         rejection_reasons.append(
-             f"Required education not found: {', '.join(education_keywords)}"
-         )
-
-     # Check similarity threshold
-     if similarity < 0.4:  # Adjust threshold as needed
-         meets_requirements = False
-         rejection_reasons.append(
-             "CV content doesn't sufficiently match the required skills"
-         )
-
-     # Prepare CV summary for interview
-     cv_summary = {
-         "text": cv_text,
-         "experience": total_experience,
-         "skills_similarity": float(similarity),
-         "education": has_education
-     }
-
-     return {
-         "is_qualified": meets_requirements,
-         "rejection_reasons": rejection_reasons,
-         "cv_summary": cv_summary
-     }
-
- def load_job_requirements(job_role):
-     # In a real app, these would be stored in a database or files
-     requirements = {
-         "Software Engineer": {
-             "min_experience": 2,
-             "required_education": ["Bachelor in Computer Science", "BSc CS", "Engineering"],
-             "required_skills": """
-                 programming, algorithms, data structures, software development,
-                 testing, debugging, version control, agile methodologies
-             """
-         },
-         "Data Scientist": {
-             "min_experience": 3,
-             "required_education": ["Master", "PhD", "Statistics", "Data Science"],
-             "required_skills": """
-                 machine learning, statistics, python, R, data analysis,
-                 data visualization, SQL, predictive modeling
-             """
-         }
-     }
-
-     return requirements.get(job_role, requirements["Software Engineer"])
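
Note: the module above exposes evaluate_cv(cv_path, job_role) as its entry point. Below is a minimal caller sketch, assuming the app saved the uploaded CV to disk first; the file name and the surrounding script are illustrative only and not part of this repository.

# Hypothetical caller for the deleted utils/cv_processor.py (illustrative only).
from utils.cv_processor import evaluate_cv

result = evaluate_cv("candidate_cv.pdf", "Data Scientist")  # example path, not from the repo
if result["is_qualified"]:
    print(f"Skills match: {result['cv_summary']['skills_similarity']:.2f}")
else:
    print("Rejected:", "; ".join(result["rejection_reasons"]))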
 
utils/interview_agent.py DELETED
@@ -1,150 +0,0 @@
- from langchain.llms import HuggingFaceHub
- from langchain.chains import RetrievalQA
- from langchain.embeddings import HuggingFaceEmbeddings
- from langchain.vectorstores import FAISS
- from langchain.document_loaders import TextLoader
- from langchain.text_splitter import CharacterTextSplitter
- import os
-
- class InterviewAgent:
-     def __init__(self, job_role, cv_summary):
-         self.job_role = job_role
-         self.cv_summary = cv_summary
-         self.llm = HuggingFaceHub(
-             repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
-             model_kwargs={"temperature": 0.5, "max_length": 2048}
-         )
-         self.questions = self._generate_questions()
-
-     def _generate_questions(self):
-         # Load job-specific questions
-         base_questions = self._load_base_questions()
-
-         # Generate CV-specific questions
-         cv_questions = self._generate_cv_questions()
-
-         return base_questions + cv_questions
-
-     def _load_base_questions(self):
-         # In a real app, these would be more sophisticated and loaded from a database
-         role_questions = {
-             "Software Engineer": [
-                 {"text": "Explain the SOLID principles in object-oriented design.", "type": "technical", "weight": 0.3},
-                 {"text": "How would you optimize a slow database query?", "type": "technical", "weight": 0.25},
-                 {"text": "Describe your experience with Agile methodologies.", "type": "behavioral", "weight": 0.2},
-                 {"text": "How do you handle conflicts in a team setting?", "type": "behavioral", "weight": 0.15},
-                 {"text": "Where do you see yourself in 5 years?", "type": "general", "weight": 0.1}
-             ],
-             "Data Scientist": [
-                 {"text": "Explain the bias-variance tradeoff.", "type": "technical", "weight": 0.3},
-                 {"text": "How would you handle missing data in a dataset?", "type": "technical", "weight": 0.25},
-                 {"text": "Describe a time when you had to explain complex technical concepts to non-technical stakeholders.", "type": "behavioral", "weight": 0.2},
-                 {"text": "How do you stay updated with the latest developments in data science?", "type": "behavioral", "weight": 0.15},
-                 {"text": "What motivates you to work in data science?", "type": "general", "weight": 0.1}
-             ]
-         }
-
-         return role_questions.get(self.job_role, role_questions["Software Engineer"])
-
-     def _generate_cv_questions(self):
-         # Generate questions based on CV content
-         prompt = f"""
-         Based on the following CV summary for a {self.job_role} position, generate 3 specific interview questions.
-         Focus on areas that need clarification or seem particularly relevant to the role.
-
-         CV Summary:
-         {self.cv_summary['text']}
-
-         Generate exactly 3 questions in this format:
-         1. [question text]|technical
-         2. [question text]|behavioral
-         3. [question text]|technical
-
-         Make the questions specific to the candidate's experience and the job role.
-         """
-
-         response = self.llm(prompt)
-         questions = []
-
-         for line in response.split('\n'):
-             if line.strip() and '|' in line:
-                 text = line.split('|')[0].strip()
-                 q_type = line.split('|')[1].strip().lower()
-                 questions.append({
-                     "text": text,
-                     "type": q_type,
-                     "weight": 0.15 if q_type == "technical" else 0.1,
-                     "cv_based": True
-                 })
-
-         return questions[:3]  # Ensure we only take 3 questions
-
-     def get_questions(self):
-         return self.questions
-
-     def evaluate_answer(self, question, answer):
-         prompt = f"""
-         Evaluate the following interview answer for a {self.job_role} position.
-         Provide specific feedback and a score from 1-10 based on:
-         - Technical accuracy (if technical question)
-         - Relevance to the question
-         - Clarity of communication
-         - Demonstration of skills/experience
-
-         Question: {question['text']}
-         Answer: {answer}
-
-         Respond in this exact format:
-         Score: [x]/10
-         Feedback: [your feedback here]
-         """
-
-         response = self.llm(prompt)
-
-         # Parse the response
-         score = 5  # default if parsing fails
-         feedback = "Evaluation not available"
-
-         if "Score:" in response and "Feedback:" in response:
-             try:
-                 score_part = response.split("Score:")[1].split("/10")[0].strip()
-                 score = float(score_part)
-                 feedback = response.split("Feedback:")[1].strip()
-             except:
-                 pass
-
-         return {
-             "score": score,
-             "feedback": feedback,
-             "max_score": 10
-         }
-
-     def final_evaluation(self, answers):
-         total_score = 0
-         max_possible = 0
-
-         # Calculate weighted score
-         for answer in answers:
-             weight = answer['question'].get('weight', 0.1)
-             total_score += answer['evaluation']['score'] * weight
-             max_possible += 10 * weight
-
-         overall_score = (total_score / max_possible) * 10
-
-         # Determine band
-         if overall_score >= 9:
-             band = "Expert (Band 5)"
-         elif overall_score >= 7:
-             band = "Proficient (Band 4)"
-         elif overall_score >= 5:
-             band = "Competent (Band 3)"
-         elif overall_score >= 3:
-             band = "Limited (Band 2)"
-         else:
-             band = "Beginner (Band 1)"
-
-         return {
-             "score": round(overall_score, 1),
-             "band": band,
-             "total_questions": len(answers)
-         }
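
Note: a minimal sketch of how the deleted InterviewAgent was presumably driven by the app — construct it with the cv_summary returned by evaluate_cv, collect one answer per question, then score the session. The hard-coded answer string is a placeholder, the `result` variable reuses the previous sketch, and HuggingFaceHub assumes a HUGGINGFACEHUB_API_TOKEN is configured.

# Hypothetical driver loop for the deleted utils/interview_agent.py (illustrative only).
from utils.interview_agent import InterviewAgent

agent = InterviewAgent("Software Engineer", cv_summary=result["cv_summary"])  # result from evaluate_cv above

answers = []
for question in agent.get_questions():
    answer_text = "...candidate's answer collected from the UI..."  # placeholder
    evaluation = agent.evaluate_answer(question, answer_text)
    answers.append({"question": question, "answer": answer_text, "evaluation": evaluation})

summary = agent.final_evaluation(answers)
print(summary["score"], summary["band"])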
 
utils/report_generator.py DELETED
@@ -1,47 +0,0 @@
- from fpdf import FPDF
- from datetime import datetime
- import os  # needed for os.makedirs below; missing in the original file
-
- def generate_report(job_role, cv_summary, answers, final_evaluation):
-     pdf = FPDF()
-     pdf.add_page()
-
-     # Set font and title
-     pdf.set_font("Arial", 'B', 16)
-     pdf.cell(0, 10, f"Interview Report for {job_role}", 0, 1, 'C')
-     pdf.ln(10)
-
-     # Candidate Summary
-     pdf.set_font("Arial", 'B', 12)
-     pdf.cell(0, 10, "Candidate Summary:", 0, 1)
-     pdf.set_font("Arial", '', 10)
-     pdf.multi_cell(0, 7, f"Experience: {cv_summary['experience']} years\nSkills Match: {cv_summary['skills_similarity']*100:.1f}%")
-     pdf.ln(5)
-
-     # Interview Results
-     pdf.set_font("Arial", 'B', 12)
-     pdf.cell(0, 10, "Interview Results:", 0, 1)
-     pdf.set_font("Arial", '', 10)
-     pdf.cell(0, 7, f"Overall Score: {final_evaluation['score']}/10", 0, 1)
-     pdf.cell(0, 7, f"Band: {final_evaluation['band']}", 0, 1)
-     pdf.ln(5)
-
-     # Detailed Feedback
-     pdf.set_font("Arial", 'B', 12)
-     pdf.cell(0, 10, "Question-by-Question Feedback:", 0, 1)
-     pdf.set_font("Arial", '', 10)
-
-     for i, answer in enumerate(answers):
-         pdf.set_fill_color(200, 220, 255)
-         pdf.cell(0, 7, f"Question {i+1}: {answer['question']['text']}", 0, 1, fill=True)
-         pdf.cell(0, 7, f"Your Answer: {answer['answer']}", 0, 1)
-         pdf.cell(0, 7, f"Score: {answer['evaluation']['score']}/10", 0, 1)
-         pdf.multi_cell(0, 7, f"Feedback: {answer['evaluation']['feedback']}")
-         pdf.ln(3)
-
-     # Save the report
-     os.makedirs("data/interviews", exist_ok=True)
-     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-     report_path = f"data/interviews/report_{timestamp}.pdf"
-     pdf.output(report_path)
-
-     return report_path
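
Note: generate_report above ties the other two deleted modules together. The following end-to-end sketch reuses the names from the previous sketches; the exact wiring in the deleted app code is not shown in this commit, so this is an assumption.

# Hypothetical end-to-end wiring of the three deleted modules (illustrative only).
from utils.report_generator import generate_report

report_path = generate_report(
    job_role="Software Engineer",
    cv_summary=result["cv_summary"],   # from evaluate_cv
    answers=answers,                   # list built in the interview loop
    final_evaluation=summary,          # from agent.final_evaluation
)
print("Report saved to", report_path)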