Spaces:
Running
Running
Delete utils
Browse files- utils/cv_processor.py +0 -111
- utils/interview_agent.py +0 -150
- utils/report_generator.py +0 -47
utils/cv_processor.py
DELETED
@@ -1,111 +0,0 @@
|
|
1 |
-
from PyPDF2 import PdfReader
|
2 |
-
from docx import Document
|
3 |
-
import re
|
4 |
-
from sentence_transformers import SentenceTransformer
|
5 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
6 |
-
import numpy as np
|
7 |
-
import os
|
8 |
-
|
9 |
-
# Initialize model for semantic similarity
|
10 |
-
# Created once at module import so every evaluate_cv() call reuses the same
# encoder instance instead of constructing a new one per CV.
model = SentenceTransformer('all-MiniLM-L6-v2')
|
11 |
-
|
12 |
-
def extract_text_from_file(file_path):
    """Extract whitespace-normalised plain text from a PDF or DOCX file.

    Args:
        file_path: Path of the document to read. The format is chosen from
            the file extension, compared case-insensitively.

    Returns:
        The document text with every run of whitespace collapsed to a single
        space and leading/trailing whitespace removed.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.docx``.
    """
    # Compare the extension case-insensitively so "resume.PDF" is accepted
    # instead of being reported as an unsupported format.
    lowered_path = str(file_path).lower()

    if lowered_path.endswith('.pdf'):
        with open(file_path, 'rb') as f:
            reader = PdfReader(f)
            # A page with no extractable text must not break the join, so a
            # missing/empty result is treated as an empty string.
            text = " ".join((page.extract_text() or "") for page in reader.pages)
    elif lowered_path.endswith('.docx'):
        doc = Document(file_path)
        # Only body paragraphs are read here.
        text = " ".join(para.text for para in doc.paragraphs)
    else:
        raise ValueError("Unsupported file format")

    # Collapse newlines, tabs and repeated spaces into single spaces.
    text = re.sub(r'\s+', ' ', text).strip()
    return text
|
26 |
-
|
27 |
-
def evaluate_cv(cv_path, job_role, similarity_threshold=0.4):
    """Screen a CV against the stored requirements for a job role.

    The CV passes only if it meets the minimum experience, mentions at least
    one required education keyword, and its embedding is similar enough to
    the role's required-skills text.

    Args:
        cv_path: Path to the CV file; read via ``extract_text_from_file``.
        job_role: Role name looked up via ``load_job_requirements``.
        similarity_threshold: Minimum cosine similarity between the CV and
            the required-skills embeddings. Defaults to 0.4.

    Returns:
        A dict with:
            ``is_qualified``: True when no check failed.
            ``rejection_reasons``: list of human-readable failure messages.
            ``cv_summary``: dict with ``text``, ``experience``,
                ``skills_similarity`` (float) and ``education`` (bool).

    Raises:
        ValueError: Propagated from ``extract_text_from_file`` for
            unsupported file formats.
    """
    # Load job requirements (you would have these stored for each role)
    job_requirements = load_job_requirements(job_role)

    # Extract text from CV; lower-case it once because both the experience
    # and the education checks match case-insensitively.
    cv_text = extract_text_from_file(cv_path)
    cv_text_lower = cv_text.lower()

    # Create embeddings
    cv_embedding = model.encode(cv_text)
    requirements_embedding = model.encode(job_requirements["required_skills"])

    # Each embedding is reshaped to a single row so cosine_similarity
    # returns a 1x1 matrix, from which the scalar is taken.
    similarity = cosine_similarity(
        cv_embedding.reshape(1, -1),
        requirements_embedding.reshape(1, -1)
    )[0][0]

    rejection_reasons = []

    # Check for minimum experience. The optional "+" lets phrases such as
    # "5+ years" count instead of being silently ignored.
    experience_pattern = r"(\d+)\+?\s+years?"
    experience_matches = re.findall(experience_pattern, cv_text_lower)
    # NOTE(review): every "N years" mention is added together, so the same
    # period mentioned twice is counted twice - confirm this is intended.
    total_experience = sum(int(match) for match in experience_matches)

    if total_experience < job_requirements["min_experience"]:
        rejection_reasons.append(
            f"Requires {job_requirements['min_experience']} years experience, found {total_experience}"
        )

    # Check education: one matching keyword anywhere in the CV is enough.
    education_keywords = job_requirements["required_education"]
    has_education = any(
        keyword.lower() in cv_text_lower for keyword in education_keywords
    )

    if not has_education:
        rejection_reasons.append(
            f"Required education not found: {', '.join(education_keywords)}"
        )

    # Check similarity threshold
    if similarity < similarity_threshold:
        rejection_reasons.append(
            "CV content doesn't sufficiently match the required skills"
        )

    # The candidate qualifies exactly when no check recorded a reason.
    meets_requirements = not rejection_reasons

    # Prepare CV summary for interview
    cv_summary = {
        "text": cv_text,
        "experience": total_experience,
        "skills_similarity": float(similarity),
        "education": has_education
    }

    return {
        "is_qualified": meets_requirements,
        "rejection_reasons": rejection_reasons,
        "cv_summary": cv_summary
    }
|
89 |
-
|
90 |
-
def load_job_requirements(job_role):
    """Return the screening requirements for ``job_role``.

    Each requirements dict carries ``min_experience`` (years),
    ``required_education`` (list of keywords) and ``required_skills``
    (free text). Roles that are not listed fall back to the
    "Software Engineer" requirements.
    """
    # In a real app, these would be stored in a database or files
    software_engineer = {
        "min_experience": 2,
        "required_education": ["Bachelor in Computer Science", "BSc CS", "Engineering"],
        "required_skills": """
            programming, algorithms, data structures, software development,
            testing, debugging, version control, agile methodologies
            """
    }
    data_scientist = {
        "min_experience": 3,
        "required_education": ["Master", "PhD", "Statistics", "Data Science"],
        "required_skills": """
            machine learning, statistics, python, R, data analysis,
            data visualization, SQL, predictive modeling
            """
    }
    by_role = {
        "Software Engineer": software_engineer,
        "Data Scientist": data_scientist,
    }

    if job_role in by_role:
        return by_role[job_role]
    # Unknown role: use the Software Engineer profile as the default.
    return by_role["Software Engineer"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/interview_agent.py
DELETED
@@ -1,150 +0,0 @@
|
|
1 |
-
from langchain.llms import HuggingFaceHub
|
2 |
-
from langchain.chains import RetrievalQA
|
3 |
-
from langchain.embeddings import HuggingFaceEmbeddings
|
4 |
-
from langchain.vectorstores import FAISS
|
5 |
-
from langchain.document_loaders import TextLoader
|
6 |
-
from langchain.text_splitter import CharacterTextSplitter
|
7 |
-
import os
|
8 |
-
|
9 |
-
class InterviewAgent:
    """Runs an LLM-assisted interview for one candidate and job role.

    The question list is built once at construction time: a fixed set of
    role questions followed by up to three questions the LLM derives from
    the candidate's CV text. Answers are scored individually with
    ``evaluate_answer`` and combined with ``final_evaluation``.
    """

    def __init__(self, job_role, cv_summary):
        """Store the interview context, create the LLM and build questions.

        Args:
            job_role: Role name used to select base questions and to fill
                the prompts.
            cv_summary: Mapping whose ``"text"`` entry holds the CV text.
        """
        self.job_role = job_role
        self.cv_summary = cv_summary
        self.llm = HuggingFaceHub(
            repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
            model_kwargs={"temperature": 0.5, "max_length": 2048}
        )
        self.questions = self._generate_questions()

    def _generate_questions(self):
        """Return the base role questions followed by CV-specific ones."""
        # Load job-specific questions
        base_questions = self._load_base_questions()

        # Generate CV-specific questions
        cv_questions = self._generate_cv_questions()

        return base_questions + cv_questions

    def _load_base_questions(self):
        """Return the fixed question list for ``self.job_role``.

        Each question is a dict with ``text``, ``type`` and ``weight``.
        Unknown roles fall back to the "Software Engineer" list.
        """
        # In a real app, these would be more sophisticated and loaded from a database
        role_questions = {
            "Software Engineer": [
                {"text": "Explain the SOLID principles in object-oriented design.", "type": "technical", "weight": 0.3},
                {"text": "How would you optimize a slow database query?", "type": "technical", "weight": 0.25},
                {"text": "Describe your experience with Agile methodologies.", "type": "behavioral", "weight": 0.2},
                {"text": "How do you handle conflicts in a team setting?", "type": "behavioral", "weight": 0.15},
                {"text": "Where do you see yourself in 5 years?", "type": "general", "weight": 0.1}
            ],
            "Data Scientist": [
                {"text": "Explain the bias-variance tradeoff.", "type": "technical", "weight": 0.3},
                {"text": "How would you handle missing data in a dataset?", "type": "technical", "weight": 0.25},
                {"text": "Describe a time when you had to explain complex technical concepts to non-technical stakeholders.", "type": "behavioral", "weight": 0.2},
                {"text": "How do you stay updated with the latest developments in data science?", "type": "behavioral", "weight": 0.15},
                {"text": "What motivates you to work in data science?", "type": "general", "weight": 0.1}
            ]
        }

        return role_questions.get(self.job_role, role_questions["Software Engineer"])

    def _generate_cv_questions(self):
        """Ask the LLM for CV-specific questions and parse its reply.

        Returns:
            At most three question dicts with ``text``, ``type``,
            ``weight`` (0.15 for technical, otherwise 0.1) and
            ``cv_based`` set to True. Lines of the reply that do not
            contain a ``|`` separator are ignored.
        """
        # Generate questions based on CV content
        prompt = f"""
        Based on the following CV summary for a {self.job_role} position, generate 3 specific interview questions.
        Focus on areas that need clarification or seem particularly relevant to the role.

        CV Summary:
        {self.cv_summary['text']}

        Generate exactly 3 questions in this format:
        1. [question text]|technical
        2. [question text]|behavioral
        3. [question text]|technical

        Make the questions specific to the candidate's experience and the job role.
        """

        response = self.llm(prompt)
        questions = []

        for line in response.split('\n'):
            if '|' not in line:
                continue

            parts = line.split('|')
            text = parts[0].strip()
            q_type = parts[1].strip().lower()

            # The requested format numbers each line ("1. ..."); drop that
            # prefix so it is not shown as part of the question.
            number, dot, remainder = text.partition('.')
            if dot and number.strip().isdigit():
                text = remainder.strip()

            # Skip empty entries and the literal format placeholder, in case
            # the model repeats the template lines in its reply.
            if not text or text == "[question text]":
                continue

            questions.append({
                "text": text,
                "type": q_type,
                "weight": 0.15 if q_type == "technical" else 0.1,
                "cv_based": True
            })

        return questions[:3]  # Ensure we only take 3 questions

    def get_questions(self):
        """Return the full list of interview questions."""
        return self.questions

    def evaluate_answer(self, question, answer):
        """Score one answer with the LLM.

        Args:
            question: Question dict; its ``"text"`` entry is sent to the LLM.
            answer: The candidate's answer text.

        Returns:
            A dict with ``score`` (kept within 0-10; 5 when the reply cannot
            be parsed), ``feedback`` (text following ``Feedback:``, or a
            placeholder when the reply lacks the expected markers) and
            ``max_score`` (always 10).
        """
        prompt = f"""
        Evaluate the following interview answer for a {self.job_role} position.
        Provide specific feedback and a score from 1-10 based on:
        - Technical accuracy (if technical question)
        - Relevance to the question
        - Clarity of communication
        - Demonstration of skills/experience

        Question: {question['text']}
        Answer: {answer}

        Respond in this exact format:
        Score: [x]/10
        Feedback: [your feedback here]
        """

        response = self.llm(prompt)

        # Parse the response
        score = 5  # default if parsing fails
        feedback = "Evaluation not available"

        if "Score:" in response and "Feedback:" in response:
            score_part = response.split("Score:")[1].split("/10")[0]
            try:
                # Tolerate the bracketed form from the template, e.g. "[8]".
                parsed = float(score_part.strip().strip("[]"))
            except ValueError:
                # Unparseable score: keep the neutral default.
                pass
            else:
                # NaN compares unequal to itself and cannot be clamped.
                if parsed == parsed:
                    score = min(max(parsed, 0.0), 10.0)
            # Feedback is kept even when the score could not be parsed.
            feedback = response.split("Feedback:")[1].strip()

        return {
            "score": score,
            "feedback": feedback,
            "max_score": 10
        }

    def final_evaluation(self, answers):
        """Combine per-answer scores into a weighted overall result.

        Args:
            answers: Iterable of dicts, each with ``question`` (dict that
                may carry ``weight``, default 0.1) and ``evaluation`` (dict
                with a numeric ``score``).

        Returns:
            A dict with ``score`` (0-10, one decimal), ``band`` (label) and
            ``total_questions``.
        """
        total_score = 0
        max_possible = 0

        # Calculate weighted score
        for answer in answers:
            weight = answer['question'].get('weight', 0.1)
            total_score += answer['evaluation']['score'] * weight
            max_possible += 10 * weight

        # With no answers (or only zero weights) there is nothing to
        # normalise by; report 0 instead of dividing by zero.
        if max_possible:
            overall_score = (total_score / max_possible) * 10
        else:
            overall_score = 0.0

        # Determine band
        if overall_score >= 9:
            band = "Expert (Band 5)"
        elif overall_score >= 7:
            band = "Proficient (Band 4)"
        elif overall_score >= 5:
            band = "Competent (Band 3)"
        elif overall_score >= 3:
            band = "Limited (Band 2)"
        else:
            band = "Beginner (Band 1)"

        return {
            "score": round(overall_score, 1),
            "band": band,
            "total_questions": len(answers)
        }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/report_generator.py
DELETED
@@ -1,47 +0,0 @@
|
|
1 |
-
from fpdf import FPDF
|
2 |
-
from datetime import datetime
|
3 |
-
|
4 |
-
def _latin1_safe(text):
    """Return ``text`` with characters outside Latin-1 replaced by ``?``."""
    return str(text).encode("latin-1", "replace").decode("latin-1")


def generate_report(job_role, cv_summary, answers, final_evaluation):
    """Write a PDF interview report and return its path.

    Args:
        job_role: Role name shown in the report title.
        cv_summary: Mapping with ``experience`` and ``skills_similarity``
            (the latter is rendered as a percentage).
        answers: Sequence of dicts, each with ``question`` (dict with
            ``text``), ``answer`` and ``evaluation`` (dict with ``score``
            and ``feedback``).
        final_evaluation: Mapping with ``score`` and ``band``.

    Returns:
        Path of the written file, ``data/interviews/report_<timestamp>.pdf``.
    """
    # Local import: os.makedirs is needed below, but the imports visible for
    # this module are only FPDF and datetime.
    import os

    pdf = FPDF()
    pdf.add_page()

    # All dynamic text goes through _latin1_safe: FPDF's built-in fonts are
    # limited to Latin-1, and free-form answers/feedback may contain
    # characters outside it (curly quotes, emoji, ...).

    # Set font and title
    pdf.set_font("Arial", 'B', 16)
    pdf.cell(0, 10, _latin1_safe(f"Interview Report for {job_role}"), 0, 1, 'C')
    pdf.ln(10)

    # Candidate Summary
    pdf.set_font("Arial", 'B', 12)
    pdf.cell(0, 10, "Candidate Summary:", 0, 1)
    pdf.set_font("Arial", '', 10)
    pdf.multi_cell(0, 7, _latin1_safe(
        f"Experience: {cv_summary['experience']} years\nSkills Match: {cv_summary['skills_similarity']*100:.1f}%"
    ))
    pdf.ln(5)

    # Interview Results
    pdf.set_font("Arial", 'B', 12)
    pdf.cell(0, 10, "Interview Results:", 0, 1)
    pdf.set_font("Arial", '', 10)
    pdf.cell(0, 7, _latin1_safe(f"Overall Score: {final_evaluation['score']}/10"), 0, 1)
    pdf.cell(0, 7, _latin1_safe(f"Band: {final_evaluation['band']}"), 0, 1)
    pdf.ln(5)

    # Detailed Feedback
    pdf.set_font("Arial", 'B', 12)
    pdf.cell(0, 10, "Question-by-Question Feedback:", 0, 1)
    pdf.set_font("Arial", '', 10)

    for i, answer in enumerate(answers):
        pdf.set_fill_color(200, 220, 255)
        pdf.cell(0, 7, _latin1_safe(f"Question {i+1}: {answer['question']['text']}"), 0, 1, fill=True)
        pdf.cell(0, 7, _latin1_safe(f"Your Answer: {answer['answer']}"), 0, 1)
        pdf.cell(0, 7, _latin1_safe(f"Score: {answer['evaluation']['score']}/10"), 0, 1)
        pdf.multi_cell(0, 7, _latin1_safe(f"Feedback: {answer['evaluation']['feedback']}"))
        pdf.ln(3)

    # Save the report
    os.makedirs("data/interviews", exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    report_path = f"data/interviews/report_{timestamp}.pdf"
    pdf.output(report_path)

    return report_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|