Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -68,20 +68,21 @@ questions_df = load_dataset("Generated_Skill-Based_Questions.csv", ["Skill", "Qu
|
|
68 |
})
|
69 |
|
70 |
courses_df = load_dataset("coursera_course_dataset_v2_no_null.csv", ["skills", "course_title", "Organization", "level"], {
|
71 |
-
'skills': ['
|
72 |
-
'course_title': ['
|
73 |
-
'Organization': ['
|
74 |
-
'level': ['Intermediate', 'Intermediate', '
|
75 |
-
'popularity': [0.9, 0.
|
76 |
-
'completion_rate': [0.7, 0.
|
77 |
})
|
78 |
|
79 |
jobs_df = load_dataset("Updated_Job_Posting_Dataset.csv", ["job_title", "company_name", "location", "required_skills", "job_description"], {
|
80 |
-
'job_title': ['DevOps Engineer', 'Cloud Architect'],
|
81 |
-
'company_name': ['Tech Corp', 'Cloud Inc'],
|
82 |
-
'location': ['Remote', '
|
83 |
-
'required_skills': ['Linux,
|
84 |
-
'job_description': ['DevOps role description', 'Cloud architecture position']
|
|
|
85 |
})
|
86 |
|
87 |
# Validate questions_df
|
@@ -156,7 +157,7 @@ def initialize_resources(user_skills):
|
|
156 |
universal_model.save(UNIVERSAL_MODEL_PATH)
|
157 |
logger.info(f"Resources saved to {chosen_model_dir}")
|
158 |
|
159 |
-
# Enhanced evaluation with
|
160 |
def evaluate_response(args):
|
161 |
try:
|
162 |
skill, user_answer, question = args
|
@@ -170,9 +171,8 @@ def evaluate_response(args):
|
|
170 |
is_ai = probs[1] > 0.5
|
171 |
|
172 |
expected_answer = question_to_answer.get(question, "")
|
173 |
-
|
174 |
-
|
175 |
-
score = util.pytorch_cos_sim(user_embedding, expected_embedding).item() * 100
|
176 |
|
177 |
user_tfidf = tfidf_vectorizer.transform([user_answer]).toarray()[0]
|
178 |
skill_vec = skill_tfidf.get(skill.lower(), np.zeros_like(user_tfidf))
|
@@ -184,20 +184,20 @@ def evaluate_response(args):
|
|
184 |
logger.error(f"Evaluation error for {skill}: {e}")
|
185 |
return skill, 0.0, False
|
186 |
|
187 |
-
# Improved course recommendation
|
188 |
def recommend_courses(skills_to_improve, user_level, upgrade=False):
|
189 |
try:
|
190 |
if not skills_to_improve or courses_df.empty:
|
191 |
return []
|
192 |
|
193 |
-
# Add missing columns if needed
|
194 |
if 'popularity' not in courses_df:
|
195 |
courses_df['popularity'] = 0.8
|
196 |
if 'completion_rate' not in courses_df:
|
197 |
courses_df['completion_rate'] = 0.7
|
198 |
|
199 |
-
|
200 |
-
|
|
|
201 |
similarities = util.pytorch_cos_sim(skill_embeddings, course_embeddings).numpy()
|
202 |
|
203 |
total_scores = 0.6 * similarities + 0.2 * courses_df['popularity'].values + 0.2 * courses_df['completion_rate'].values
|
@@ -207,7 +207,7 @@ def recommend_courses(skills_to_improve, user_level, upgrade=False):
|
|
207 |
for i, skill in enumerate(skills_to_improve):
|
208 |
idx = np.argsort(-total_scores[i])[:5]
|
209 |
candidates = courses_df.iloc[idx]
|
210 |
-
candidates = candidates[candidates['level'].str.contains(target_level, case=False)]
|
211 |
recommendations.extend(candidates[['course_title', 'Organization']].values.tolist()[:3])
|
212 |
|
213 |
return list(dict.fromkeys(map(tuple, recommendations)))
|
@@ -215,26 +215,35 @@ def recommend_courses(skills_to_improve, user_level, upgrade=False):
|
|
215 |
logger.error(f"Course recommendation error: {e}")
|
216 |
return []
|
217 |
|
218 |
-
# Enhanced job recommendation
|
219 |
def recommend_jobs(user_skills, user_level):
|
220 |
try:
|
221 |
if jobs_df.empty:
|
222 |
return []
|
223 |
|
224 |
job_field = 'required_skills' if 'required_skills' in jobs_df.columns else 'job_description'
|
225 |
-
job_embeddings = universal_model.encode(jobs_df[job_field].fillna(""), convert_to_tensor=True)
|
226 |
-
user_embedding = universal_model.encode(" ".join(user_skills), convert_to_tensor=True)
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
)
|
233 |
-
|
234 |
-
|
235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
return [(jobs_df.iloc[i]['job_title'], jobs_df.iloc[i]['company_name'],
|
237 |
-
|
238 |
except Exception as e:
|
239 |
logger.error(f"Job recommendation error: {e}")
|
240 |
return []
|
|
|
68 |
})
|
69 |
|
70 |
courses_df = load_dataset("coursera_course_dataset_v2_no_null.csv", ["skills", "course_title", "Organization", "level"], {
|
71 |
+
'skills': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
|
72 |
+
'course_title': ['Linux Admin', 'Git Mastery', 'Node.js Advanced', 'Python for Data', 'Kubernetes Basics'],
|
73 |
+
'Organization': ['Coursera', 'Udemy', 'Pluralsight', 'edX', 'Linux Foundation'],
|
74 |
+
'level': ['Intermediate', 'Intermediate', 'Advanced', 'Advanced', 'Intermediate'],
|
75 |
+
'popularity': [0.85, 0.9, 0.8, 0.95, 0.9],
|
76 |
+
'completion_rate': [0.65, 0.7, 0.6, 0.8, 0.75]
|
77 |
})
|
78 |
|
79 |
jobs_df = load_dataset("Updated_Job_Posting_Dataset.csv", ["job_title", "company_name", "location", "required_skills", "job_description"], {
|
80 |
+
'job_title': ['DevOps Engineer', 'Cloud Architect', 'Software Engineer', 'Data Scientist', 'Security Analyst'],
|
81 |
+
'company_name': ['Tech Corp', 'Cloud Inc', 'Tech Solutions', 'Data Co', 'SecuriTech'],
|
82 |
+
'location': ['Remote', 'Islamabad', 'Karachi', 'Remote', 'Islamabad'],
|
83 |
+
'required_skills': ['Linux, Kubernetes', 'AWS, Kubernetes', 'Python, Node.js', 'Python, SQL', 'Cybersecurity, Linux'],
|
84 |
+
'job_description': ['DevOps role description', 'Cloud architecture position', 'Software engineering role', 'Data science position', 'Security analyst role'],
|
85 |
+
'level': ['Intermediate', 'Advanced', 'Intermediate', 'Intermediate', 'Intermediate'] # Added level for job recommendations
|
86 |
})
|
87 |
|
88 |
# Validate questions_df
|
|
|
157 |
universal_model.save(UNIVERSAL_MODEL_PATH)
|
158 |
logger.info(f"Resources saved to {chosen_model_dir}")
|
159 |
|
160 |
+
# Enhanced evaluation with batch processing
|
161 |
def evaluate_response(args):
|
162 |
try:
|
163 |
skill, user_answer, question = args
|
|
|
171 |
is_ai = probs[1] > 0.5
|
172 |
|
173 |
expected_answer = question_to_answer.get(question, "")
|
174 |
+
user_embeddings = universal_model.encode([user_answer, expected_answer], batch_size=32, convert_to_tensor=True)
|
175 |
+
score = util.pytorch_cos_sim(user_embeddings[0], user_embeddings[1]).item() * 100
|
|
|
176 |
|
177 |
user_tfidf = tfidf_vectorizer.transform([user_answer]).toarray()[0]
|
178 |
skill_vec = skill_tfidf.get(skill.lower(), np.zeros_like(user_tfidf))
|
|
|
184 |
logger.error(f"Evaluation error for {skill}: {e}")
|
185 |
return skill, 0.0, False
|
186 |
|
187 |
+
# Improved course recommendation with batch processing
|
188 |
def recommend_courses(skills_to_improve, user_level, upgrade=False):
|
189 |
try:
|
190 |
if not skills_to_improve or courses_df.empty:
|
191 |
return []
|
192 |
|
|
|
193 |
if 'popularity' not in courses_df:
|
194 |
courses_df['popularity'] = 0.8
|
195 |
if 'completion_rate' not in courses_df:
|
196 |
courses_df['completion_rate'] = 0.7
|
197 |
|
198 |
+
# Batch encode skills and courses
|
199 |
+
skill_embeddings = universal_model.encode(skills_to_improve, batch_size=32, convert_to_tensor=True)
|
200 |
+
course_embeddings = universal_model.encode(courses_df['skills'].fillna(""), batch_size=32, convert_to_tensor=True)
|
201 |
similarities = util.pytorch_cos_sim(skill_embeddings, course_embeddings).numpy()
|
202 |
|
203 |
total_scores = 0.6 * similarities + 0.2 * courses_df['popularity'].values + 0.2 * courses_df['completion_rate'].values
|
|
|
207 |
for i, skill in enumerate(skills_to_improve):
|
208 |
idx = np.argsort(-total_scores[i])[:5]
|
209 |
candidates = courses_df.iloc[idx]
|
210 |
+
candidates = candidates[candidates['level'].str.contains(target_level, case=False, na=False)]
|
211 |
recommendations.extend(candidates[['course_title', 'Organization']].values.tolist()[:3])
|
212 |
|
213 |
return list(dict.fromkeys(map(tuple, recommendations)))
|
|
|
215 |
logger.error(f"Course recommendation error: {e}")
|
216 |
return []
|
217 |
|
218 |
+
# Enhanced job recommendation with fixed level handling
|
219 |
def recommend_jobs(user_skills, user_level):
    """Rank job postings for a user and return the five best matches.

    Each posting in the module-level ``jobs_df`` gets a weighted score:

    * 0.5 x cosine similarity between the joined user skills and the
      posting's ``required_skills`` (or ``job_description`` when that column
      is absent),
    * 0.2 x level closeness (1.0 for the same level, 0.5 for one step away,
      0.0 for two steps away),
    * 0.1 x location preference (1.0 for Islamabad/Karachi, 0.7 otherwise),
    * 0.2 x cosine similarity between the joined user skills and the
      posting's ``job_title``.

    Args:
        user_skills: Iterable of skill names; they are joined with spaces
            and embedded as a single query string.
        user_level: One of "Beginner", "Intermediate" or "Advanced"
            (matched case-insensitively). Any other value is treated as
            "Intermediate".

    Returns:
        A list of up to five ``(job_title, company_name, location)`` tuples,
        best match first. An empty list is returned when ``jobs_df`` is
        empty or when any error occurs (the error is logged).

    Side effects:
        Adds a ``level`` column filled with "Intermediate" to ``jobs_df``
        when that column is missing.
    """
    try:
        if jobs_df.empty:
            return []

        job_field = 'required_skills' if 'required_skills' in jobs_df.columns else 'job_description'

        # Hand the encoder plain Python lists rather than a pandas Series:
        # positional indexing into a Series with a non-default index is
        # label-based and can fail inside the encoder.
        # NOTE(review): universal_model is presumably a sentence-transformers
        # model (it exposes encode/save) -- confirm.
        job_texts = jobs_df[job_field].fillna("").astype(str).tolist()
        title_texts = jobs_df['job_title'].fillna("").astype(str).tolist()

        job_embeddings = universal_model.encode(job_texts, batch_size=32, convert_to_tensor=True)
        user_embedding = universal_model.encode(" ".join(user_skills), batch_size=32, convert_to_tensor=True)
        # .cpu() is a no-op for CPU tensors and is required before .numpy()
        # when the embeddings live on a GPU.
        skill_similarities = util.pytorch_cos_sim(user_embedding, job_embeddings).cpu().numpy()[0]

        # Make sure a level column exists so every posting can be scored.
        if 'level' not in jobs_df.columns:
            jobs_df['level'] = 'Intermediate'

        # Compare levels case-insensitively and ignore stray whitespace;
        # unknown labels fall back to the middle level (1).
        level_map = {'beginner': 0, 'intermediate': 1, 'advanced': 2}
        user_level_num = level_map.get(str(user_level).strip().lower(), 1)
        level_scores = np.array([
            1 - abs(level_map.get(level.strip().lower(), 1) - user_level_num) / 2
            for level in jobs_df['level'].astype(str)
        ])

        # Postings without a location column are treated as "Remote".
        if 'location' in jobs_df.columns:
            locations = jobs_df['location'].tolist()
        else:
            locations = ['Remote'] * len(jobs_df)
        location_pref = np.array([
            1.0 if location in ('Islamabad', 'Karachi') else 0.7
            for location in locations
        ])

        industry_embeddings = universal_model.encode(title_texts, batch_size=32, convert_to_tensor=True)
        industry_similarities = util.pytorch_cos_sim(user_embedding, industry_embeddings).cpu().numpy()[0]

        # All four components are positional NumPy arrays of len(jobs_df),
        # so the sum cannot be skewed by pandas index alignment.
        total_job_scores = (
            0.5 * skill_similarities
            + 0.2 * level_scores
            + 0.1 * location_pref
            + 0.2 * industry_similarities
        )
        top_job_indices = np.argsort(-total_job_scores)[:5]

        top_jobs = []
        for i in top_job_indices:
            row = jobs_df.iloc[i]
            top_jobs.append((row['job_title'], row['company_name'], row.get('location', 'Remote')))
        return top_jobs
    except Exception as e:
        logger.error(f"Job recommendation error: {e}")
        return []
|