Muhammad541 committed
Commit 905ff75 · verified
1 Parent(s): 337d086

Update app.py

Files changed (1): app.py (+42, −33)
app.py CHANGED
@@ -68,20 +68,21 @@ questions_df = load_dataset("Generated_Skill-Based_Questions.csv", ["Skill", "Qu
 })
 
 courses_df = load_dataset("coursera_course_dataset_v2_no_null.csv", ["skills", "course_title", "Organization", "level"], {
-    'skills': ['Docker', 'Jenkins', 'Azure', 'Cybersecurity'],
-    'course_title': ['Docker Mastery', 'Jenkins CI/CD', 'Azure Fundamentals', 'Cybersecurity Basics'],
-    'Organization': ['Udemy', 'Coursera', 'Microsoft', 'edX'],
-    'level': ['Intermediate', 'Intermediate', 'Intermediate', 'Advanced'],
-    'popularity': [0.9, 0.85, 0.95, 0.8],
-    'completion_rate': [0.7, 0.65, 0.8, 0.6]
+    'skills': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
+    'course_title': ['Linux Admin', 'Git Mastery', 'Node.js Advanced', 'Python for Data', 'Kubernetes Basics'],
+    'Organization': ['Coursera', 'Udemy', 'Pluralsight', 'edX', 'Linux Foundation'],
+    'level': ['Intermediate', 'Intermediate', 'Advanced', 'Advanced', 'Intermediate'],
+    'popularity': [0.85, 0.9, 0.8, 0.95, 0.9],
+    'completion_rate': [0.65, 0.7, 0.6, 0.8, 0.75]
 })
 
 jobs_df = load_dataset("Updated_Job_Posting_Dataset.csv", ["job_title", "company_name", "location", "required_skills", "job_description"], {
-    'job_title': ['DevOps Engineer', 'Cloud Architect'],
-    'company_name': ['Tech Corp', 'Cloud Inc'],
-    'location': ['Remote', 'Silicon Valley'],
-    'required_skills': ['Linux, Cloud', 'AWS, Kubernetes'],
-    'job_description': ['DevOps role description', 'Cloud architecture position']
+    'job_title': ['DevOps Engineer', 'Cloud Architect', 'Software Engineer', 'Data Scientist', 'Security Analyst'],
+    'company_name': ['Tech Corp', 'Cloud Inc', 'Tech Solutions', 'Data Co', 'SecuriTech'],
+    'location': ['Remote', 'Islamabad', 'Karachi', 'Remote', 'Islamabad'],
+    'required_skills': ['Linux, Kubernetes', 'AWS, Kubernetes', 'Python, Node.js', 'Python, SQL', 'Cybersecurity, Linux'],
+    'job_description': ['DevOps role description', 'Cloud architecture position', 'Software engineering role', 'Data science position', 'Security analyst role'],
+    'level': ['Intermediate', 'Advanced', 'Intermediate', 'Intermediate', 'Intermediate']  # Added level for job recommendations
 })
 
 # Validate questions_df
@@ -156,7 +157,7 @@ def initialize_resources(user_skills):
     universal_model.save(UNIVERSAL_MODEL_PATH)
     logger.info(f"Resources saved to {chosen_model_dir}")
 
-# Enhanced evaluation with error handling
+# Enhanced evaluation with batch processing
 def evaluate_response(args):
     try:
         skill, user_answer, question = args
@@ -170,9 +171,8 @@ def evaluate_response(args):
         is_ai = probs[1] > 0.5
 
         expected_answer = question_to_answer.get(question, "")
-        user_embedding = universal_model.encode(user_answer, convert_to_tensor=True)
-        expected_embedding = universal_model.encode(expected_answer, convert_to_tensor=True)
-        score = util.pytorch_cos_sim(user_embedding, expected_embedding).item() * 100
+        user_embeddings = universal_model.encode([user_answer, expected_answer], batch_size=32, convert_to_tensor=True)
+        score = util.pytorch_cos_sim(user_embeddings[0], user_embeddings[1]).item() * 100
 
         user_tfidf = tfidf_vectorizer.transform([user_answer]).toarray()[0]
         skill_vec = skill_tfidf.get(skill.lower(), np.zeros_like(user_tfidf))
@@ -184,20 +184,20 @@ def evaluate_response(args):
         logger.error(f"Evaluation error for {skill}: {e}")
         return skill, 0.0, False
 
-# Improved course recommendation
+# Improved course recommendation with batch processing
 def recommend_courses(skills_to_improve, user_level, upgrade=False):
     try:
         if not skills_to_improve or courses_df.empty:
             return []
 
-        # Add missing columns if needed
         if 'popularity' not in courses_df:
             courses_df['popularity'] = 0.8
         if 'completion_rate' not in courses_df:
             courses_df['completion_rate'] = 0.7
 
-        skill_embeddings = universal_model.encode(skills_to_improve, convert_to_tensor=True)
-        course_embeddings = universal_model.encode(courses_df['skills'].fillna(""), convert_to_tensor=True)
+        # Batch encode skills and courses
+        skill_embeddings = universal_model.encode(skills_to_improve, batch_size=32, convert_to_tensor=True)
+        course_embeddings = universal_model.encode(courses_df['skills'].fillna(""), batch_size=32, convert_to_tensor=True)
         similarities = util.pytorch_cos_sim(skill_embeddings, course_embeddings).numpy()
 
         total_scores = 0.6 * similarities + 0.2 * courses_df['popularity'].values + 0.2 * courses_df['completion_rate'].values
@@ -207,7 +207,7 @@ def recommend_courses(skills_to_improve, user_level, upgrade=False):
         for i, skill in enumerate(skills_to_improve):
             idx = np.argsort(-total_scores[i])[:5]
             candidates = courses_df.iloc[idx]
-            candidates = candidates[candidates['level'].str.contains(target_level, case=False)]
+            candidates = candidates[candidates['level'].str.contains(target_level, case=False, na=False)]
             recommendations.extend(candidates[['course_title', 'Organization']].values.tolist()[:3])
 
         return list(dict.fromkeys(map(tuple, recommendations)))
@@ -215,26 +215,35 @@ def recommend_courses(skills_to_improve, user_level, upgrade=False):
         logger.error(f"Course recommendation error: {e}")
         return []
 
-# Enhanced job recommendation
+# Enhanced job recommendation with fixed level handling
 def recommend_jobs(user_skills, user_level):
     try:
         if jobs_df.empty:
             return []
 
         job_field = 'required_skills' if 'required_skills' in jobs_df.columns else 'job_description'
-        job_embeddings = universal_model.encode(jobs_df[job_field].fillna(""), convert_to_tensor=True)
-        user_embedding = universal_model.encode(" ".join(user_skills), convert_to_tensor=True)
-        similarities = util.pytorch_cos_sim(user_embedding, job_embeddings).numpy()[0]
-
-        level_scores = jobs_df.get('level', 'Intermediate').apply(
-            lambda x: 1 - abs({'Beginner':0, 'Intermediate':1, 'Advanced':2}.get(x,1) -
-                              {'Beginner':0, 'Intermediate':1, 'Advanced':2}[user_level])/2
-        )
-        total_scores = 0.6 * similarities + 0.4 * level_scores
-        top_idx = np.argsort(-total_scores)[:5]
-
+        job_embeddings = universal_model.encode(jobs_df[job_field].fillna(""), batch_size=32, convert_to_tensor=True)
+        user_embedding = universal_model.encode(" ".join(user_skills), batch_size=32, convert_to_tensor=True)
+        skill_similarities = util.pytorch_cos_sim(user_embedding, job_embeddings).numpy()[0]
+
+        # Ensure level column exists and is a Series
+        if 'level' not in jobs_df.columns:
+            jobs_df['level'] = 'Intermediate'
+        level_col = jobs_df['level'].astype(str)  # Ensure it's a string Series
+
+        level_map = {'Beginner': 0, 'Intermediate': 1, 'Advanced': 2}
+        user_level_num = level_map.get(user_level, 1)
+        level_scores = level_col.apply(lambda x: 1 - abs(level_map.get(x, 1) - user_level_num)/2)
+
+        location_pref = jobs_df.get('location', pd.Series(['Remote'] * len(jobs_df))).apply(lambda x: 1.0 if x in ['Islamabad', 'Karachi'] else 0.7)
+        industry_embeddings = universal_model.encode(jobs_df['job_title'].fillna(""), batch_size=32, convert_to_tensor=True)
+        industry_similarities = util.pytorch_cos_sim(user_embedding, industry_embeddings).numpy()[0]
+
+        total_job_scores = 0.5 * skill_similarities + 0.2 * level_scores + 0.1 * location_pref + 0.2 * industry_similarities
+        top_job_indices = np.argsort(-total_job_scores)[:5]
+
         return [(jobs_df.iloc[i]['job_title'], jobs_df.iloc[i]['company_name'],
-                 jobs_df.iloc[i].get('location', 'Remote')) for i in top_idx]
+                 jobs_df.iloc[i].get('location', 'Remote')) for i in top_job_indices]
     except Exception as e:
        logger.error(f"Job recommendation error: {e}")
        return []
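
The change that repeats throughout this diff is batching: texts are encoded in a single SentenceTransformer.encode call with batch_size=32 instead of one call per string. A minimal sketch of that pattern, with "all-MiniLM-L6-v2" used only as an example model name (app.py loads its own universal_model):

# Sketch only: the model name is an example, not the one app.py uses.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")

user_answer = "Containers package an application together with its dependencies."
expected_answer = "Docker containers bundle code and dependencies into a single unit."

# One batched call replaces two separate encode() calls; batch_size matters for larger inputs.
embeddings = model.encode([user_answer, expected_answer], batch_size=32, convert_to_tensor=True)
score = util.pytorch_cos_sim(embeddings[0], embeddings[1]).item() * 100
print(f"similarity score: {score:.1f}")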
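
The na=False added to str.contains in recommend_courses keeps rows with a missing level from turning the boolean mask into NaN, which pandas refuses to index with. A toy illustration:

import pandas as pd

levels = pd.Series(['Intermediate', None, 'Advanced'])
mask = levels.str.contains('intermediate', case=False, na=False)
print(mask.tolist())  # [True, False, False]; without na=False the None row yields NaN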
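
The rewritten recommend_jobs ranks jobs on four signals with fixed weights: 0.5 × skill similarity, 0.2 × level proximity, 0.1 × location preference, and 0.2 × job-title similarity. The sketch below mirrors that weighting on toy data, with random vectors standing in for the embeddings app.py would get from universal_model; everything here is illustrative rather than code from the app.

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)

jobs = pd.DataFrame({
    'job_title': ['DevOps Engineer', 'Cloud Architect', 'Security Analyst'],
    'company_name': ['Tech Corp', 'Cloud Inc', 'SecuriTech'],
    'location': ['Remote', 'Islamabad', 'Karachi'],
    'level': ['Intermediate', 'Advanced', 'Intermediate'],
})

def cos_sim(vec, mat):
    # Cosine similarity between one vector and each row of a matrix.
    vec = vec / np.linalg.norm(vec)
    mat = mat / np.linalg.norm(mat, axis=1, keepdims=True)
    return mat @ vec

dim = 8
user_vec = rng.normal(size=dim)                 # stand-in for the encoded user skills
skill_vecs = rng.normal(size=(len(jobs), dim))  # stand-in for encoded required_skills
title_vecs = rng.normal(size=(len(jobs), dim))  # stand-in for encoded job_title

level_map = {'Beginner': 0, 'Intermediate': 1, 'Advanced': 2}
user_level = 'Intermediate'
level_scores = jobs['level'].map(lambda x: 1 - abs(level_map.get(x, 1) - level_map[user_level]) / 2)
location_pref = jobs['location'].map(lambda x: 1.0 if x in ['Islamabad', 'Karachi'] else 0.7)

total = (0.5 * cos_sim(user_vec, skill_vecs)
         + 0.2 * level_scores.to_numpy()
         + 0.1 * location_pref.to_numpy()
         + 0.2 * cos_sim(user_vec, title_vecs))

top = np.argsort(-total)[:2]
print(jobs.iloc[top][['job_title', 'company_name', 'location']])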