Muhammad541 committed on
Commit
e94d8bf
·
verified ·
1 Parent(s): 5af9a5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -5
app.py CHANGED
@@ -75,6 +75,13 @@ def load_dataset(file_path, required_columns=[], additional_columns=['popularity
75
  else:
76
  df[col] = 0.0 # Default for other additional columns
77
 
 
 
 
 
 
 
 
78
  return df
79
  except ValueError as ve:
80
  logger.error(f"ValueError loading {file_path}: {ve}. Using fallback data.")
@@ -187,12 +194,12 @@ def precompute_resources():
187
  course_skills = courses_df['skills'].fillna("").tolist()
188
  course_embeddings = universal_model.encode(course_skills, batch_size=128, convert_to_tensor=True, device="cuda" if torch.cuda.is_available() else "cpu").cpu().numpy()
189
  skill_embeddings = universal_model.encode(questions_df['Skill'].unique().tolist(), batch_size=128, convert_to_tensor=True, device="cuda" if torch.cuda.is_available() else "cpu")
190
- course_similarity = util.pytorch_cos_sim(torch.tensor(skill_embeddings), torch.tensor(course_embeddings)).numpy()
191
 
192
  # Precompute job similarities
193
  job_skills = jobs_df['required_skills'].fillna("").tolist()
194
  job_embeddings = universal_model.encode(job_skills, batch_size=128, convert_to_tensor=True, device="cuda" if torch.cuda.is_available() else "cpu").cpu().numpy()
195
- job_similarity = util.pytorch_cos_sim(torch.tensor(skill_embeddings), torch.tensor(job_embeddings)).numpy()
196
 
197
  # Save precomputed resources
198
  with open(TFIDF_PATH, 'wb') as f: pickle.dump(tfidf_vectorizer, f)
@@ -236,14 +243,16 @@ def evaluate_response(args):
236
  def recommend_courses(skills_to_improve, user_level, upgrade=False):
237
  try:
238
  if not skills_to_improve or courses_df.empty:
 
239
  return []
240
 
241
  skill_indices = [list(questions_df['Skill'].unique()).index(skill) for skill in skills_to_improve if skill in questions_df['Skill'].unique()]
242
  if not skill_indices:
 
243
  return []
244
 
245
  similarities = course_similarity[skill_indices]
246
- # Use get() with default arrays to avoid KeyError
247
  popularity = courses_df['popularity'].values if 'popularity' in courses_df else np.full(len(courses_df), 0.8)
248
  completion_rate = courses_df['completion_rate'].values if 'completion_rate' in courses_df else np.full(len(courses_df), 0.7)
249
  total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * popularity + 0.2 * completion_rate
@@ -251,8 +260,14 @@ def recommend_courses(skills_to_improve, user_level, upgrade=False):
251
  target_level = 'Advanced' if upgrade else user_level
252
  idx = np.argsort(-total_scores)[:5]
253
  candidates = courses_df.iloc[idx]
254
- candidates = candidates[candidates['level'].str.contains(target_level, case=False, na=False)]
255
- return candidates[['course_title', 'Organization']].values.tolist()[:3]
 
 
 
 
 
 
256
  except Exception as e:
257
  logger.error(f"Course recommendation error: {e}")
258
  return []
 
75
  else:
76
  df[col] = 0.0 # Default for other additional columns
77
 
78
+ # Ensure 'level' column has valid values (not empty)
79
+ if 'level' in df.columns:
80
+ df['level'] = df['level'].apply(lambda x: 'Intermediate' if pd.isna(x) or x.strip() == "" else x)
81
+ else:
82
+ logger.warning(f"'level' column missing in {file_path}. Adding default 'Intermediate'.")
83
+ df['level'] = 'Intermediate'
84
+
85
  return df
86
  except ValueError as ve:
87
  logger.error(f"ValueError loading {file_path}: {ve}. Using fallback data.")
 
194
  course_skills = courses_df['skills'].fillna("").tolist()
195
  course_embeddings = universal_model.encode(course_skills, batch_size=128, convert_to_tensor=True, device="cuda" if torch.cuda.is_available() else "cpu").cpu().numpy()
196
  skill_embeddings = universal_model.encode(questions_df['Skill'].unique().tolist(), batch_size=128, convert_to_tensor=True, device="cuda" if torch.cuda.is_available() else "cpu")
197
+ course_similarity = util.pytorch_cos_sim(skill_embeddings.clone().detach(), course_embeddings.clone().detach()).numpy()
198
 
199
  # Precompute job similarities
200
  job_skills = jobs_df['required_skills'].fillna("").tolist()
201
  job_embeddings = universal_model.encode(job_skills, batch_size=128, convert_to_tensor=True, device="cuda" if torch.cuda.is_available() else "cpu").cpu().numpy()
202
+ job_similarity = util.pytorch_cos_sim(skill_embeddings.clone().detach(), job_embeddings.clone().detach()).numpy()
203
 
204
  # Save precomputed resources
205
  with open(TFIDF_PATH, 'wb') as f: pickle.dump(tfidf_vectorizer, f)
 
243
  def recommend_courses(skills_to_improve, user_level, upgrade=False):
244
  try:
245
  if not skills_to_improve or courses_df.empty:
246
+ logger.info("No skills to improve or courses_df is empty.")
247
  return []
248
 
249
  skill_indices = [list(questions_df['Skill'].unique()).index(skill) for skill in skills_to_improve if skill in questions_df['Skill'].unique()]
250
  if not skill_indices:
251
+ logger.info("No matching skill indices found.")
252
  return []
253
 
254
  similarities = course_similarity[skill_indices]
255
+ # Use default arrays to avoid KeyError
256
  popularity = courses_df['popularity'].values if 'popularity' in courses_df else np.full(len(courses_df), 0.8)
257
  completion_rate = courses_df['completion_rate'].values if 'completion_rate' in courses_df else np.full(len(courses_df), 0.7)
258
  total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * popularity + 0.2 * completion_rate
 
260
  target_level = 'Advanced' if upgrade else user_level
261
  idx = np.argsort(-total_scores)[:5]
262
  candidates = courses_df.iloc[idx]
263
+
264
+ # Filter by level, but fallback to all courses if none match
265
+ filtered_candidates = candidates[candidates['level'].str.contains(target_level, case=False, na=False)]
266
+ if filtered_candidates.empty:
267
+ logger.warning(f"No courses found for level {target_level}. Returning top courses regardless of level.")
268
+ filtered_candidates = candidates
269
+
270
+ return filtered_candidates[['course_title', 'Organization']].values.tolist()[:3]
271
  except Exception as e:
272
  logger.error(f"Course recommendation error: {e}")
273
  return []