Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -75,6 +75,13 @@ def load_dataset(file_path, required_columns=[], additional_columns=['popularity
|
|
75 |
else:
|
76 |
df[col] = 0.0 # Default for other additional columns
|
77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
return df
|
79 |
except ValueError as ve:
|
80 |
logger.error(f"ValueError loading {file_path}: {ve}. Using fallback data.")
|
@@ -187,12 +194,12 @@ def precompute_resources():
|
|
187 |
course_skills = courses_df['skills'].fillna("").tolist()
|
188 |
course_embeddings = universal_model.encode(course_skills, batch_size=128, convert_to_tensor=True, device="cuda" if torch.cuda.is_available() else "cpu").cpu().numpy()
|
189 |
skill_embeddings = universal_model.encode(questions_df['Skill'].unique().tolist(), batch_size=128, convert_to_tensor=True, device="cuda" if torch.cuda.is_available() else "cpu")
|
190 |
-
course_similarity = util.pytorch_cos_sim(
|
191 |
|
192 |
# Precompute job similarities
|
193 |
job_skills = jobs_df['required_skills'].fillna("").tolist()
|
194 |
job_embeddings = universal_model.encode(job_skills, batch_size=128, convert_to_tensor=True, device="cuda" if torch.cuda.is_available() else "cpu").cpu().numpy()
|
195 |
-
job_similarity = util.pytorch_cos_sim(
|
196 |
|
197 |
# Save precomputed resources
|
198 |
with open(TFIDF_PATH, 'wb') as f: pickle.dump(tfidf_vectorizer, f)
|
@@ -236,14 +243,16 @@ def evaluate_response(args):
|
|
236 |
def recommend_courses(skills_to_improve, user_level, upgrade=False):
|
237 |
try:
|
238 |
if not skills_to_improve or courses_df.empty:
|
|
|
239 |
return []
|
240 |
|
241 |
skill_indices = [list(questions_df['Skill'].unique()).index(skill) for skill in skills_to_improve if skill in questions_df['Skill'].unique()]
|
242 |
if not skill_indices:
|
|
|
243 |
return []
|
244 |
|
245 |
similarities = course_similarity[skill_indices]
|
246 |
-
# Use
|
247 |
popularity = courses_df['popularity'].values if 'popularity' in courses_df else np.full(len(courses_df), 0.8)
|
248 |
completion_rate = courses_df['completion_rate'].values if 'completion_rate' in courses_df else np.full(len(courses_df), 0.7)
|
249 |
total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * popularity + 0.2 * completion_rate
|
@@ -251,8 +260,14 @@ def recommend_courses(skills_to_improve, user_level, upgrade=False):
|
|
251 |
target_level = 'Advanced' if upgrade else user_level
|
252 |
idx = np.argsort(-total_scores)[:5]
|
253 |
candidates = courses_df.iloc[idx]
|
254 |
-
|
255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
except Exception as e:
|
257 |
logger.error(f"Course recommendation error: {e}")
|
258 |
return []
|
|
|
75 |
else:
|
76 |
df[col] = 0.0 # Default for other additional columns
|
77 |
|
78 |
+
# Ensure 'level' column has valid values (not empty)
|
79 |
+
if 'level' in df.columns:
|
80 |
+
df['level'] = df['level'].apply(lambda x: 'Intermediate' if pd.isna(x) or x.strip() == "" else x)
|
81 |
+
else:
|
82 |
+
logger.warning(f"'level' column missing in {file_path}. Adding default 'Intermediate'.")
|
83 |
+
df['level'] = 'Intermediate'
|
84 |
+
|
85 |
return df
|
86 |
except ValueError as ve:
|
87 |
logger.error(f"ValueError loading {file_path}: {ve}. Using fallback data.")
|
|
|
194 |
course_skills = courses_df['skills'].fillna("").tolist()
|
195 |
course_embeddings = universal_model.encode(course_skills, batch_size=128, convert_to_tensor=True, device="cuda" if torch.cuda.is_available() else "cpu").cpu().numpy()
|
196 |
skill_embeddings = universal_model.encode(questions_df['Skill'].unique().tolist(), batch_size=128, convert_to_tensor=True, device="cuda" if torch.cuda.is_available() else "cpu")
|
197 |
+
course_similarity = util.pytorch_cos_sim(skill_embeddings.clone().detach(), course_embeddings.clone().detach()).numpy()
|
198 |
|
199 |
# Precompute job similarities
|
200 |
job_skills = jobs_df['required_skills'].fillna("").tolist()
|
201 |
job_embeddings = universal_model.encode(job_skills, batch_size=128, convert_to_tensor=True, device="cuda" if torch.cuda.is_available() else "cpu").cpu().numpy()
|
202 |
+
job_similarity = util.pytorch_cos_sim(skill_embeddings.clone().detach(), job_embeddings.clone().detach()).numpy()
|
203 |
|
204 |
# Save precomputed resources
|
205 |
with open(TFIDF_PATH, 'wb') as f: pickle.dump(tfidf_vectorizer, f)
|
|
|
243 |
def recommend_courses(skills_to_improve, user_level, upgrade=False):
    """Recommend up to 3 courses for the given skills, biased toward a target level.

    Scores every course as 0.6 * best skill-similarity + 0.2 * popularity
    + 0.2 * completion_rate, takes the top 5, then filters those candidates
    by level ('Advanced' when upgrade=True, otherwise user_level), falling
    back to the unfiltered top candidates when no level matches.

    Args:
        skills_to_improve: iterable of skill names to match against courses.
        user_level: level string used for filtering when upgrade is False.
        upgrade: when True, target 'Advanced' courses instead of user_level.

    Returns:
        List of up to 3 [course_title, Organization] pairs; [] on no match
        or on any internal error (errors are logged, not raised).
    """
    try:
        if not skills_to_improve or courses_df.empty:
            logger.info("No skills to improve or courses_df is empty.")
            return []

        # Hoist the unique-skill lookup: the original recomputed
        # questions_df['Skill'].unique() AND did a linear .index() scan
        # for every requested skill inside the comprehension. Build the
        # index map once; row order matches course_similarity's rows,
        # which were encoded from this same unique() ordering upstream.
        unique_skills = questions_df['Skill'].unique().tolist()
        skill_to_idx = {skill: i for i, skill in enumerate(unique_skills)}
        skill_indices = [skill_to_idx[s] for s in skills_to_improve if s in skill_to_idx]
        if not skill_indices:
            logger.info("No matching skill indices found.")
            return []

        similarities = course_similarity[skill_indices]
        # Use default arrays to avoid KeyError when optional columns are absent.
        popularity = courses_df['popularity'].values if 'popularity' in courses_df else np.full(len(courses_df), 0.8)
        completion_rate = courses_df['completion_rate'].values if 'completion_rate' in courses_df else np.full(len(courses_df), 0.7)
        # Best similarity across the requested skills, blended with course quality signals.
        total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * popularity + 0.2 * completion_rate

        target_level = 'Advanced' if upgrade else user_level
        idx = np.argsort(-total_scores)[:5]
        candidates = courses_df.iloc[idx]

        # Filter by level, but fallback to all top candidates if none match.
        filtered_candidates = candidates[candidates['level'].str.contains(target_level, case=False, na=False)]
        if filtered_candidates.empty:
            logger.warning(f"No courses found for level {target_level}. Returning top courses regardless of level.")
            filtered_candidates = candidates

        return filtered_candidates[['course_title', 'Organization']].values.tolist()[:3]
    except Exception as e:
        logger.error(f"Course recommendation error: {e}")
        return []
|