Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -52,15 +52,29 @@ course_similarity = None
|
|
52 |
job_similarity = None
|
53 |
|
54 |
# Improved dataset loading with fallback
|
55 |
-
def load_dataset(file_path, required_columns=[], fallback_data=None):
|
56 |
try:
|
57 |
df = pd.read_csv(file_path)
|
58 |
-
|
59 |
-
if
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
return df
|
65 |
except ValueError as ve:
|
66 |
logger.error(f"ValueError loading {file_path}: {ve}. Using fallback data.")
|
@@ -76,14 +90,14 @@ def load_dataset(file_path, required_columns=[], fallback_data=None):
|
|
76 |
return None
|
77 |
|
78 |
# Load datasets with fallbacks
|
79 |
-
questions_df = load_dataset("Generated_Skill-Based_Questions.csv", ["Skill", "Question", "Answer"], {
|
80 |
'Skill': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
|
81 |
'Question': ['Advanced Linux question', 'Advanced Git question', 'Basic Node.js question',
|
82 |
'Intermediate Python question', 'Basic Kubernetes question'],
|
83 |
'Answer': ['Linux answer', 'Git answer', 'Node.js answer', 'Python answer', 'Kubernetes answer']
|
84 |
})
|
85 |
|
86 |
-
courses_df = load_dataset("coursera_course_dataset_v2_no_null.csv", ["skills", "course_title", "Organization", "level"], {
|
87 |
'skills': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
|
88 |
'course_title': ['Linux Admin', 'Git Mastery', 'Node.js Advanced', 'Python for Data', 'Kubernetes Basics'],
|
89 |
'Organization': ['Coursera', 'Udemy', 'Pluralsight', 'edX', 'Linux Foundation'],
|
@@ -92,7 +106,7 @@ courses_df = load_dataset("coursera_course_dataset_v2_no_null.csv", ["skills", "
|
|
92 |
'completion_rate': [0.65, 0.7, 0.6, 0.8, 0.75]
|
93 |
})
|
94 |
|
95 |
-
jobs_df = load_dataset("Updated_Job_Posting_Dataset.csv", ["job_title", "company_name", "location", "required_skills", "job_description"], {
|
96 |
'job_title': ['DevOps Engineer', 'Cloud Architect', 'Software Engineer', 'Data Scientist', 'Security Analyst'],
|
97 |
'company_name': ['Tech Corp', 'Cloud Inc', 'Tech Solutions', 'Data Co', 'SecuriTech'],
|
98 |
'location': ['Remote', 'Islamabad', 'Karachi', 'Remote', 'Islamabad'],
|
@@ -229,7 +243,10 @@ def recommend_courses(skills_to_improve, user_level, upgrade=False):
|
|
229 |
return []
|
230 |
|
231 |
similarities = course_similarity[skill_indices]
|
232 |
-
|
|
|
|
|
|
|
233 |
|
234 |
target_level = 'Advanced' if upgrade else user_level
|
235 |
idx = np.argsort(-total_scores)[:5]
|
|
|
52 |
job_similarity = None
|
53 |
|
54 |
# Improved dataset loading with fallback
|
55 |
+
def load_dataset(file_path, required_columns=[], additional_columns=['popularity', 'completion_rate'], fallback_data=None):
|
56 |
try:
|
57 |
df = pd.read_csv(file_path)
|
58 |
+
missing_required = [col for col in required_columns if col not in df.columns]
|
59 |
+
missing_additional = [col for col in additional_columns if col not in df.columns]
|
60 |
+
|
61 |
+
# Handle missing required columns
|
62 |
+
if missing_required:
|
63 |
+
logger.warning(f"Required columns {missing_required} missing in {file_path}. Adding empty values.")
|
64 |
+
for col in missing_required:
|
65 |
+
df[col] = ""
|
66 |
+
|
67 |
+
# Handle missing additional columns (popularity, completion_rate, etc.)
|
68 |
+
if missing_additional:
|
69 |
+
logger.warning(f"Additional columns {missing_additional} missing in {file_path}. Adding default values.")
|
70 |
+
for col in missing_additional:
|
71 |
+
if col == 'popularity':
|
72 |
+
df[col] = 0.8 # Default value for popularity
|
73 |
+
elif col == 'completion_rate':
|
74 |
+
df[col] = 0.7 # Default value for completion_rate
|
75 |
+
else:
|
76 |
+
df[col] = 0.0 # Default for other additional columns
|
77 |
+
|
78 |
return df
|
79 |
except ValueError as ve:
|
80 |
logger.error(f"ValueError loading {file_path}: {ve}. Using fallback data.")
|
|
|
90 |
return None
|
91 |
|
92 |
# Load datasets with fallbacks
|
93 |
+
questions_df = load_dataset("Generated_Skill-Based_Questions.csv", ["Skill", "Question", "Answer"], [], {
|
94 |
'Skill': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
|
95 |
'Question': ['Advanced Linux question', 'Advanced Git question', 'Basic Node.js question',
|
96 |
'Intermediate Python question', 'Basic Kubernetes question'],
|
97 |
'Answer': ['Linux answer', 'Git answer', 'Node.js answer', 'Python answer', 'Kubernetes answer']
|
98 |
})
|
99 |
|
100 |
+
courses_df = load_dataset("coursera_course_dataset_v2_no_null.csv", ["skills", "course_title", "Organization", "level"], ['popularity', 'completion_rate'], {
|
101 |
'skills': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
|
102 |
'course_title': ['Linux Admin', 'Git Mastery', 'Node.js Advanced', 'Python for Data', 'Kubernetes Basics'],
|
103 |
'Organization': ['Coursera', 'Udemy', 'Pluralsight', 'edX', 'Linux Foundation'],
|
|
|
106 |
'completion_rate': [0.65, 0.7, 0.6, 0.8, 0.75]
|
107 |
})
|
108 |
|
109 |
+
jobs_df = load_dataset("Updated_Job_Posting_Dataset.csv", ["job_title", "company_name", "location", "required_skills", "job_description"], [], {
|
110 |
'job_title': ['DevOps Engineer', 'Cloud Architect', 'Software Engineer', 'Data Scientist', 'Security Analyst'],
|
111 |
'company_name': ['Tech Corp', 'Cloud Inc', 'Tech Solutions', 'Data Co', 'SecuriTech'],
|
112 |
'location': ['Remote', 'Islamabad', 'Karachi', 'Remote', 'Islamabad'],
|
|
|
243 |
return []
|
244 |
|
245 |
similarities = course_similarity[skill_indices]
|
246 |
+
# Fall back to default-valued arrays when a column is missing, to avoid a KeyError
|
247 |
+
popularity = courses_df['popularity'].values if 'popularity' in courses_df else np.full(len(courses_df), 0.8)
|
248 |
+
completion_rate = courses_df['completion_rate'].values if 'completion_rate' in courses_df else np.full(len(courses_df), 0.7)
|
249 |
+
total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * popularity + 0.2 * completion_rate
|
250 |
|
251 |
target_level = 'Advanced' if upgrade else user_level
|
252 |
idx = np.argsort(-total_scores)[:5]
|