Muhammad541 committed on
Commit
5af9a5b
·
verified ·
1 Parent(s): d51cb13

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -11
app.py CHANGED
@@ -52,15 +52,29 @@ course_similarity = None
52
  job_similarity = None
53
 
54
  # Improved dataset loading with fallback
55
- def load_dataset(file_path, required_columns=[], fallback_data=None):
56
  try:
57
  df = pd.read_csv(file_path)
58
- missing_columns = [col for col in required_columns if col not in df.columns]
59
- if missing_columns:
60
- logger.warning(f"Columns {missing_columns} missing in {file_path}. Using default values.")
61
- for col in required_columns:
62
- if col not in df.columns:
63
- df[col] = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  return df
65
  except ValueError as ve:
66
  logger.error(f"ValueError loading {file_path}: {ve}. Using fallback data.")
@@ -76,14 +90,14 @@ def load_dataset(file_path, required_columns=[], fallback_data=None):
76
  return None
77
 
78
  # Load datasets with fallbacks
79
- questions_df = load_dataset("Generated_Skill-Based_Questions.csv", ["Skill", "Question", "Answer"], {
80
  'Skill': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
81
  'Question': ['Advanced Linux question', 'Advanced Git question', 'Basic Node.js question',
82
  'Intermediate Python question', 'Basic Kubernetes question'],
83
  'Answer': ['Linux answer', 'Git answer', 'Node.js answer', 'Python answer', 'Kubernetes answer']
84
  })
85
 
86
- courses_df = load_dataset("coursera_course_dataset_v2_no_null.csv", ["skills", "course_title", "Organization", "level"], {
87
  'skills': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
88
  'course_title': ['Linux Admin', 'Git Mastery', 'Node.js Advanced', 'Python for Data', 'Kubernetes Basics'],
89
  'Organization': ['Coursera', 'Udemy', 'Pluralsight', 'edX', 'Linux Foundation'],
@@ -92,7 +106,7 @@ courses_df = load_dataset("coursera_course_dataset_v2_no_null.csv", ["skills", "
92
  'completion_rate': [0.65, 0.7, 0.6, 0.8, 0.75]
93
  })
94
 
95
- jobs_df = load_dataset("Updated_Job_Posting_Dataset.csv", ["job_title", "company_name", "location", "required_skills", "job_description"], {
96
  'job_title': ['DevOps Engineer', 'Cloud Architect', 'Software Engineer', 'Data Scientist', 'Security Analyst'],
97
  'company_name': ['Tech Corp', 'Cloud Inc', 'Tech Solutions', 'Data Co', 'SecuriTech'],
98
  'location': ['Remote', 'Islamabad', 'Karachi', 'Remote', 'Islamabad'],
@@ -229,7 +243,10 @@ def recommend_courses(skills_to_improve, user_level, upgrade=False):
229
  return []
230
 
231
  similarities = course_similarity[skill_indices]
232
- total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * courses_df['popularity'].values + 0.2 * courses_df['completion_rate'].values
 
 
 
233
 
234
  target_level = 'Advanced' if upgrade else user_level
235
  idx = np.argsort(-total_scores)[:5]
 
52
  job_similarity = None
53
 
54
  # Improved dataset loading with fallback
55
+ def load_dataset(file_path, required_columns=[], additional_columns=['popularity', 'completion_rate'], fallback_data=None):
56
  try:
57
  df = pd.read_csv(file_path)
58
+ missing_required = [col for col in required_columns if col not in df.columns]
59
+ missing_additional = [col for col in additional_columns if col not in df.columns]
60
+
61
+ # Handle missing required columns
62
+ if missing_required:
63
+ logger.warning(f"Required columns {missing_required} missing in {file_path}. Adding empty values.")
64
+ for col in missing_required:
65
+ df[col] = ""
66
+
67
+ # Handle missing additional columns (popularity, completion_rate, etc.)
68
+ if missing_additional:
69
+ logger.warning(f"Additional columns {missing_additional} missing in {file_path}. Adding default values.")
70
+ for col in missing_additional:
71
+ if col == 'popularity':
72
+ df[col] = 0.8 # Default value for popularity
73
+ elif col == 'completion_rate':
74
+ df[col] = 0.7 # Default value for completion_rate
75
+ else:
76
+ df[col] = 0.0 # Default for other additional columns
77
+
78
  return df
79
  except ValueError as ve:
80
  logger.error(f"ValueError loading {file_path}: {ve}. Using fallback data.")
 
90
  return None
91
 
92
  # Load datasets with fallbacks
93
+ questions_df = load_dataset("Generated_Skill-Based_Questions.csv", ["Skill", "Question", "Answer"], [], {
94
  'Skill': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
95
  'Question': ['Advanced Linux question', 'Advanced Git question', 'Basic Node.js question',
96
  'Intermediate Python question', 'Basic Kubernetes question'],
97
  'Answer': ['Linux answer', 'Git answer', 'Node.js answer', 'Python answer', 'Kubernetes answer']
98
  })
99
 
100
+ courses_df = load_dataset("coursera_course_dataset_v2_no_null.csv", ["skills", "course_title", "Organization", "level"], ['popularity', 'completion_rate'], {
101
  'skills': ['Linux', 'Git', 'Node.js', 'Python', 'Kubernetes'],
102
  'course_title': ['Linux Admin', 'Git Mastery', 'Node.js Advanced', 'Python for Data', 'Kubernetes Basics'],
103
  'Organization': ['Coursera', 'Udemy', 'Pluralsight', 'edX', 'Linux Foundation'],
 
106
  'completion_rate': [0.65, 0.7, 0.6, 0.8, 0.75]
107
  })
108
 
109
+ jobs_df = load_dataset("Updated_Job_Posting_Dataset.csv", ["job_title", "company_name", "location", "required_skills", "job_description"], [], {
110
  'job_title': ['DevOps Engineer', 'Cloud Architect', 'Software Engineer', 'Data Scientist', 'Security Analyst'],
111
  'company_name': ['Tech Corp', 'Cloud Inc', 'Tech Solutions', 'Data Co', 'SecuriTech'],
112
  'location': ['Remote', 'Islamabad', 'Karachi', 'Remote', 'Islamabad'],
 
243
  return []
244
 
245
  similarities = course_similarity[skill_indices]
246
+ # Fall back to constant default arrays when a column is missing, avoiding a KeyError
247
+ popularity = courses_df['popularity'].values if 'popularity' in courses_df else np.full(len(courses_df), 0.8)
248
+ completion_rate = courses_df['completion_rate'].values if 'completion_rate' in courses_df else np.full(len(courses_df), 0.7)
249
+ total_scores = 0.6 * np.max(similarities, axis=0) + 0.2 * popularity + 0.2 * completion_rate
250
 
251
  target_level = 'Advanced' if upgrade else user_level
252
  idx = np.argsort(-total_scores)[:5]