dielz committed on
Commit
a4b0912
·
verified ·
1 Parent(s): 018e593

Add weights and distance normalization to the final score

Browse files
Files changed (1) hide show
  1. app.py +14 -1
app.py CHANGED
@@ -65,19 +65,25 @@ def calculate_distance(job_coords, user_coords):
65
 
66
  # Fungsi utama untuk persiapan dan rekomendasi pekerjaan
67
  def prepare_and_recommend(df, user_skills, user_location):
 
 
68
  if 'latitude' not in df or 'longitude' not in df:
69
  raise ValueError("Dataset harus memiliki kolom latitude dan longitude")
70
 
 
71
  all_skills = df['skills'].tolist() # Semua skill dari dataset
72
  user_skills_vtr = vectorize_skills([user_skills], all_skills) # Skill user
73
  job_skills_vtr = vectorize_skills(df['skills'], all_skills) # Skill pekerjaan di dataset
74
 
 
75
  cosine_similarities = calculate_cosine_similarity(user_skills_vtr, job_skills_vtr)
76
  df['cosine_similarity'] = cosine_similarities[0]
77
 
 
78
  user_coords = get_coordinates(user_location) # Dapatkan koordinat user
79
  distances = []
80
  for _, row in df.iterrows():
 
81
  if pd.notna(row['latitude']) and pd.notna(row['longitude']) and row['latitude'] != 0 and row['longitude'] != 0:
82
  job_coords = (row['latitude'], row['longitude'])
83
  distance = calculate_distance(job_coords, user_coords)
@@ -86,8 +92,15 @@ def prepare_and_recommend(df, user_skills, user_location):
86
  distances.append(float('inf')) # Jika koordinat tidak valid, jarak tak terhingga
87
 
88
  df['distance (km)'] = distances
89
- df['final score'] = df['cosine_similarity'] / (df['distance (km)'] + 1)
90
 
 
 
 
 
 
 
 
 
91
  top_jobs = df.sort_values(by='final score', ascending=False).head(5)
92
 
93
  return top_jobs[['job_link', 'title', 'company', 'location', 'distance (km)', 'final score']]
 
65
 
66
  # Fungsi utama untuk persiapan dan rekomendasi pekerjaan
67
  def prepare_and_recommend(df, user_skills, user_location):
68
+ def prepare_and_recommend(df, user_skills, user_location):
69
+ # 1. Memastikan dataset memiliki koordinat
70
  if 'latitude' not in df or 'longitude' not in df:
71
  raise ValueError("Dataset harus memiliki kolom latitude dan longitude")
72
 
73
+ # 2. Vektorisasi skill
74
  all_skills = df['skills'].tolist() # Semua skill dari dataset
75
  user_skills_vtr = vectorize_skills([user_skills], all_skills) # Skill user
76
  job_skills_vtr = vectorize_skills(df['skills'], all_skills) # Skill pekerjaan di dataset
77
 
78
+ # 3. Menghitung Cosine Similarity antara user dan pekerjaan
79
  cosine_similarities = calculate_cosine_similarity(user_skills_vtr, job_skills_vtr)
80
  df['cosine_similarity'] = cosine_similarities[0]
81
 
82
+ # 4. Menghitung jarak antara lokasi pekerjaan dan lokasi user
83
  user_coords = get_coordinates(user_location) # Dapatkan koordinat user
84
  distances = []
85
  for _, row in df.iterrows():
86
+ # Pengecekan apakah koordinat pekerjaan valid
87
  if pd.notna(row['latitude']) and pd.notna(row['longitude']) and row['latitude'] != 0 and row['longitude'] != 0:
88
  job_coords = (row['latitude'], row['longitude'])
89
  distance = calculate_distance(job_coords, user_coords)
 
92
  distances.append(float('inf')) # Jika koordinat tidak valid, jarak tak terhingga
93
 
94
  df['distance (km)'] = distances
 
95
 
96
+ # 5. Normalisasi jarak
97
+ df['normalized_distance'] = normalize_distance(df['distance (km)'])
98
+
99
+ # 6. Menghitung skor akhir berdasarkan Cosine Similarity dan Normalisasi Jarak
100
+ df['final score'] = (1.5 * df['cosine_similarity']) * (1.0 * df['normalized_distance'])
101
+ df['final score'] = df['final score'].round(2)
102
+
103
+ # 7. Mengurutkan pekerjaan dan memilih 5 teratas berdasarkan skor akhir
104
  top_jobs = df.sort_values(by='final score', ascending=False).head(5)
105
 
106
  return top_jobs[['job_link', 'title', 'company', 'location', 'distance (km)', 'final score']]