dielz committed on
Commit
d2ecae9
·
verified ·
1 Parent(s): 1f044af

add radius

Browse files
Files changed (1) hide show
  1. app.py +23 -18
app.py CHANGED
@@ -71,44 +71,48 @@ def normalize_distance(distances):
71
  normalized = 1 / (1 + np.log1p(distances)) # log1p untuk menangani log(1 + distance)
72
  return normalized
73
 
74
- # Fungsi utama untuk persiapan dan rekomendasi pekerjaan
75
- def prepare_and_recommend(df, user_skills, user_location):
76
  # 1. Memastikan dataset memiliki koordinat
77
  if 'latitude' not in df or 'longitude' not in df:
78
  raise ValueError("Dataset harus memiliki kolom latitude dan longitude")
79
 
80
  # 2. Vektorisasi skill
81
- all_skills = df['skills'].tolist() # Semua skill dari dataset
82
- user_skills_vtr = vectorize_skills([user_skills], all_skills) # Skill user
83
- job_skills_vtr = vectorize_skills(df['skills'], all_skills) # Skill pekerjaan di dataset
84
 
85
  # 3. Menghitung Cosine Similarity antara user dan pekerjaan
86
  cosine_similarities = calculate_cosine_similarity(user_skills_vtr, job_skills_vtr)
87
  df['cosine_similarity'] = cosine_similarities[0]
88
 
89
  # 4. Menghitung jarak antara lokasi pekerjaan dan lokasi user
90
- user_coords = get_coordinates(user_location) # Dapatkan koordinat user
91
  distances = []
92
  for _, row in df.iterrows():
93
- # Pengecekan apakah koordinat pekerjaan valid
94
- if pd.notna(row['latitude']) and pd.notna(row['longitude']) and row['latitude'] != 0 and row['longitude'] != 0:
95
  job_coords = (row['latitude'], row['longitude'])
96
  distance = calculate_distance(job_coords, user_coords)
97
  distances.append(distance)
98
  else:
99
- distances.append(float('inf')) # Jika koordinat tidak valid, jarak tak terhingga
100
-
101
  df['distance (km)'] = distances
102
 
103
- # 5. Normalisasi jarak
104
- df['normalized_distance'] = normalize_distance(df['distance (km)'])
 
 
 
 
105
 
106
- # 6. Menghitung skor akhir berdasarkan Cosine Similarity dan Normalisasi Jarak
107
- df['final score'] = (1.5 * df['cosine_similarity']) * (1.0 * df['normalized_distance'])
108
- df['final score'] = df['final score'].round(2)
109
 
110
- # 7. Mengurutkan pekerjaan dan memilih 5 teratas berdasarkan skor akhir
111
- top_jobs = df.sort_values(by='final score', ascending=False).head(5)
 
 
112
 
113
  return top_jobs[['job_link', 'title', 'company', 'location', 'distance (km)', 'final score']]
114
 
@@ -118,10 +122,11 @@ st.write('Enter your skills and location to get job recommendations.')
118
 
119
  user_skills = st.text_input('Enter your skills (comma-separated):')
120
  user_location = st.text_input('Enter your location:')
 
121
 
122
  if st.button('Get Recommendations'):
123
  if user_skills and user_location:
124
- recommended_jobs = prepare_and_recommend(sample_data, user_skills, user_location)
125
  if recommended_jobs.empty:
126
  st.warning('Tidak ditemukan pekerjaan yang sesuai dengan keterampilan dan lokasi Anda.')
127
  elif recommended_jobs['final score'].max() < 0.02:
 
71
  normalized = 1 / (1 + np.log1p(distances)) # log1p untuk menangani log(1 + distance)
72
  return normalized
73
 
74
# Main entry point: rank jobs by skill similarity and proximity within a radius.
def prepare_and_recommend(df, user_skills, user_location, radius_km):
    """Return up to five recommended jobs within ``radius_km`` of the user.

    Each job is scored as ``1.5 * cosine_similarity * normalized_distance``
    (rounded to two decimals), where the similarity compares the user's skills
    with the job's skills and the normalized distance comes from
    ``normalize_distance``.

    Args:
        df: Job dataset. Must contain ``latitude``, ``longitude`` and
            ``skills`` columns, plus the output columns ``job_link``,
            ``title``, ``company`` and ``location``. The caller's frame is
            not modified.
        user_skills: The user's skills as a single string.
        user_location: Location text passed to ``get_coordinates``.
        radius_km: Maximum distance (km) a job may be from the user.

    Returns:
        A DataFrame with columns ``job_link``, ``title``, ``company``,
        ``location``, ``distance (km)`` and ``final score``, sorted by
        ``final score`` descending and limited to five rows. When no job lies
        inside the radius, an empty DataFrame with those same columns.

    Raises:
        ValueError: If ``df`` lacks a ``latitude`` or ``longitude`` column.
    """
    output_columns = ['job_link', 'title', 'company', 'location', 'distance (km)', 'final score']

    # 1. Make sure the dataset carries coordinates.
    if 'latitude' not in df or 'longitude' not in df:
        raise ValueError("Dataset harus memiliki kolom latitude dan longitude")

    # Work on a private copy so the helper columns added below never leak into
    # the caller's dataset (which is reused across reruns of the app).
    df = df.copy()

    # 2. Vectorize the skills of the user and of every job.
    all_skills = df['skills'].tolist()
    user_skills_vtr = vectorize_skills([user_skills], all_skills)
    job_skills_vtr = vectorize_skills(df['skills'], all_skills)

    # 3. Cosine similarity between the user and each job.
    cosine_similarities = calculate_cosine_similarity(user_skills_vtr, job_skills_vtr)
    df['cosine_similarity'] = cosine_similarities[0]

    # 4. Distance from each job to the user. Jobs without coordinates get an
    #    infinite distance so the radius filter below always drops them.
    # NOTE(review): assumes get_coordinates returns a usable coordinate pair for
    # any user input -- confirm how it behaves when the lookup fails.
    user_coords = get_coordinates(user_location)
    df['distance (km)'] = [
        calculate_distance((lat, lon), user_coords)
        if pd.notna(lat) and pd.notna(lon)
        else float('inf')
        for lat, lon in zip(df['latitude'], df['longitude'])
    ]

    # 5. Keep only the jobs inside the requested radius. The explicit copy
    #    avoids assigning new columns to a slice of the previous frame.
    df = df[df['distance (km)'] <= radius_km].copy()
    if df.empty:
        # Return an empty frame that still has the output columns; selecting
        # those columns from a column-less DataFrame would raise KeyError.
        return pd.DataFrame(columns=output_columns)

    # 6. Normalize the distances.
    df['normalized_distance'] = normalize_distance(df['distance (km)'])

    # 7. Final score from similarity and normalized distance.
    raw_score = (1.5 * df['cosine_similarity']) * (1.0 * df['normalized_distance'])
    df['final score'] = raw_score.round(2)

    # 8. Sort by final score and keep the five best jobs.
    top_jobs = df.sort_values(by='final score', ascending=False).head(5)

    return top_jobs[output_columns]
118
 
 
122
 
123
  user_skills = st.text_input('Enter your skills (comma-separated):')
124
  user_location = st.text_input('Enter your location:')
125
+ radius_km = st.number_input('Enter your preferred radius (in km):', min_value=1, value=10)
126
 
127
  if st.button('Get Recommendations'):
128
  if user_skills and user_location:
129
+ recommended_jobs = prepare_and_recommend(sample_data, user_skills, user_location, radius_km)
130
  if recommended_jobs.empty:
131
  st.warning('Tidak ditemukan pekerjaan yang sesuai dengan keterampilan dan lokasi Anda.')
132
  elif recommended_jobs['final score'].max() < 0.02: