Spaces:
Sleeping
Sleeping
add radius
Browse files
app.py
CHANGED
|
@@ -71,44 +71,48 @@ def normalize_distance(distances):
|
|
| 71 |
normalized = 1 / (1 + np.log1p(distances)) # log1p untuk menangani log(1 + distance)
|
| 72 |
return normalized
|
| 73 |
|
| 74 |
-
#
|
| 75 |
-
def prepare_and_recommend(df, user_skills, user_location):
|
| 76 |
# 1. Memastikan dataset memiliki koordinat
|
| 77 |
if 'latitude' not in df or 'longitude' not in df:
|
| 78 |
raise ValueError("Dataset harus memiliki kolom latitude dan longitude")
|
| 79 |
|
| 80 |
# 2. Vektorisasi skill
|
| 81 |
-
all_skills = df['skills'].tolist()
|
| 82 |
-
user_skills_vtr = vectorize_skills([user_skills], all_skills)
|
| 83 |
-
job_skills_vtr = vectorize_skills(df['skills'], all_skills)
|
| 84 |
|
| 85 |
# 3. Menghitung Cosine Similarity antara user dan pekerjaan
|
| 86 |
cosine_similarities = calculate_cosine_similarity(user_skills_vtr, job_skills_vtr)
|
| 87 |
df['cosine_similarity'] = cosine_similarities[0]
|
| 88 |
|
| 89 |
# 4. Menghitung jarak antara lokasi pekerjaan dan lokasi user
|
| 90 |
-
user_coords = get_coordinates(user_location)
|
| 91 |
distances = []
|
| 92 |
for _, row in df.iterrows():
|
| 93 |
-
|
| 94 |
-
if pd.notna(row['latitude']) and pd.notna(row['longitude']) and row['latitude'] != 0 and row['longitude'] != 0:
|
| 95 |
job_coords = (row['latitude'], row['longitude'])
|
| 96 |
distance = calculate_distance(job_coords, user_coords)
|
| 97 |
distances.append(distance)
|
| 98 |
else:
|
| 99 |
-
distances.append(float('inf'))
|
| 100 |
-
|
| 101 |
df['distance (km)'] = distances
|
| 102 |
|
| 103 |
-
# 5.
|
| 104 |
-
df
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
| 112 |
|
| 113 |
return top_jobs[['job_link', 'title', 'company', 'location', 'distance (km)', 'final score']]
|
| 114 |
|
|
@@ -118,10 +122,11 @@ st.write('Enter your skills and location to get job recommendations.')
|
|
| 118 |
|
| 119 |
user_skills = st.text_input('Enter your skills (comma-separated):')
|
| 120 |
user_location = st.text_input('Enter your location:')
|
|
|
|
| 121 |
|
| 122 |
if st.button('Get Recommendations'):
|
| 123 |
if user_skills and user_location:
|
| 124 |
-
recommended_jobs = prepare_and_recommend(sample_data, user_skills, user_location)
|
| 125 |
if recommended_jobs.empty:
|
| 126 |
st.warning('Tidak ditemukan pekerjaan yang sesuai dengan keterampilan dan lokasi Anda.')
|
| 127 |
elif recommended_jobs['final score'].max() < 0.02:
|
|
|
|
| 71 |
normalized = 1 / (1 + np.log1p(distances)) # log1p untuk menangani log(1 + distance)
|
| 72 |
return normalized
|
| 73 |
|
| 74 |
+
# Menambahkan radius sebagai parameter
|
| 75 |
+
def prepare_and_recommend(df, user_skills, user_location, radius_km):
    """Score jobs against a user's skills and location; return the top five.

    The jobs in ``df`` are scored by combining skill cosine similarity with a
    normalized distance, after discarding every job farther than
    ``radius_km`` from the user.

    Args:
        df: Job DataFrame. Must contain ``latitude``, ``longitude`` and
            ``skills`` columns, plus the ``job_link``, ``title``, ``company``
            and ``location`` columns that are returned. It is not modified.
        user_skills: The user's skills, passed as a single document to
            ``vectorize_skills``.
        user_location: Location passed to ``get_coordinates``.
        radius_km: Maximum allowed value of the ``distance (km)`` column
            (presumably kilometres; depends on ``calculate_distance``).

    Returns:
        A DataFrame with columns ``job_link``, ``title``, ``company``,
        ``location``, ``distance (km)`` and ``final score`` holding at most
        five rows sorted by descending ``final score``. If no job lies within
        the radius, an empty DataFrame with those same columns is returned.

    Raises:
        ValueError: If ``df`` lacks a ``latitude`` or ``longitude`` column.
    """
    result_columns = ['job_link', 'title', 'company', 'location', 'distance (km)', 'final score']

    # 1. Make sure the dataset has coordinates.
    if 'latitude' not in df or 'longitude' not in df:
        raise ValueError("Dataset harus memiliki kolom latitude dan longitude")

    # Work on a private copy so the caller's frame does not grow helper
    # columns on every call.
    jobs = df.copy()

    # 2. Vectorize the skills.
    all_skills = jobs['skills'].tolist()
    user_skills_vtr = vectorize_skills([user_skills], all_skills)
    job_skills_vtr = vectorize_skills(jobs['skills'], all_skills)

    # 3. Cosine similarity between the user and every job.
    cosine_similarities = calculate_cosine_similarity(user_skills_vtr, job_skills_vtr)
    jobs['cosine_similarity'] = cosine_similarities[0]

    # 4. Distance between each job and the user. Jobs without coordinates get
    #    an infinite distance so the radius filter below always drops them.
    # NOTE(review): get_coordinates may fail to resolve the location; confirm
    # what it returns in that case and whether calculate_distance tolerates it.
    user_coords = get_coordinates(user_location)
    jobs['distance (km)'] = [
        calculate_distance((lat, lon), user_coords)
        if pd.notna(lat) and pd.notna(lon)
        else float('inf')
        for lat, lon in zip(jobs['latitude'], jobs['longitude'])
    ]

    # 5. Keep only jobs inside the radius. The explicit copy avoids writing
    #    new columns into a slice of ``jobs``.
    nearby = jobs[jobs['distance (km)'] <= radius_km].copy()
    if nearby.empty:
        # Return an empty frame that still carries the expected columns, so
        # callers can test ``.empty`` instead of hitting a KeyError.
        return pd.DataFrame(columns=result_columns)

    # 6. Normalize the distances.
    nearby['normalized_distance'] = normalize_distance(nearby['distance (km)'])

    # 7. Final score: weighted similarity times weighted normalized distance.
    raw_score = (1.5 * nearby['cosine_similarity']) * (1.0 * nearby['normalized_distance'])
    nearby['final score'] = raw_score.round(2)

    # 8. Sort by final score and keep the five best jobs.
    top_jobs = nearby.sort_values(by='final score', ascending=False).head(5)
    return top_jobs[result_columns]
|
| 118 |
|
|
|
|
| 122 |
|
| 123 |
user_skills = st.text_input('Enter your skills (comma-separated):')
|
| 124 |
user_location = st.text_input('Enter your location:')
|
| 125 |
+
radius_km = st.number_input('Enter your preferred radius (in km):', min_value=1, value=10)
|
| 126 |
|
| 127 |
if st.button('Get Recommendations'):
|
| 128 |
if user_skills and user_location:
|
| 129 |
+
recommended_jobs = prepare_and_recommend(sample_data, user_skills, user_location, radius_km)
|
| 130 |
if recommended_jobs.empty:
|
| 131 |
st.warning('Tidak ditemukan pekerjaan yang sesuai dengan keterampilan dan lokasi Anda.')
|
| 132 |
elif recommended_jobs['final score'].max() < 0.02:
|