Spaces:
Sleeping
Sleeping
add radius
Browse files
app.py
CHANGED
@@ -71,44 +71,48 @@ def normalize_distance(distances):
|
|
71 |
normalized = 1 / (1 + np.log1p(distances)) # log1p untuk menangani log(1 + distance)
|
72 |
return normalized
|
73 |
|
74 |
-
#
|
75 |
-
def prepare_and_recommend(df, user_skills, user_location):
|
76 |
# 1. Memastikan dataset memiliki koordinat
|
77 |
if 'latitude' not in df or 'longitude' not in df:
|
78 |
raise ValueError("Dataset harus memiliki kolom latitude dan longitude")
|
79 |
|
80 |
# 2. Vektorisasi skill
|
81 |
-
all_skills = df['skills'].tolist()
|
82 |
-
user_skills_vtr = vectorize_skills([user_skills], all_skills)
|
83 |
-
job_skills_vtr = vectorize_skills(df['skills'], all_skills)
|
84 |
|
85 |
# 3. Menghitung Cosine Similarity antara user dan pekerjaan
|
86 |
cosine_similarities = calculate_cosine_similarity(user_skills_vtr, job_skills_vtr)
|
87 |
df['cosine_similarity'] = cosine_similarities[0]
|
88 |
|
89 |
# 4. Menghitung jarak antara lokasi pekerjaan dan lokasi user
|
90 |
-
user_coords = get_coordinates(user_location)
|
91 |
distances = []
|
92 |
for _, row in df.iterrows():
|
93 |
-
|
94 |
-
if pd.notna(row['latitude']) and pd.notna(row['longitude']) and row['latitude'] != 0 and row['longitude'] != 0:
|
95 |
job_coords = (row['latitude'], row['longitude'])
|
96 |
distance = calculate_distance(job_coords, user_coords)
|
97 |
distances.append(distance)
|
98 |
else:
|
99 |
-
distances.append(float('inf'))
|
100 |
-
|
101 |
df['distance (km)'] = distances
|
102 |
|
103 |
-
# 5.
|
104 |
-
df
|
|
|
|
|
|
|
|
|
105 |
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
|
110 |
-
|
111 |
-
|
|
|
|
|
112 |
|
113 |
return top_jobs[['job_link', 'title', 'company', 'location', 'distance (km)', 'final score']]
|
114 |
|
@@ -118,10 +122,11 @@ st.write('Enter your skills and location to get job recommendations.')
|
|
118 |
|
119 |
user_skills = st.text_input('Enter your skills (comma-separated):')
|
120 |
user_location = st.text_input('Enter your location:')
|
|
|
121 |
|
122 |
if st.button('Get Recommendations'):
|
123 |
if user_skills and user_location:
|
124 |
-
recommended_jobs = prepare_and_recommend(sample_data, user_skills, user_location)
|
125 |
if recommended_jobs.empty:
|
126 |
st.warning('Tidak ditemukan pekerjaan yang sesuai dengan keterampilan dan lokasi Anda.')
|
127 |
elif recommended_jobs['final score'].max() < 0.02:
|
|
|
71 |
normalized = 1 / (1 + np.log1p(distances)) # log1p untuk menangani log(1 + distance)
|
72 |
return normalized
|
73 |
|
74 |
+
# Menambahkan radius sebagai parameter
|
75 |
+
def prepare_and_recommend(df, user_skills, user_location, radius_km):
    """Score the jobs in ``df`` for a user and return the top matches within a radius.

    The function computes a skill similarity and a distance for every job,
    drops jobs farther than ``radius_km`` from the user, combines similarity
    and normalized distance into a ``'final score'`` and returns the five
    highest-scoring jobs.

    Side effect: the columns ``'cosine_similarity'`` and ``'distance (km)'``
    are written onto the ``df`` object passed in. Everything after the radius
    filter happens on an independent copy.

    Args:
        df: Job dataset. Must contain ``'latitude'``, ``'longitude'`` and
            ``'skills'``, plus the result columns ``'job_link'``, ``'title'``,
            ``'company'`` and ``'location'``.
        user_skills: The user's skills, forwarded as a single item to
            ``vectorize_skills``.
        user_location: The user's location, forwarded to ``get_coordinates``.
        radius_km: Maximum allowed distance; jobs with a larger
            ``'distance (km)'`` are excluded.

    Returns:
        A DataFrame with the columns ``job_link``, ``title``, ``company``,
        ``location``, ``distance (km)`` and ``final score``, holding at most
        five rows sorted by descending ``final score``. When no job lies
        within the radius the frame is empty but still has these columns.

    Raises:
        ValueError: If ``df`` has no ``latitude`` or ``longitude`` column.
    """
    result_columns = ['job_link', 'title', 'company', 'location', 'distance (km)', 'final score']

    # 1. The dataset must provide coordinates.
    if 'latitude' not in df or 'longitude' not in df:
        raise ValueError("Dataset harus memiliki kolom latitude dan longitude")

    # 2. Vectorize the skills of the user and of every job against the same corpus.
    all_skills = df['skills'].tolist()
    user_skills_vtr = vectorize_skills([user_skills], all_skills)
    job_skills_vtr = vectorize_skills(df['skills'], all_skills)

    # 3. Cosine similarity between the user and each job (row 0 = the single user).
    cosine_similarities = calculate_cosine_similarity(user_skills_vtr, job_skills_vtr)
    df['cosine_similarity'] = cosine_similarities[0]

    # 4. Distance between each job and the user.
    # NOTE(review): get_coordinates is assumed to always return usable
    # coordinates; confirm how it behaves for an unknown location.
    user_coords = get_coordinates(user_location)
    # Jobs without coordinates get an infinite distance so that the radius
    # filter below always removes them.
    df['distance (km)'] = [
        calculate_distance((row['latitude'], row['longitude']), user_coords)
        if pd.notna(row['latitude']) and pd.notna(row['longitude'])
        else float('inf')
        for _, row in df.iterrows()
    ]

    # 5. Keep only jobs inside the radius. The explicit copy lets the new
    # columns below be assigned without writing into a slice of ``df``.
    nearby = df[df['distance (km)'] <= radius_km].copy()

    # Nothing in range: return an empty frame that still carries the result
    # columns, so callers can test ``.empty`` instead of hitting a KeyError.
    if nearby.empty:
        return pd.DataFrame(columns=result_columns)

    # 6. Normalize the distances.
    nearby['normalized_distance'] = normalize_distance(nearby['distance (km)'])

    # 7. Final score: weighted similarity times weighted normalized distance.
    raw_score = (1.5 * nearby['cosine_similarity']) * (1.0 * nearby['normalized_distance'])
    nearby['final score'] = raw_score.round(2)

    # 8. Sort by final score and keep the five best jobs.
    top_jobs = nearby.sort_values(by='final score', ascending=False).head(5)

    return top_jobs[result_columns]
|
118 |
|
|
|
122 |
|
123 |
user_skills = st.text_input('Enter your skills (comma-separated):')
|
124 |
user_location = st.text_input('Enter your location:')
|
125 |
+
radius_km = st.number_input('Enter your preferred radius (in km):', min_value=1, value=10)
|
126 |
|
127 |
if st.button('Get Recommendations'):
|
128 |
if user_skills and user_location:
|
129 |
+
recommended_jobs = prepare_and_recommend(sample_data, user_skills, user_location, radius_km)
|
130 |
if recommended_jobs.empty:
|
131 |
st.warning('Tidak ditemukan pekerjaan yang sesuai dengan keterampilan dan lokasi Anda.')
|
132 |
elif recommended_jobs['final score'].max() < 0.02:
|