Spaces:
Sleeping
Sleeping
add weight
Browse files
app.py
CHANGED
@@ -65,19 +65,25 @@ def calculate_distance(job_coords, user_coords):
|
|
65 |
|
66 |
# Fungsi utama untuk persiapan dan rekomendasi pekerjaan
|
67 |
def prepare_and_recommend(df, user_skills, user_location):
|
|
|
|
|
68 |
if 'latitude' not in df or 'longitude' not in df:
|
69 |
raise ValueError("Dataset harus memiliki kolom latitude dan longitude")
|
70 |
|
|
|
71 |
all_skills = df['skills'].tolist() # Semua skill dari dataset
|
72 |
user_skills_vtr = vectorize_skills([user_skills], all_skills) # Skill user
|
73 |
job_skills_vtr = vectorize_skills(df['skills'], all_skills) # Skill pekerjaan di dataset
|
74 |
|
|
|
75 |
cosine_similarities = calculate_cosine_similarity(user_skills_vtr, job_skills_vtr)
|
76 |
df['cosine_similarity'] = cosine_similarities[0]
|
77 |
|
|
|
78 |
user_coords = get_coordinates(user_location) # Dapatkan koordinat user
|
79 |
distances = []
|
80 |
for _, row in df.iterrows():
|
|
|
81 |
if pd.notna(row['latitude']) and pd.notna(row['longitude']) and row['latitude'] != 0 and row['longitude'] != 0:
|
82 |
job_coords = (row['latitude'], row['longitude'])
|
83 |
distance = calculate_distance(job_coords, user_coords)
|
@@ -86,8 +92,15 @@ def prepare_and_recommend(df, user_skills, user_location):
|
|
86 |
distances.append(float('inf')) # Jika koordinat tidak valid, jarak tak terhingga
|
87 |
|
88 |
df['distance (km)'] = distances
|
89 |
-
df['final score'] = df['cosine_similarity'] / (df['distance (km)'] + 1)
|
90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
top_jobs = df.sort_values(by='final score', ascending=False).head(5)
|
92 |
|
93 |
return top_jobs[['job_link', 'title', 'company', 'location', 'distance (km)', 'final score']]
|
|
|
65 |
|
66 |
# Fungsi utama untuk persiapan dan rekomendasi pekerjaan
|
67 |
def prepare_and_recommend(df, user_skills, user_location):
|
68 |
+
def prepare_and_recommend(df, user_skills, user_location):
|
69 |
+
# 1. Memastikan dataset memiliki koordinat
|
70 |
if 'latitude' not in df or 'longitude' not in df:
|
71 |
raise ValueError("Dataset harus memiliki kolom latitude dan longitude")
|
72 |
|
73 |
+
# 2. Vektorisasi skill
|
74 |
all_skills = df['skills'].tolist() # Semua skill dari dataset
|
75 |
user_skills_vtr = vectorize_skills([user_skills], all_skills) # Skill user
|
76 |
job_skills_vtr = vectorize_skills(df['skills'], all_skills) # Skill pekerjaan di dataset
|
77 |
|
78 |
+
# 3. Menghitung Cosine Similarity antara user dan pekerjaan
|
79 |
cosine_similarities = calculate_cosine_similarity(user_skills_vtr, job_skills_vtr)
|
80 |
df['cosine_similarity'] = cosine_similarities[0]
|
81 |
|
82 |
+
# 4. Menghitung jarak antara lokasi pekerjaan dan lokasi user
|
83 |
user_coords = get_coordinates(user_location) # Dapatkan koordinat user
|
84 |
distances = []
|
85 |
for _, row in df.iterrows():
|
86 |
+
# Pengecekan apakah koordinat pekerjaan valid
|
87 |
if pd.notna(row['latitude']) and pd.notna(row['longitude']) and row['latitude'] != 0 and row['longitude'] != 0:
|
88 |
job_coords = (row['latitude'], row['longitude'])
|
89 |
distance = calculate_distance(job_coords, user_coords)
|
|
|
92 |
distances.append(float('inf')) # Jika koordinat tidak valid, jarak tak terhingga
|
93 |
|
94 |
df['distance (km)'] = distances
|
|
|
95 |
|
96 |
+
# 5. Normalisasi jarak
|
97 |
+
df['normalized_distance'] = normalize_distance(df['distance (km)'])
|
98 |
+
|
99 |
+
# 6. Menghitung skor akhir berdasarkan Cosine Similarity dan Normalisasi Jarak
|
100 |
+
df['final score'] = (1.5 * df['cosine_similarity']) * (1.0 * df['normalized_distance'])
|
101 |
+
df['final score'] = df['final score'].round(2)
|
102 |
+
|
103 |
+
# 7. Mengurutkan pekerjaan dan memilih 5 teratas berdasarkan skor akhir
|
104 |
top_jobs = df.sort_values(by='final score', ascending=False).head(5)
|
105 |
|
106 |
return top_jobs[['job_link', 'title', 'company', 'location', 'distance (km)', 'final score']]
|