dielz's picture
delete radius
6a482cd verified
raw
history blame
5.73 kB
import pandas as pd
import numpy as np
import streamlit as st
from geopy.geocoders import Nominatim
from geopy.distance import geodesic
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import logging
import warnings
import time
# Only let CRITICAL records from the "urllib3" logger through, and ignore
# UserWarning warnings raised from the urllib3 module.
logging.getLogger("urllib3").setLevel(logging.CRITICAL)
warnings.filterwarnings("ignore", category=UserWarning, module="urllib3")
# Module-level Nominatim geocoder client with a custom user agent.
# NOTE(review): get_coordinates() builds its own Nominatim instance, so this
# object appears unused in the visible code -- confirm before removing.
geolocator = Nominatim(user_agent="job_recommendation_system")
def get_coordinates(location):
    """Resolve a free-text location name to ``(latitude, longitude)``.

    The lookup goes through Nominatim and falls back step by step:

    1. the cleaned-up location string,
    2. its first comma-separated part (city or area),
    3. its last comma-separated part (country),
    4. up to five delayed retries of the cleaned-up string.

    Args:
        location: Location text such as ``"Jakarta, Indonesia"``.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``(None, None)`` when the input
        is empty/blank or none of the lookups produced a result.
    """
    # Nothing to look up: avoid calling string methods on None / blank input.
    if not location or not location.strip():
        return None, None

    # Local import keeps this function self-contained; geopy is already a
    # dependency of this file.
    from geopy.exc import GeocoderServiceError

    geocoder = Nominatim(user_agent="job_recommendation_system")

    def lookup(query):
        """Geocode *query*; a geocoder service error counts as 'not found'."""
        try:
            return geocoder.geocode(query)
        except GeocoderServiceError:
            # Timeouts and similar service failures must not crash the app;
            # the fallbacks and retries below get a chance instead.
            return None

    # Hyphens are treated as separators between location parts.
    location = location.replace("-", ",")

    # Drop an area qualifier but keep the whole city name in front of it
    # ("New York Metropolitan Area" -> "New York", not just "New").
    qualifier_positions = [
        location.find(qualifier)
        for qualifier in ("Metropolitan Area", "Region")
        if qualifier in location
    ]
    if qualifier_positions:
        city_name = location[:min(qualifier_positions)].strip(" ,")
        # When nothing precedes the qualifier, fall back to the first word.
        location = city_name or location.split(" ")[0]

    location_obj = lookup(location)
    if location_obj:
        return location_obj.latitude, location_obj.longitude

    print(f"Koordinat untuk {location} tidak ditemukan. Mencoba alternatif.")

    # Alternatives: first part (city/area), then last part (country).
    parts = location.split(',')
    location_obj = lookup(parts[0].strip()) or lookup(parts[-1].strip())

    # Last resort: retry the cleaned-up location a few times with a delay.
    for _ in range(5):
        if location_obj:
            break
        print(f"Mencoba ulang untuk lokasi {location}...")
        time.sleep(2)  # Delay between attempts
        location_obj = lookup(location)

    if location_obj:
        return location_obj.latitude, location_obj.longitude

    print(f"Koordinat untuk {location} atau alternatif tidak dapat ditemukan.")
    return None, None
# Job postings are loaded once at import time from a CSV in the working
# directory; prepare_and_recommend() requires 'latitude' and 'longitude'
# columns in this data.
sample_data = pd.read_csv('job_data_with_coordinates.csv')
# 1. Vectorize skills with CountVectorizer
def vectorize_skills(skills, all_skills):
    """Turn skill texts into token-count vectors.

    A new ``CountVectorizer`` is fitted on ``all_skills`` on every call, so
    the vocabulary comes from ``all_skills`` alone; ``skills`` is only
    transformed with that vocabulary.

    Args:
        skills: Iterable of skill texts to vectorize.
        all_skills: Iterable of skill texts used to fit the vocabulary.

    Returns:
        The result of ``CountVectorizer.transform(skills)``.
    """
    fitted_vectorizer = CountVectorizer().fit(all_skills)
    return fitted_vectorizer.transform(skills)
# 2. Compute cosine similarity
def calculate_cosine_similarity(user_skills_tfidf, job_skills_tfidf):
    """Return the pairwise cosine similarity of the two vector sets.

    Thin wrapper around ``sklearn.metrics.pairwise.cosine_similarity``.
    Despite the ``_tfidf`` suffix in the parameter names, the function just
    forwards whatever vectors it is given.

    Args:
        user_skills_tfidf: Vectors for the user side (first argument).
        job_skills_tfidf: Vectors for the job side (second argument).

    Returns:
        The similarity matrix produced by ``cosine_similarity``.
    """
    similarity_matrix = cosine_similarity(user_skills_tfidf, job_skills_tfidf)
    return similarity_matrix
# 3. Compute the distance between two locations
def calculate_distance(job_coords, user_coords):
    """Return the geodesic distance in kilometres between two coordinates.

    Args:
        job_coords: ``(latitude, longitude)`` of the job.
        user_coords: ``(latitude, longitude)`` of the user.

    Returns:
        The distance in km, or ``float('inf')`` when either coordinate pair
        is missing (``None`` or containing ``None``) or ``geodesic`` rejects
        the coordinates with a ``ValueError``.
    """
    # A failed geocode yields (None, None); no meaningful distance can be
    # derived from that, so report "infinitely far" like other invalid input.
    for coords in (job_coords, user_coords):
        if coords is None:
            return float('inf')
        if isinstance(coords, (tuple, list)) and any(value is None for value in coords):
            return float('inf')

    try:
        return geodesic(job_coords, user_coords).km
    except ValueError:  # Invalid coordinates
        return float('inf')
# Distance normalization using a logarithm
def normalize_distance(distances):
    """Map distances to scores via ``1 / (1 + log(1 + distance))``.

    A distance of 0 maps to 1.0, larger distances map to smaller values, and
    an infinite distance maps to 0.0.

    Args:
        distances: Scalar or array-like of distances.

    Returns:
        The normalized values, element-wise.
    """
    # log1p(d) computes log(1 + d).
    denominator = np.log1p(distances) + 1
    return 1 / denominator
# Recommend jobs by skill similarity and distance; radius is an optional parameter
def prepare_and_recommend(df, user_skills, user_location, radius=None):
    """Rank jobs by skill similarity and proximity and return the top five.

    Args:
        df: Job postings with at least the columns ``skills``, ``latitude``,
            ``longitude``, ``job_link``, ``title``, ``company`` and
            ``location``. The caller's frame is not modified.
        user_skills: The user's skills as one text string.
        user_location: Free-text location, resolved with ``get_coordinates``.
        radius: Optional maximum distance in km. ``None`` (the default) keeps
            every job whose distance could be computed.

    Returns:
        A DataFrame with at most five rows and the columns ``job_link``,
        ``title``, ``company``, ``location``, ``distance (km)`` and
        ``final score``, sorted by descending score. It is empty (but still
        has these columns) when there are no jobs, the user location cannot
        be resolved, or no job passes the distance filter.

    Raises:
        ValueError: If ``df`` has no ``latitude`` or ``longitude`` column.
    """
    result_columns = ['job_link', 'title', 'company', 'location', 'distance (km)', 'final score']

    # 1. Make sure the dataset has coordinates
    if 'latitude' not in df or 'longitude' not in df:
        raise ValueError("Dataset harus memiliki kolom latitude dan longitude")

    # Empty result that still carries the expected columns, so callers can
    # check `.empty` or select columns without a KeyError.
    no_jobs = pd.DataFrame(columns=result_columns)
    if df.empty:
        return no_jobs

    # Work on a copy so the helper columns below do not leak into the
    # caller's DataFrame.
    df = df.copy()

    # 2. Vectorize skills (missing skills become empty text)
    job_skills = df['skills'].fillna('').astype(str)
    all_skills = job_skills.tolist()
    user_skills_vtr = vectorize_skills([user_skills], all_skills)
    job_skills_vtr = vectorize_skills(job_skills, all_skills)

    # 3. Cosine similarity between the user and every job
    cosine_similarities = calculate_cosine_similarity(user_skills_vtr, job_skills_vtr)
    df['cosine_similarity'] = cosine_similarities[0]

    # 4. Distance between each job location and the user location
    user_coords = get_coordinates(user_location)
    if user_coords is None or any(value is None for value in user_coords):
        # Without user coordinates no distance can be computed.
        return no_jobs

    df['distance (km)'] = [
        calculate_distance((latitude, longitude), user_coords)
        if pd.notna(latitude) and pd.notna(longitude)
        else float('inf')
        for latitude, longitude in zip(df['latitude'], df['longitude'])
    ]

    # 5. Keep jobs with a usable distance, optionally limited to the radius
    within_reach = np.isfinite(df['distance (km)'])
    if radius is not None:
        within_reach &= df['distance (km)'] <= radius
    df = df[within_reach].copy()

    if df.empty:
        return no_jobs

    # 6. Normalize distance
    df['normalized_distance'] = normalize_distance(df['distance (km)'])

    # 7. Final score
    df['final score'] = (1.5 * df['cosine_similarity']) * (1.0 * df['normalized_distance'])
    df['final score'] = df['final score'].round(2)

    # 8. Sort and keep the five best-scoring jobs
    top_jobs = df.sort_values(by='final score', ascending=False).head(5)
    return top_jobs[result_columns]
# Streamlit app: collect skills and location, then show the ranked jobs.
st.title('Job Findr')
st.write('Find your job with ease.')
user_skills = st.text_input('Enter your skills (comma-separated):')
user_location = st.text_input('Job location:')

if st.button('Get Recommendations'):
    # Whitespace-only input counts as missing.
    if user_skills.strip() and user_location.strip():
        recommended_jobs = prepare_and_recommend(sample_data, user_skills, user_location)
        if recommended_jobs.empty:
            st.warning('Tidak ditemukan pekerjaan yang sesuai dengan keterampilan dan lokasi Anda.')
        elif recommended_jobs['final score'].max() < 0.02:
            # Even the best match scores too low to be a useful recommendation.
            st.warning('Maaf, hasil rekomendasi mungkin kurang relevan dengan keterampilan dan lokasi Anda. Silakan coba mengubah keterampilan atau lokasi Anda.')
        else:
            columns = ['Rank', 'title', 'job_link', 'company', 'location', 'distance (km)', 'final score']
            # reset_index returns a new frame, so adding the Rank column does
            # not write into a slice of the dataset.
            recommended_jobs = recommended_jobs.reset_index(drop=True)
            recommended_jobs['Rank'] = range(1, len(recommended_jobs) + 1)
            recommended_jobs = recommended_jobs[columns]
            st.dataframe(recommended_jobs)
    else:
        st.warning('Please enter your skills and location.')