"""Streamlit job recommender.

Ranks job postings by combining the cosine similarity between the user's
skills and each posting's skills with a log-damped distance between the
user's location and the posting's coordinates.
"""

import logging
import time
import warnings

import numpy as np
import pandas as pd
import streamlit as st
from geopy.distance import geodesic
from geopy.exc import GeocoderServiceError
from geopy.geocoders import Nominatim
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

logging.getLogger("urllib3").setLevel(logging.CRITICAL)
warnings.filterwarnings("ignore", category=UserWarning, module="urllib3")

logger = logging.getLogger(__name__)

# One shared geocoder client for the whole script.
geolocator = Nominatim(user_agent="job_recommendation_system")

# Suffixes that are cut from a location before geocoding; only the text in
# front of them is kept.
_AREA_MARKERS = ("Metropolitan Area", "Region")

# Columns returned by prepare_and_recommend, in order.
RESULT_COLUMNS = [
    'job_link',
    'title',
    'company',
    'location',
    'distance (km)',
    'final score',
]


def _simplify_location(location):
    """Return a geocoder-friendly query string for *location*.

    Hyphens become commas, and when the text contains one of
    ``_AREA_MARKERS`` only the part in front of the marker is kept
    (e.g. "New York City Metropolitan Area" -> "New York City").  If nothing
    precedes the marker, the marker itself is removed instead.
    """
    query = location.replace("-", ",").strip()
    for marker in _AREA_MARKERS:
        if marker in query:
            head = query.split(marker, 1)[0].strip(" ,")
            query = head or query.replace(marker, "").strip(" ,")
    return query


def _geocode(query):
    """Geocode *query*; return the match, or None on no match or service error."""
    try:
        return geolocator.geocode(query)
    except GeocoderServiceError as error:
        # Timeouts and similar service failures are treated like "no match"
        # so the caller's fallback and retry logic can take over.
        logger.warning("Geocoding request for %r failed: %s", query, error)
        return None


def get_coordinates(location, max_retries=5, retry_delay=2):
    """Resolve a free-text location to ``(latitude, longitude)``.

    The simplified location is tried first, then its first comma-separated
    part, then its last comma-separated part.  If all of them fail, the
    simplified location is retried up to *max_retries* times, sleeping
    *retry_delay* seconds before each attempt.

    Args:
        location: Location text entered by the user.
        max_retries: Number of extra attempts after the fallbacks fail.
        retry_delay: Seconds to wait before each extra attempt.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``(None, None)`` when the
        location is blank, not a string, or cannot be resolved.
    """
    if not isinstance(location, str):
        return None, None
    query = _simplify_location(location)
    if not query:
        return None, None

    match = _geocode(query)
    if match is None:
        print(f"Koordinat untuk {query} tidak ditemukan. Mencoba alternatif.")
        parts = [part.strip() for part in query.split(',') if part.strip()]
        if parts:
            # First part, then last part; dict.fromkeys de-duplicates while
            # keeping order, and candidates equal to the query are skipped
            # because that exact request has just failed.
            for candidate in dict.fromkeys((parts[0], parts[-1])):
                if candidate == query:
                    continue
                match = _geocode(candidate)
                if match is not None:
                    break

    attempts = 0
    while match is None and attempts < max_retries:
        print(f"Mencoba ulang untuk lokasi {query}...")
        time.sleep(retry_delay)
        match = _geocode(query)
        attempts += 1

    if match is None:
        print(f"Koordinat untuk {query} atau alternatif tidak dapat ditemukan.")
        return None, None
    return match.latitude, match.longitude


def vectorize_skills(skills, all_skills):
    """Return bag-of-words count vectors for *skills*.

    A ``CountVectorizer`` is fitted on *all_skills* to build the vocabulary
    and then used to transform *skills*.
    """
    vectorizer = CountVectorizer()
    vectorizer.fit(all_skills)
    return vectorizer.transform(skills)


def calculate_cosine_similarity(user_skills_tfidf, job_skills_tfidf):
    """Return the pairwise cosine similarity between the two vector sets.

    Despite the parameter names, this script passes count vectors produced
    by ``CountVectorizer``.
    """
    return cosine_similarity(user_skills_tfidf, job_skills_tfidf)


def _has_missing(coords):
    """Return True if *coords* is None or a tuple/list holding None or NaN."""
    if coords is None:
        return True
    if isinstance(coords, (tuple, list)):
        return any(pd.isna(value) for value in coords)
    return False


def calculate_distance(job_coords, user_coords):
    """Return the geodesic distance in kilometres between two coordinates.

    Returns ``float('inf')`` when either coordinate is missing (None/NaN,
    e.g. after a failed geocoding lookup) or rejected by geopy with a
    ``ValueError``.
    """
    if _has_missing(job_coords) or _has_missing(user_coords):
        return float('inf')
    try:
        return geodesic(job_coords, user_coords).km
    except ValueError:
        # Invalid coordinates.
        return float('inf')


def normalize_distance(distances):
    """Map distances to ``1 / (1 + log1p(distance))``.

    A distance of 0 maps to 1 and an infinite distance maps to 0.
    """
    return 1 / (1 + np.log1p(distances))


def prepare_and_recommend(df, user_skills, user_location, top_n=5,
                          skill_weight=1.5, distance_weight=1.0):
    """Score every job in *df* for the user and return the best matches.

    The final score is
    ``(skill_weight * cosine_similarity) * (distance_weight * normalized_distance)``
    rounded to two decimals.  *df* itself is not modified.

    Args:
        df: Job postings; needs ``latitude``, ``longitude``, ``skills`` and
            the columns listed in ``RESULT_COLUMNS`` that are not computed
            here.
        user_skills: The user's skills as a single text string.
        user_location: Free-text location, resolved with ``get_coordinates``.
        top_n: Number of rows to return.
        skill_weight: Multiplier applied to the cosine similarity.
        distance_weight: Multiplier applied to the normalized distance.

    Returns:
        A DataFrame with the ``RESULT_COLUMNS`` columns, sorted by
        descending final score; empty when *df* is empty.

    Raises:
        ValueError: If *df* lacks the ``latitude`` or ``longitude`` column.
    """
    # 1. Make sure the dataset carries coordinates.
    if 'latitude' not in df or 'longitude' not in df:
        raise ValueError("Dataset harus memiliki kolom latitude dan longitude")

    if df.empty:
        return pd.DataFrame(columns=RESULT_COLUMNS)

    # Work on a copy so the caller's DataFrame does not gain score columns.
    jobs = df.copy()

    # 2. Vectorize skills; fit the vocabulary once on the job postings.
    #    Missing skills become empty strings, which CountVectorizer accepts.
    job_skills = jobs['skills'].fillna('').astype(str).tolist()
    vectorizer = CountVectorizer()
    job_vectors = vectorizer.fit_transform(job_skills)
    user_vector = vectorizer.transform([user_skills])

    # 3. Cosine similarity between the user and every job.
    jobs['cosine_similarity'] = calculate_cosine_similarity(
        user_vector, job_vectors
    )[0]

    # 4. Distance between each job and the user.  Missing coordinates on
    #    either side give an infinite distance, i.e. a zero final score.
    user_coords = get_coordinates(user_location)
    jobs['distance (km)'] = [
        calculate_distance((latitude, longitude), user_coords)
        for latitude, longitude in zip(jobs['latitude'], jobs['longitude'])
    ]

    # 5. Normalize the distance.
    jobs['normalized_distance'] = normalize_distance(jobs['distance (km)'])

    # 6. Final score.
    jobs['final score'] = (
        (skill_weight * jobs['cosine_similarity'])
        * (distance_weight * jobs['normalized_distance'])
    ).round(2)

    # 7. Keep the highest-scoring jobs.
    top_jobs = jobs.sort_values(by='final score', ascending=False).head(top_n)
    return top_jobs[RESULT_COLUMNS]


sample_data = pd.read_csv('job_data_with_coordinates.csv')

# Streamlit app
st.title('Job Findr')
st.write('Find your job with ease.')

user_skills = st.text_input('Enter your skills (comma-separated):')
user_location = st.text_input('Job location:')

if st.button('Get Recommendations'):
    # Whitespace-only input counts as missing.
    if user_skills.strip() and user_location.strip():
        recommended_jobs = prepare_and_recommend(sample_data, user_skills, user_location)

        if recommended_jobs.empty:
            st.warning('Tidak ditemukan pekerjaan yang sesuai dengan keterampilan dan lokasi Anda.')
        elif recommended_jobs['final score'].max() < 0.02:
            st.warning('Maaf, hasil rekomendasi mungkin kurang relevan dengan keterampilan dan lokasi Anda. Silakan coba mengubah keterampilan atau lokasi Anda.')
        else:
            columns = ['Rank', 'title', 'job_link', 'company', 'location', 'distance (km)']
            # reset_index returns a new frame, so adding Rank does not write
            # into a slice of the scored data.
            recommended_jobs = recommended_jobs.reset_index(drop=True)
            recommended_jobs['Rank'] = range(1, len(recommended_jobs) + 1)
            st.dataframe(recommended_jobs[columns])
    else:
        st.warning('Please enter your skills and location.')