dielz committed on
Commit
bcb3e72
·
verified ·
1 Parent(s): b1ca1b6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +129 -0
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ from geopy.geocoders import Nominatim
4
+ from geopy.distance import geodesic
5
+ from sklearn.feature_extraction.text import CountVectorizer
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
+ import logging
8
+ import warnings
9
+ import time
10
+
11
# Hide warning noise coming from urllib3: its logger only lets CRITICAL
# records through, and UserWarnings raised from the urllib3 module are ignored.
warnings.filterwarnings(action="ignore", category=UserWarning, module="urllib3")
logging.getLogger(name="urllib3").setLevel(level=logging.CRITICAL)

# Module-level Nominatim client used for geocoding.
geolocator = Nominatim(user_agent="job_recommendation_system")
17
+
18
def _try_geocode(geolocator, query):
    """Run a single geocoding request.

    Returns:
        A ``(match, service_failed)`` pair. ``match`` is whatever
        ``geolocator.geocode`` returned (``None`` when nothing matched) and
        ``service_failed`` is True when the request itself raised a geopy
        service error instead of producing an answer.
    """
    # Imported here so the helper carries its own dependency.
    from geopy.exc import GeocoderServiceError

    try:
        return geolocator.geocode(query), False
    except GeocoderServiceError as exc:
        print(f"Permintaan geocoding untuk {query} gagal: {exc}")
        return None, True


# Resolve a location name to coordinates.
def get_coordinates(location):
    """Look up the ``(latitude, longitude)`` of a free-text location.

    Queries are tried from most to least specific: the normalized location,
    then its first comma-separated part, then its last comma-separated part.
    The full query is retried (up to 5 times, 2 seconds apart) only when one
    of those requests failed with a service error; a clean "no match" answer
    is not retried because repeating the same query cannot change it.

    Args:
        location: Location name as text, e.g. ``"Jakarta, Indonesia"``.
            Hyphens are treated as separators and replaced with commas.

    Returns:
        ``(latitude, longitude)`` of the first match, or ``(None, None)``
        when no query produced a result.
    """
    geolocator = Nominatim(user_agent="job_recommendation_system")

    # Normalize the separator: hyphens become commas.
    location = location.replace("-", ",")

    # Strip the "Metropolitan Area" / "Region" qualifier but keep the whole
    # place name. Taking only the first word would turn
    # "New York Metropolitan Area" into "New".
    if "Metropolitan Area" in location or "Region" in location:
        cleaned = location
        for qualifier in ("Metropolitan Area", "Region"):
            cleaned = cleaned.replace(qualifier, "")
        parts = [part.strip() for part in cleaned.split(",")]
        cleaned = ", ".join(part for part in parts if part)
        # If the qualifier was the entire name, fall back to the raw text.
        location = cleaned or location

    # First attempt: the full location string.
    location_obj, service_failed = _try_geocode(geolocator, location)
    if location_obj:
        return location_obj.latitude, location_obj.longitude

    # Not found: fall back to the first part (city/area) ...
    print(f"Koordinat untuk {location} tidak ditemukan. Mencoba alternatif.")
    city_or_area = location.split(',')[0].strip()
    location_obj, failed = _try_geocode(geolocator, city_or_area)
    service_failed = service_failed or failed

    # ... and then to the last part (country).
    if not location_obj:
        country = location.split(',')[-1].strip()
        location_obj, failed = _try_geocode(geolocator, country)
        service_failed = service_failed or failed

    # Retry only while the failure looks transient (a request errored out).
    retry_count = 0
    while not location_obj and service_failed and retry_count < 5:
        print(f"Mencoba ulang untuk lokasi {location}...")
        time.sleep(2)  # Back off before hitting the service again.
        location_obj, service_failed = _try_geocode(geolocator, location)
        retry_count += 1

    if location_obj:
        return location_obj.latitude, location_obj.longitude

    print(f"Koordinat untuk {location} atau alternatif tidak dapat ditemukan.")
    return None, None
58
+
59
# Read the job postings CSV into a DataFrame once, at import time.
sample_data = pd.read_csv(filepath_or_buffer='job_data_with_coordinates.csv')
61
+
62
# 1. Vectorize skills with CountVectorizer
def vectorize_skills(skills, all_skills):
    """Turn skill texts into token-count vectors.

    A new ``CountVectorizer`` learns its vocabulary from ``all_skills`` and
    that vocabulary is then used to transform ``skills``.

    Args:
        skills: Iterable of skill strings to encode.
        all_skills: Iterable of skill strings the vocabulary is fitted on.

    Returns:
        The matrix produced by ``CountVectorizer.transform`` for ``skills``.
    """
    count_model = CountVectorizer().fit(all_skills)
    return count_model.transform(skills)
68
+
69
# 2. Compute cosine similarity
def calculate_cosine_similarity(user_skills_tfidf, job_skills_tfidf):
    """Return the pairwise cosine similarity between two sets of vectors.

    Despite the ``_tfidf`` suffix in the parameter names, the caller in this
    file passes the count vectors produced by ``vectorize_skills``.

    Args:
        user_skills_tfidf: Vectors for the user's skills (rows of the result).
        job_skills_tfidf: Vectors for the job skills (columns of the result).

    Returns:
        The similarity matrix computed by ``cosine_similarity``.
    """
    similarity_matrix = cosine_similarity(user_skills_tfidf, job_skills_tfidf)
    return similarity_matrix
72
+
73
# 3. Compute the distance between two locations
def calculate_distance(job_coords, user_coords):
    """Return the geodesic distance in kilometres between two points.

    Args:
        job_coords: ``(latitude, longitude)`` of the job.
        user_coords: ``(latitude, longitude)`` of the user.

    Returns:
        The distance in km, or ``float('inf')`` when either point is missing
        (``None`` or containing ``None``) or rejected by ``geodesic``.
    """
    # get_coordinates signals "not found" with (None, None); such a point has
    # no distance, so report the same sentinel used for invalid coordinates.
    for coords in (job_coords, user_coords):
        if coords is None or any(value is None for value in coords):
            return float('inf')

    try:
        return geodesic(job_coords, user_coords).km
    except ValueError:  # Coordinates that geodesic considers invalid.
        return float('inf')
79
+
80
# 4. Main entry point: prepare the data and recommend jobs
def prepare_and_recommend(df, user_skills, user_location):
    """Rank jobs by skill similarity and proximity and return the top five.

    Each job is scored as ``cosine_similarity / (distance_km + 1)``, where the
    similarity compares the user's skills with the job's ``skills`` text and
    the distance is measured from the geocoded ``user_location``.

    Args:
        df: Job DataFrame. Must contain ``latitude``, ``longitude`` and
            ``skills``; the result also selects ``job_link``, ``title``,
            ``company`` and ``location``. The input is not modified.
        user_skills: The user's skills as a single text string.
        user_location: Free-text location, resolved via ``get_coordinates``.

    Returns:
        A DataFrame with the five highest-scoring jobs and the columns
        ``job_link``, ``title``, ``company``, ``location``,
        ``distance (km)`` and ``final score``.

    Raises:
        ValueError: If the coordinate columns are missing, or if the user's
            location cannot be resolved to coordinates.
    """
    # 1. The dataset must already carry coordinates.
    if 'latitude' not in df or 'longitude' not in df:
        raise ValueError("Dataset harus memiliki kolom latitude dan longitude")

    # Work on a copy so the caller's DataFrame does not silently gain columns.
    df = df.copy()

    # 2. Vectorize skills. Missing entries become empty strings because the
    # vectorizer only accepts text.
    job_skills = df['skills'].fillna('').astype(str)
    all_skills = job_skills.tolist()
    user_skills_vtr = vectorize_skills([user_skills], all_skills)
    job_skills_vtr = vectorize_skills(job_skills, all_skills)

    # 3. Similarity between the user (single row) and every job.
    cosine_similarities = calculate_cosine_similarity(user_skills_vtr, job_skills_vtr)
    df['cosine_similarity'] = cosine_similarities[0]

    # 4. Distance from the user to each job.
    user_coords = get_coordinates(user_location)
    if user_coords is None or None in user_coords:
        # Without a resolved user location every distance would be
        # meaningless, so fail loudly instead of ranking on bad numbers.
        raise ValueError(
            f"Koordinat untuk lokasi {user_location} tidak dapat ditemukan"
        )

    distances = []
    for latitude, longitude in zip(df['latitude'], df['longitude']):
        # Missing or zero coordinates mark a job whose location is unknown.
        has_coords = (
            pd.notna(latitude) and pd.notna(longitude)
            and latitude != 0 and longitude != 0
        )
        if has_coords:
            distances.append(calculate_distance((latitude, longitude), user_coords))
        else:
            distances.append(float('inf'))  # Unknown location ranks last.

    df['distance (km)'] = distances

    # 5. Final score; the +1 keeps the division defined at distance 0.
    df['final score'] = df['cosine_similarity'] / (df['distance (km)'] + 1)

    # 6. Keep the five best-scoring jobs.
    top_jobs = df.sort_values(by='final score', ascending=False).head(5)

    return top_jobs[['job_link', 'title', 'company', 'location', 'distance (km)', 'final score']]
116
+
117
# Streamlit UI: collect the user's skills and location, then show the
# recommended jobs when the button is pressed.
st.title('Job Recommendation System')
st.write('Enter your skills and location to get job recommendations.')

user_skills = st.text_input('Enter your skills (comma-separated):')
user_location = st.text_input('Enter your location:')

if st.button('Get Recommendations'):
    inputs_missing = not user_skills or not user_location
    if inputs_missing:
        st.warning('Please enter your skills and location.')
    else:
        st.dataframe(prepare_and_recommend(sample_data, user_skills, user_location))