dielz committed on
Commit
cacd96a
·
verified ·
1 Parent(s): b50beb6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -24
app.py CHANGED
@@ -8,31 +8,23 @@ import logging
8
  import warnings
9
  import time
10
 
11
- # Menyembunyikan pesan peringatan dari urllib3
12
  logging.getLogger("urllib3").setLevel(logging.CRITICAL)
13
  warnings.filterwarnings("ignore", category=UserWarning, module="urllib3")
14
 
15
- # Inisialisasi geolocator untuk geocoding
16
  geolocator = Nominatim(user_agent="job_recommendation_system")
17
 
18
- # Fungsi untuk mendapatkan koordinat lokasi
19
  def get_coordinates(location):
20
  geolocator = Nominatim(user_agent="job_recommendation_system")
 
21
 
22
- # Format lokasi
23
- location = location.replace("-", ",") # Ganti tanda hubung dengan koma
24
-
25
- # Tangani nama lokasi dengan "Metropolitan Area" atau "Region"
26
  if "Metropolitan Area" in location or "Region" in location:
27
  city_name = location.split(" ")[0] # Ambil nama kota utama
28
  location = city_name
29
 
30
- # Coba cari lokasi
31
  location_obj = geolocator.geocode(location)
32
  if location_obj:
33
  return location_obj.latitude, location_obj.longitude
34
 
35
- # Jika tidak ditemukan, coba nama kota atau negara
36
  print(f"Koordinat untuk {location} tidak ditemukan. Mencoba alternatif.")
37
  city_or_area = location.split(',')[0].strip() # Ambil nama kota pertama
38
  location_obj = geolocator.geocode(city_or_area)
@@ -41,7 +33,6 @@ def get_coordinates(location):
41
  country = location.split(',')[-1].strip() # Ambil nama negara terakhir
42
  location_obj = geolocator.geocode(country)
43
 
44
- # Jika tetap gagal, coba retry
45
  retry_count = 0
46
  while not location_obj and retry_count < 5:
47
  print(f"Mencoba ulang untuk lokasi {location}...")
@@ -49,54 +40,44 @@ def get_coordinates(location):
49
  location_obj = geolocator.geocode(location)
50
  retry_count += 1
51
 
52
- # Return koordinat jika ditemukan
53
  if location_obj:
54
  return location_obj.latitude, location_obj.longitude
55
  else:
56
  print(f"Koordinat untuk {location} atau alternatif tidak dapat ditemukan.")
57
  return None, None
58
 
59
- # Load the dataset
60
  sample_data = pd.read_csv('job_data_with_coordinates.csv')
61
 
62
- # 1. Vektorisasi skill menggunakan CountVectorizer
63
  def vectorize_skills(skills, all_skills):
64
  vectorizer = CountVectorizer()
65
  vectorizer.fit(all_skills)
66
  skills_vector = vectorizer.transform(skills)
67
  return skills_vector
68
 
69
- # 2. Menghitung Cosine Similarity
70
  def calculate_cosine_similarity(user_skills_tfidf, job_skills_tfidf):
71
  return cosine_similarity(user_skills_tfidf, job_skills_tfidf)
72
 
73
- # 3. Menghitung jarak lokasi
74
  def calculate_distance(job_coords, user_coords):
75
  try:
76
  return geodesic(job_coords, user_coords).km
77
  except ValueError: # Menangani kasus koordinat yang tidak valid
78
  return float('inf')
79
 
80
- # 4. Fungsi utama untuk persiapan dan rekomendasi pekerjaan
81
  def prepare_and_recommend(df, user_skills, user_location):
82
- # 1. Memastikan dataset memiliki koordinat
83
  if 'latitude' not in df or 'longitude' not in df:
84
  raise ValueError("Dataset harus memiliki kolom latitude dan longitude")
85
 
86
- # 2. Vektorisasi skill
87
  all_skills = df['skills'].tolist() # Semua skill dari dataset
88
  user_skills_vtr = vectorize_skills([user_skills], all_skills) # Skill user
89
  job_skills_vtr = vectorize_skills(df['skills'], all_skills) # Skill pekerjaan di dataset
90
 
91
- # 3. Menghitung Cosine Similarity antara user dan pekerjaan
92
  cosine_similarities = calculate_cosine_similarity(user_skills_vtr, job_skills_vtr)
93
  df['cosine_similarity'] = cosine_similarities[0]
94
 
95
- # 4. Menghitung jarak antara lokasi pekerjaan dan lokasi user
96
  user_coords = get_coordinates(user_location) # Dapatkan koordinat user
97
  distances = []
98
  for _, row in df.iterrows():
99
- # Pengecekan apakah koordinat pekerjaan valid
100
  if pd.notna(row['latitude']) and pd.notna(row['longitude']) and row['latitude'] != 0 and row['longitude'] != 0:
101
  job_coords = (row['latitude'], row['longitude'])
102
  distance = calculate_distance(job_coords, user_coords)
@@ -105,11 +86,8 @@ def prepare_and_recommend(df, user_skills, user_location):
105
  distances.append(float('inf')) # Jika koordinat tidak valid, jarak tak terhingga
106
 
107
  df['distance (km)'] = distances
108
-
109
- # 5. Menghitung skor akhir berdasarkan Cosine Similarity dan Jarak
110
  df['final score'] = df['cosine_similarity'] / (df['distance (km)'] + 1)
111
 
112
- # 6. Mengurutkan pekerjaan dan memilih 5 teratas berdasarkan skor akhir
113
  top_jobs = df.sort_values(by='final score', ascending=False).head(5)
114
 
115
  return top_jobs[['job_link', 'title', 'company', 'location', 'distance (km)', 'final score']]
 
8
  import warnings
9
  import time
10
 
 
11
  logging.getLogger("urllib3").setLevel(logging.CRITICAL)
12
  warnings.filterwarnings("ignore", category=UserWarning, module="urllib3")
13
 
 
14
  geolocator = Nominatim(user_agent="job_recommendation_system")
15
 
 
16
  def get_coordinates(location):
17
  geolocator = Nominatim(user_agent="job_recommendation_system")
18
+ location = location.replace("-", ",")
19
 
 
 
 
 
20
  if "Metropolitan Area" in location or "Region" in location:
21
  city_name = location.split(" ")[0] # Ambil nama kota utama
22
  location = city_name
23
 
 
24
  location_obj = geolocator.geocode(location)
25
  if location_obj:
26
  return location_obj.latitude, location_obj.longitude
27
 
 
28
  print(f"Koordinat untuk {location} tidak ditemukan. Mencoba alternatif.")
29
  city_or_area = location.split(',')[0].strip() # Ambil nama kota pertama
30
  location_obj = geolocator.geocode(city_or_area)
 
33
  country = location.split(',')[-1].strip() # Ambil nama negara terakhir
34
  location_obj = geolocator.geocode(country)
35
 
 
36
  retry_count = 0
37
  while not location_obj and retry_count < 5:
38
  print(f"Mencoba ulang untuk lokasi {location}...")
 
40
  location_obj = geolocator.geocode(location)
41
  retry_count += 1
42
 
 
43
  if location_obj:
44
  return location_obj.latitude, location_obj.longitude
45
  else:
46
  print(f"Koordinat untuk {location} atau alternatif tidak dapat ditemukan.")
47
  return None, None
48
 
 
49
  sample_data = pd.read_csv('job_data_with_coordinates.csv')
50
 
 
51
  def vectorize_skills(skills, all_skills):
52
  vectorizer = CountVectorizer()
53
  vectorizer.fit(all_skills)
54
  skills_vector = vectorizer.transform(skills)
55
  return skills_vector
56
 
 
57
  def calculate_cosine_similarity(user_skills_tfidf, job_skills_tfidf):
58
  return cosine_similarity(user_skills_tfidf, job_skills_tfidf)
59
 
 
60
  def calculate_distance(job_coords, user_coords):
61
  try:
62
  return geodesic(job_coords, user_coords).km
63
  except ValueError: # Menangani kasus koordinat yang tidak valid
64
  return float('inf')
65
 
66
+ # Fungsi utama untuk persiapan dan rekomendasi pekerjaan
67
  def prepare_and_recommend(df, user_skills, user_location):
 
68
  if 'latitude' not in df or 'longitude' not in df:
69
  raise ValueError("Dataset harus memiliki kolom latitude dan longitude")
70
 
 
71
  all_skills = df['skills'].tolist() # Semua skill dari dataset
72
  user_skills_vtr = vectorize_skills([user_skills], all_skills) # Skill user
73
  job_skills_vtr = vectorize_skills(df['skills'], all_skills) # Skill pekerjaan di dataset
74
 
 
75
  cosine_similarities = calculate_cosine_similarity(user_skills_vtr, job_skills_vtr)
76
  df['cosine_similarity'] = cosine_similarities[0]
77
 
 
78
  user_coords = get_coordinates(user_location) # Dapatkan koordinat user
79
  distances = []
80
  for _, row in df.iterrows():
 
81
  if pd.notna(row['latitude']) and pd.notna(row['longitude']) and row['latitude'] != 0 and row['longitude'] != 0:
82
  job_coords = (row['latitude'], row['longitude'])
83
  distance = calculate_distance(job_coords, user_coords)
 
86
  distances.append(float('inf')) # Jika koordinat tidak valid, jarak tak terhingga
87
 
88
  df['distance (km)'] = distances
 
 
89
  df['final score'] = df['cosine_similarity'] / (df['distance (km)'] + 1)
90
 
 
91
  top_jobs = df.sort_values(by='final score', ascending=False).head(5)
92
 
93
  return top_jobs[['job_link', 'title', 'company', 'location', 'distance (km)', 'final score']]