File size: 5,723 Bytes
bcb3e72
c1f7985
bcb3e72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cacd96a
bcb3e72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f044af
bcb3e72
 
 
 
 
 
1f044af
bcb3e72
 
 
1f044af
bcb3e72
 
 
 
 
 
1f044af
 
 
 
 
d2ecae9
6a482cd
a4b0912
bcb3e72
 
 
a4b0912
d2ecae9
 
 
bcb3e72
a4b0912
bcb3e72
 
 
a4b0912
d2ecae9
bcb3e72
 
d2ecae9
bcb3e72
 
 
 
d2ecae9
bcb3e72
 
a0c13e7
 
d2ecae9
 
 
 
a4b0912
d2ecae9
 
 
a4b0912
d2ecae9
 
 
 
bcb3e72
2d02f7a
bcb3e72
 
6a482cd
 
bcb3e72
 
6a482cd
bcb3e72
 
 
6a482cd
d3a67dc
 
 
 
 
3010a2d
f46476b
3010a2d
 
bc23151
bcb3e72
d3a67dc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import pandas as pd
import numpy as np
import streamlit as st
from geopy.geocoders import Nominatim
from geopy.distance import geodesic
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import logging
import warnings
import time

# Keep urllib3 quiet: raise its logger threshold to CRITICAL and ignore the
# UserWarnings it emits.
logging.getLogger("urllib3").setLevel(logging.CRITICAL)
warnings.filterwarnings("ignore", category=UserWarning, module="urllib3")

# Module-level Nominatim geocoder, created once at import time with a fixed
# user-agent string.
geolocator = Nominatim(user_agent="job_recommendation_system")

def get_coordinates(location):
    """Geocode a free-text location into a ``(latitude, longitude)`` pair.

    The lookup tries, in order: the cleaned-up location, its first
    comma-separated part (city/area) and its last part (country). When a
    query failed because of a geocoding-service error, the full location is
    retried a few times with a short pause; a definitive "no result" is not
    retried.

    Args:
        location: Place name, e.g. ``"Jakarta, Indonesia"``. Hyphens are
            replaced by commas, and a "Metropolitan Area" / "Region"
            qualifier (and anything after it) is dropped before the lookup.

    Returns:
        ``(latitude, longitude)`` of the first match, or ``(None, None)``
        when the input is blank or nothing could be resolved.
    """
    if not location or not location.strip():
        return None, None

    # Imported here, after the blank-input guard, so the file's import block
    # stays untouched. GeopyError is the base class of geopy's own errors.
    from geopy.exc import GeopyError

    def _lookup(query):
        """Return ``(match, errored)`` for one query; service errors count as a miss."""
        if not query:
            return None, False
        try:
            # Reuse the module-level client instead of building one per call.
            return geolocator.geocode(query), False
        except GeopyError as exc:
            print(f"Layanan geocoding gagal untuk {query}: {exc}")
            return None, True

    location = location.replace("-", ",").strip()

    # Keep the whole name in front of the qualifier ("New York City
    # Metropolitan Area" -> "New York City"), not just its first word.
    for qualifier in ("Metropolitan Area", "Region"):
        if qualifier in location:
            head = location.partition(qualifier)[0].strip(" ,")
            if head:
                location = head

    match, errored = _lookup(location)
    if match:
        return match.latitude, match.longitude

    print(f"Koordinat untuk {location} tidak ditemukan. Mencoba alternatif.")
    parts = [part.strip() for part in location.split(',')]
    candidates = []
    for piece in (parts[0], parts[-1]):  # city/area first, then country
        # Skip blanks and queries that have already been tried.
        if piece and piece != location and piece not in candidates:
            candidates.append(piece)

    for candidate in candidates:
        match, candidate_errored = _lookup(candidate)
        errored = errored or candidate_errored
        if match:
            break

    # Retry only after a service error: repeating an identical query that
    # returned no result would just block the caller.
    retry_count = 0
    while not match and errored and retry_count < 5:
        print(f"Mencoba ulang untuk lokasi {location}...")
        time.sleep(2)  # brief pause before hitting the service again
        match, errored = _lookup(location)
        retry_count += 1

    if match:
        return match.latitude, match.longitude

    print(f"Koordinat untuk {location} atau alternatif tidak dapat ditemukan.")
    return None, None

# Job postings dataset, read at import time from a CSV addressed by a
# relative path.
# NOTE(review): prepare_and_recommend requires 'latitude' and 'longitude'
# columns and also reads 'skills', 'job_link', 'title', 'company' and
# 'location' - confirm the CSV provides all of them.
sample_data = pd.read_csv('job_data_with_coordinates.csv')

# 1. Vectorize skills with CountVectorizer
def vectorize_skills(skills, all_skills):
    """Turn skill texts into token-count vectors.

    A fresh CountVectorizer is fitted on ``all_skills`` to build the
    vocabulary, then ``skills`` is transformed against that vocabulary.

    Args:
        skills: Iterable of skill strings to vectorize.
        all_skills: Iterable of skill strings that defines the vocabulary.

    Returns:
        The matrix produced by ``CountVectorizer.transform`` for ``skills``.
    """
    fitted_vectorizer = CountVectorizer().fit(all_skills)
    return fitted_vectorizer.transform(skills)

# 2. Compute cosine similarity
def calculate_cosine_similarity(user_skills_tfidf, job_skills_tfidf):
    """Return the pairwise cosine-similarity matrix of two vector sets.

    Args:
        user_skills_tfidf: Vectorized user skills (rows of the result).
        job_skills_tfidf: Vectorized job skills (columns of the result).

    Returns:
        Whatever ``sklearn.metrics.pairwise.cosine_similarity`` returns for
        the two inputs.
    """
    similarity_matrix = cosine_similarity(user_skills_tfidf, job_skills_tfidf)
    return similarity_matrix

# 3. Compute the distance between two locations
def calculate_distance(job_coords, user_coords):
    """Return the geodesic distance in kilometres between two coordinate pairs.

    Args:
        job_coords: ``(latitude, longitude)`` of the job.
        user_coords: ``(latitude, longitude)`` of the user.

    Returns:
        The distance in km, or ``float('inf')`` when either pair is missing,
        contains ``None`` or is rejected by geopy as invalid.
    """
    # A failed geocode yields (None, None). Reject it here rather than
    # relying on how geodesic treats missing values.
    for coords in (job_coords, user_coords):
        if coords is None or any(value is None for value in coords):
            return float('inf')

    try:
        return geodesic(job_coords, user_coords).km
    except (ValueError, TypeError):  # invalid or malformed coordinates
        return float('inf')

# Log-based distance normalization
def normalize_distance(distances):
    """Convert distances into proximity scores via ``1 / (1 + log(1 + d))``.

    Zero distance maps to 1 and the score shrinks toward 0 as the distance
    grows; an infinite distance maps to 0.

    Args:
        distances: A number or array-like of distances accepted by
            ``np.log1p``.

    Returns:
        The normalized score(s), element-wise for array-like input.
    """
    damped = np.log1p(distances)  # log(1 + d), defined for d == 0
    return 1 / (damped + 1)

# Rank job postings for a user; an optional search radius can be supplied.
def prepare_and_recommend(df, user_skills, user_location, radius_km=None):
    """Rank job postings by skill similarity and proximity to the user.

    Each job's score is ``1.5 * cosine_similarity * normalized_distance``,
    rounded to two decimals, and the five best-scoring jobs are returned.

    Args:
        df: Job postings. Must contain 'latitude' and 'longitude'; the
            function also reads 'skills', 'job_link', 'title', 'company'
            and 'location'. The frame is not modified.
        user_skills: The user's skills as one string.
        user_location: Free-text location, resolved with ``get_coordinates``.
        radius_km: Optional maximum distance in km. Jobs farther away are
            dropped before scoring. ``None`` (the default) disables the filter.

    Returns:
        A DataFrame with the columns 'job_link', 'title', 'company',
        'location', 'distance (km)' and 'final score', holding at most five
        rows. It is empty when there is nothing to rank.

    Raises:
        ValueError: If ``df`` lacks the 'latitude' or 'longitude' column.
    """
    result_columns = ['job_link', 'title', 'company', 'location', 'distance (km)', 'final score']

    # 1. The dataset must already carry coordinates.
    if 'latitude' not in df or 'longitude' not in df:
        raise ValueError("Dataset harus memiliki kolom latitude dan longitude")

    # Nothing to rank: return an empty frame that still has the result columns.
    if df.empty:
        return pd.DataFrame(columns=result_columns)

    # Work on a copy so the caller's dataset does not gain helper columns.
    df = df.copy()

    # 2. Vectorize the skills. Missing skills become empty text so they
    #    cannot break the vectorizer.
    job_skills = df['skills'].fillna('')
    all_skills = job_skills.tolist()
    user_skills_vtr = vectorize_skills([user_skills], all_skills)
    job_skills_vtr = vectorize_skills(job_skills, all_skills)

    # 3. Cosine similarity between the user and every job.
    cosine_similarities = calculate_cosine_similarity(user_skills_vtr, job_skills_vtr)
    df['cosine_similarity'] = cosine_similarities[0]

    # 4. Distance between each job and the user.
    user_coords = get_coordinates(user_location)
    if user_coords is None or any(value is None for value in user_coords):
        # The user's location could not be resolved: every job is infinitely far.
        distances = [float('inf')] * len(df)
    else:
        distances = [
            calculate_distance((lat, lon), user_coords)
            if pd.notna(lat) and pd.notna(lon)
            else float('inf')
            for lat, lon in zip(df['latitude'], df['longitude'])
        ]
    df['distance (km)'] = distances

    # 5. Optionally keep only the jobs inside the requested radius.
    if radius_km is not None:
        df = df[df['distance (km)'] <= radius_km].copy()
    if df.empty:
        return pd.DataFrame(columns=result_columns)

    # 6. Normalize the distances.
    df['normalized_distance'] = normalize_distance(df['distance (km)'])

    # 7. Final score.
    raw_score = (1.5 * df['cosine_similarity']) * (1.0 * df['normalized_distance'])
    df['final score'] = raw_score.round(2)

    # 8. Keep the five best-scoring jobs.
    top_jobs = df.sort_values(by='final score', ascending=False).head(5)
    return top_jobs[result_columns]

# Streamlit app: collect the user's skills and location, then show the
# ranked job recommendations.
st.title('Job Findr')
st.write('Find your job with ease.')

user_skills = st.text_input('Enter your skills (comma-separated):')
user_location = st.text_input('Job location:')

if st.button('Get Recommendations'):
    # Whitespace-only input counts as missing.
    if user_skills.strip() and user_location.strip():
        recommended_jobs = prepare_and_recommend(sample_data, user_skills, user_location)
        if recommended_jobs.empty:
            st.warning('Tidak ditemukan pekerjaan yang sesuai dengan keterampilan dan lokasi Anda.')
        elif recommended_jobs['final score'].max() < 0.02:
            # Even the best match scores close to zero: warn instead of listing.
            st.warning('Maaf, hasil rekomendasi mungkin kurang relevan dengan keterampilan dan lokasi Anda. Silakan coba mengubah keterampilan atau lokasi Anda.')
        else:
            # reset_index returns a new frame, so adding 'Rank' does not
            # write into a slice of the scored dataset (which can trigger
            # SettingWithCopyWarning). One reset is enough.
            recommended_jobs = recommended_jobs.reset_index(drop=True)
            recommended_jobs['Rank'] = range(1, len(recommended_jobs) + 1)
            columns = ['Rank', 'title', 'job_link', 'company', 'location', 'distance (km)']
            st.dataframe(recommended_jobs[columns])
    else:
        st.warning('Please enter your skills and location.')