File size: 3,037 Bytes
d0cf0ec
 
 
 
 
 
 
4dd7600
d0cf0ec
 
630347a
 
 
d0cf0ec
 
630347a
d0cf0ec
 
630347a
d0cf0ec
 
630347a
 
 
d0cf0ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import streamlit as st
import pandas as pd
import numpy as np
from datetime import datetime
import joblib

# Streamlit re-executes this script from the top on every widget interaction,
# so deserializing every artifact with joblib on each rerun is wasted work.
# Route all loads through one helper and, when the installed Streamlit offers
# ``st.cache_resource``, cache the loaded objects by file path. On older
# Streamlit releases the helper stays uncached, which matches the previous
# behavior exactly.
# NOTE: cached objects are shared between reruns and sessions; they are only
# read (never mutated) by this script. Restart the app or clear the cache
# after replacing any .pkl file on disk.
def _load_artifact(path):
    """Deserialize and return the joblib artifact stored at *path*."""
    return joblib.load(path)


_cache_resource = getattr(st, 'cache_resource', None)
if _cache_resource is not None:
    # show_spinner=False keeps the page free of the extra "Running..." notice.
    _load_artifact = _cache_resource(show_spinner=False)(_load_artifact)

# Load the trained Random Forest model
rf_model = _load_artifact('rf_model.pkl')

# Load encoders
le_jobtitle = _load_artifact('le_jobtitle.pkl')
le_location = _load_artifact('le_location.pkl')
le_season = _load_artifact('le_season.pkl')

# Load scaler
scaler = _load_artifact('scaler.pkl')

# Load target encoding mapping for Hospital
hospital_target_mapping = _load_artifact('hospital_target_mapping.pkl')

# Load lists for dropdown menus
job_titles = _load_artifact('job_titles.pkl')
locations = _load_artifact('locations.pkl')
hospitals = _load_artifact('hospitals.pkl')

# Streamlit app: page title and a short description of the model inputs.
st.title('Hourly Pay Rate Prediction')

st.write("""
This application predicts the **Hourly Pay Rate** based on:
- Job Title
- Location
- Hospital
- Contract Start Date
- Contract End Date
""")

# User inputs. The dropdown options come from the lists loaded earlier in this
# script and are sorted for display; both date pickers default to today.
job_title = st.selectbox('Job Title', sorted(job_titles))
location = st.selectbox('Location', sorted(locations))
hospital = st.selectbox('Hospital', sorted(hospitals))
contract_start_date = st.date_input('Contract Start Date', datetime.today())
contract_end_date = st.date_input('Contract End Date', datetime.today())

# Validate contract dates. Only an end date strictly earlier than the start
# date is rejected; a same-day contract (duration 0) is accepted, which is
# also the default state of the two pickers. The message therefore says
# "on or after" so that it matches the rule actually enforced.
if contract_end_date < contract_start_date:
    st.error('Contract End Date must be on or after Contract Start Date.')
    # Halt this script run so no prediction is produced from invalid dates.
    st.stop()

# Feature Engineering: contract length in whole days plus the calendar month
# and year of the start date.
contract_duration = (contract_end_date - contract_start_date).days
start_month = contract_start_date.month
start_year = contract_start_date.year

def get_season(month):
    """Return the season name for a month number.

    Months 12, 1 and 2 map to 'Winter', 3-5 to 'Spring' and 6-8 to
    'Summer'. Every other value (9-11, but also anything unrecognized)
    maps to 'Fall'.
    """
    season_table = (
        ('Winter', (12, 1, 2)),
        ('Spring', (3, 4, 5)),
        ('Summer', (6, 7, 8)),
    )
    for season_name, season_months in season_table:
        if month in season_months:
            return season_name
    # Anything not matched above falls through to the catch-all season.
    return 'Fall'

def _encode_label(encoder, label):
    """Run a single label through *encoder* and return the single encoded value."""
    return encoder.transform([label])[0]


# Season label derived from the contract's starting month.
season = get_season(start_month)

# Encode each categorical input with its matching fitted encoder.
job_title_encoded = _encode_label(le_jobtitle, job_title)
location_encoded = _encode_label(le_location, location)
season_encoded = _encode_label(le_season, season)

# For 'HospitalEncoded_RF', use the target encoding mapping; a hospital that
# is missing from the mapping falls back to the mean of all mapped values.
average_hourly_rate = np.mean(list(hospital_target_mapping.values()))
hospital_encoded_rf = hospital_target_mapping.get(hospital, average_hourly_rate)

# Prepare numerical features: pack them as a single-row 2-D array, scale them,
# then pull the three scaled values back out of that row.
numerical_features = ['ContractDuration', 'StartMonth', 'StartYear']
numerical_values = np.array([[contract_duration, start_month, start_year]])
numerical_values_scaled = scaler.transform(numerical_values)
scaled_row = numerical_values_scaled[0]

# One value per model feature, keyed by the column name the model is fed.
feature_row = {
    'JobTitleEncoded': job_title_encoded,
    'LocationEncoded': location_encoded,
    'HospitalEncoded_RF': hospital_encoded_rf,
    'SeasonEncoded': season_encoded,
    'ContractDuration': scaled_row[0],
    'StartMonth': scaled_row[1],
    'StartYear': scaled_row[2],
}

# Single-row frame: every column holds a one-element list.
input_data = pd.DataFrame(
    {column: [value] for column, value in feature_row.items()}
)

# Column order passed to the model.
features_rf = [
    'JobTitleEncoded', 'LocationEncoded', 'HospitalEncoded_RF',
    'SeasonEncoded', 'ContractDuration', 'StartMonth', 'StartYear',
]

model_input = input_data[features_rf]
prediction = rf_model.predict(model_input)
st.write(f"## Predicted Hourly Pay Rate: ${prediction[0]:.2f}")