from datetime import datetime

import joblib
import numpy as np
import pandas as pd
import streamlit as st

# --- Model artifacts ---------------------------------------------------------
# Streamlit re-executes this script from the top on every widget interaction,
# so the pickled artifacts are read through a cached helper rather than being
# loaded from disk again on each rerun.
#
# NOTE: joblib.load unpickles its input, which can execute arbitrary code.
# Only deploy artifact files produced by a trusted training pipeline.


def _cache_resource(func):
    """Wrap *func* with ``st.cache_resource`` when Streamlit provides it.

    On Streamlit releases without ``st.cache_resource`` the function is
    returned unchanged, so the app still works, just without caching.
    """
    decorator = getattr(st, 'cache_resource', None)
    return func if decorator is None else decorator(func)


@_cache_resource
def _load_artifact(filename):
    """Load one joblib artifact; the path is resolved by ``joblib.load``."""
    return joblib.load(filename)


# Estimator whose .predict() produces the hourly rate (presumably a random
# forest, judging by the file name -- confirm against the training code).
rf_model = _load_artifact('rf_model.pkl')

# Fitted encoders; each is used below via .transform([label])[0].
le_jobtitle = _load_artifact('le_jobtitle.pkl')
le_location = _load_artifact('le_location.pkl')
le_season = _load_artifact('le_season.pkl')

# Fitted scaler applied to the three numeric features.
scaler = _load_artifact('scaler.pkl')

# Mapping from hospital name to a numeric encoding (used with .get/.values).
hospital_target_mapping = _load_artifact('hospital_target_mapping.pkl')

# Collections of selectable values that populate the dropdowns.
job_titles = _load_artifact('job_titles.pkl')
locations = _load_artifact('locations.pkl')
hospitals = _load_artifact('hospitals.pkl')

# --- Page header -------------------------------------------------------------
st.title('Hourly Pay Rate Prediction')

# Markdown summary of the inputs the prediction is based on. The blank line
# before the bullets keeps the list separate from the introductory sentence.
st.write("""
This application predicts the **Hourly Pay Rate** based on:

- Job Title
- Location
- Hospital
- Contract Start Date
- Contract End Date
""")

# --- User inputs -------------------------------------------------------------
# The option collections (job_titles, locations, hospitals) are defined
# earlier in the script; sorting makes the dropdowns easy to scan.
job_title = st.selectbox('Job Title', sorted(job_titles))
location = st.selectbox('Location', sorted(locations))
hospital = st.selectbox('Hospital', sorted(hospitals))

# Both date pickers default to today.
contract_start_date = st.date_input('Contract Start Date', datetime.today())
contract_end_date = st.date_input('Contract End Date', datetime.today())

# A contract cannot end before it starts. Equal dates (duration 0) pass this
# check, which is also the state of the form when it first loads, so the
# message says "on or after" to match what the condition actually enforces.
if contract_end_date < contract_start_date:
    st.error('Contract End Date must be on or after Contract Start Date.')
    # Halt this script run so no prediction is computed from invalid dates.
    st.stop()

# --- Date-derived features ---------------------------------------------------
# Subtracting the two dates yields a timedelta; .days is its whole-day count.
contract_duration = (contract_end_date - contract_start_date).days
start_month = contract_start_date.month
start_year = contract_start_date.year

def get_season(month):
    """Return the season label for a calendar month number.

    Months are grouped in three-month buckets: December-February is
    'Winter', March-May is 'Spring', June-August is 'Summer', and every
    other value (September-November for a valid month) is 'Fall'.

    Args:
        month: Month number, e.g. ``date.month`` (1-12).

    Returns:
        One of 'Winter', 'Spring', 'Summer' or 'Fall'.
    """
    if month in (12, 1, 2):
        return 'Winter'
    if month in (3, 4, 5):
        return 'Spring'
    if month in (6, 7, 8):
        return 'Summer'
    # Anything not matched above falls through to Fall, as in the original.
    return 'Fall'


# --- Categorical encodings ---------------------------------------------------
season = get_season(start_month)

# Each encoder takes a one-element list and returns a one-element result;
# [0] pulls out the single encoded value.
job_title_encoded = le_jobtitle.transform([job_title])[0]
location_encoded = le_location.transform([location])[0]
season_encoded = le_season.transform([season])[0]

# Hospitals are encoded through the hospital -> value mapping. A hospital
# missing from the mapping falls back to the mean of all mapped values.
average_hourly_rate = np.mean(list(hospital_target_mapping.values()))
hospital_encoded_rf = hospital_target_mapping.get(hospital, average_hourly_rate)

# --- Numeric scaling and model input -----------------------------------------
# Names of the numeric columns, in the same order as the values handed to the
# scaler below.
numerical_features = ['ContractDuration', 'StartMonth', 'StartYear']

# The scaler is given a single row, hence the nested list.
numerical_values = np.array([[contract_duration, start_month, start_year]])
numerical_values_scaled = scaler.transform(numerical_values)

# Single-row frame for the model. The scaled numeric columns are paired with
# their names via zip, so names and positions cannot drift apart.
input_data = pd.DataFrame({
    'JobTitleEncoded': [job_title_encoded],
    'LocationEncoded': [location_encoded],
    'HospitalEncoded_RF': [hospital_encoded_rf],
    'SeasonEncoded': [season_encoded],
    **{
        name: [value]
        for name, value in zip(numerical_features, numerical_values_scaled[0])
    },
})

# --- Prediction --------------------------------------------------------------
# Columns passed to the model, in this order.
features_rf = [
    'JobTitleEncoded',
    'LocationEncoded',
    'HospitalEncoded_RF',
    'SeasonEncoded',
    'ContractDuration',
    'StartMonth',
    'StartYear',
]

# predict() returns one value per input row; the frame has exactly one row.
prediction = rf_model.predict(input_data[features_rf])
st.write(f"## Predicted Hourly Pay Rate: ${prediction[0]:.2f}")