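# NOTE(review): the three lines below appear to be page residue rather than
# program text; they are kept verbatim as comments so the module can parse.
#   mindexplorer's picture
#   Update app.py
#   4dd7600 verified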
import streamlit as st
import pandas as pd
import numpy as np
from datetime import datetime
import joblib
# Deserialize every artifact the app needs in a single ordered pass. The files
# are read top to bottom in the order listed, and each result is bound to the
# name in the matching position on the left:
#   - the trained Random Forest model
#   - the job-title, location and season encoders
#   - the scaler
#   - the target-encoding mapping for Hospital
#   - the three option lists that feed the dropdown menus
(
    rf_model,
    le_jobtitle,
    le_location,
    le_season,
    scaler,
    hospital_target_mapping,
    job_titles,
    locations,
    hospitals,
) = (
    joblib.load(artifact_path)
    for artifact_path in (
        'rf_model.pkl',
        'le_jobtitle.pkl',
        'le_location.pkl',
        'le_season.pkl',
        'scaler.pkl',
        'hospital_target_mapping.pkl',
        'job_titles.pkl',
        'locations.pkl',
        'hospitals.pkl',
    )
)
# Streamlit app: page title and a short description of the inputs used.
st.title('Hourly Pay Rate Prediction')
st.write("""
This application predicts the **Hourly Pay Rate** based on:
- Job Title
- Location
- Hospital
- Contract Start Date
- Contract End Date
""")

# User inputs. The dropdown options come from the loaded option lists and are
# shown in sorted order; both date pickers are seeded with today's date.
job_title = st.selectbox('Job Title', sorted(job_titles))
location = st.selectbox('Location', sorted(locations))
hospital = st.selectbox('Hospital', sorted(hospitals))
contract_start_date = st.date_input('Contract Start Date', datetime.today())
contract_end_date = st.date_input('Contract End Date', datetime.today())

# Validate contract dates. Only an end date strictly earlier than the start
# date is rejected; equal dates pass and yield a duration of 0 days below.
# The error and the stop must both live inside the guard: otherwise the script
# would halt on every run and never reach the prediction.
if contract_end_date < contract_start_date:
    st.error('Contract End Date must be after Contract Start Date.')
    st.stop()

# Feature engineering: contract length in whole days, plus the calendar month
# and year of the start date.
contract_duration = (contract_end_date - contract_start_date).days
start_month = contract_start_date.month
start_year = contract_start_date.year
def get_season(month):
    """Return the season name for a calendar month number.

    Args:
        month: Month number; the caller passes the ``.month`` of a date.

    Returns:
        'Winter' for 12, 1 and 2; 'Spring' for 3-5; 'Summer' for 6-8; and
        'Fall' for every other value (which covers 9-11).
    """
    if month in (12, 1, 2):
        return 'Winter'
    if month in (3, 4, 5):
        return 'Spring'
    if month in (6, 7, 8):
        return 'Summer'
    # Anything not matched above falls through to 'Fall'.
    return 'Fall'
# Columns, in order, that are selected from the input frame and handed to the
# model's predict call.
features_rf = [
    'JobTitleEncoded',
    'LocationEncoded',
    'HospitalEncoded_RF',
    'SeasonEncoded',
    'ContractDuration',
    'StartMonth',
    'StartYear',
]


def _encode_one(encoder, label):
    """Run a single label through ``encoder.transform`` and return its code."""
    return encoder.transform([label])[0]


# Season label derived from the contract's start month.
season = get_season(start_month)

# Encode the three categorical inputs with their respective encoders.
job_title_encoded = _encode_one(le_jobtitle, job_title)
location_encoded = _encode_one(le_location, location)
season_encoded = _encode_one(le_season, season)

# For 'HospitalEncoded_RF', look the hospital up in the target-encoding
# mapping; a hospital missing from the mapping falls back to the mean of all
# mapped values.
mapped_hospital_rates = list(hospital_target_mapping.values())
average_hourly_rate = np.mean(mapped_hospital_rates)
hospital_encoded_rf = hospital_target_mapping.get(hospital, average_hourly_rate)

# Scale the numerical features as one single-row batch. The name list is in
# the same order as the values in that row.
numerical_features = ['ContractDuration', 'StartMonth', 'StartYear']
numerical_values = np.array([[contract_duration, start_month, start_year]])
numerical_values_scaled = scaler.transform(numerical_values)

# Assemble the single-row frame: encoded categoricals first, then each scaled
# numerical value stored under its feature name.
feature_row = {
    'JobTitleEncoded': job_title_encoded,
    'LocationEncoded': location_encoded,
    'HospitalEncoded_RF': hospital_encoded_rf,
    'SeasonEncoded': season_encoded,
}
feature_row.update(zip(numerical_features, numerical_values_scaled[0]))
input_data = pd.DataFrame(
    {column: [value] for column, value in feature_row.items()}
)

# Predict from the selected columns and render the first (only) result.
prediction = rf_model.predict(input_data[features_rf])
st.write(f"## Predicted Hourly Pay Rate: ${prediction[0]:.2f}")