"""Streamlit app that predicts an hourly pay rate with a pre-trained model.

The script loads a pickled Random Forest model together with the label
encoders, scaler and hospital target-encoding mapping saved alongside it,
collects the contract details from the user, rebuilds the model's feature
row and displays the prediction.
"""
from datetime import datetime

import joblib
import numpy as np
import pandas as pd
import streamlit as st

# File stems of the pickled artifacts; each is read from "<stem>.pkl",
# resolved relative to the current working directory.
_ARTIFACT_NAMES = (
    'rf_model',
    'le_jobtitle',
    'le_location',
    'le_season',
    'scaler',
    'hospital_target_mapping',
    'job_titles',
    'locations',
    'hospitals',
)


@st.cache_resource
def _load_artifacts():
    """Load every pickled artifact and keep it cached between reruns.

    Streamlit re-executes the whole script on each widget interaction;
    caching avoids re-reading all pickle files every time.

    Returns:
        dict mapping each name in ``_ARTIFACT_NAMES`` to the loaded object.
    """
    # NOTE: joblib.load unpickles arbitrary objects -- only deploy this app
    # next to artifact files that come from a trusted source.
    return {name: joblib.load(f'{name}.pkl') for name in _ARTIFACT_NAMES}


def get_season(month):
    """Return the season name for a month number.

    12, 1, 2 -> 'Winter'; 3-5 -> 'Spring'; 6-8 -> 'Summer'; any other
    value -> 'Fall'.
    """
    if month in (12, 1, 2):
        return 'Winter'
    if month in (3, 4, 5):
        return 'Spring'
    if month in (6, 7, 8):
        return 'Summer'
    return 'Fall'


def _encode_label(encoder, value, field_label):
    """Encode one categorical value with ``encoder``.

    If the encoder rejects the value (``transform`` raises ``ValueError``,
    e.g. for a label it was not fitted on), show an error in the app and
    stop the script run instead of surfacing a raw traceback.
    """
    try:
        return encoder.transform([value])[0]
    except ValueError:
        st.error(
            f"{field_label} '{value}' could not be encoded; it was probably "
            "not present in the data the model was trained on."
        )
        st.stop()


artifacts = _load_artifacts()

# Trained Random Forest model
rf_model = artifacts['rf_model']

# Label encoders
le_jobtitle = artifacts['le_jobtitle']
le_location = artifacts['le_location']
le_season = artifacts['le_season']

# Scaler for the numerical features
scaler = artifacts['scaler']

# Target encoding mapping for Hospital
hospital_target_mapping = artifacts['hospital_target_mapping']

# Lists for dropdown menus
job_titles = artifacts['job_titles']
locations = artifacts['locations']
hospitals = artifacts['hospitals']

# Streamlit app
st.title('Hourly Pay Rate Prediction')

st.write("""
This application predicts the **Hourly Pay Rate** based on:
- Job Title
- Location
- Hospital
- Contract Start Date
- Contract End Date
""")

# User inputs
job_title = st.selectbox('Job Title', sorted(job_titles))
location = st.selectbox('Location', sorted(locations))
hospital = st.selectbox('Hospital', sorted(hospitals))
contract_start_date = st.date_input('Contract Start Date', datetime.today())
contract_end_date = st.date_input('Contract End Date', datetime.today())

# Validate contract dates. Equal dates are accepted (duration 0), which is
# also the default state of the two pickers, so the message says "earlier
# than" rather than claiming the end date must be strictly after the start.
if contract_end_date < contract_start_date:
    st.error('Contract End Date cannot be earlier than Contract Start Date.')
    st.stop()

# Feature engineering
contract_duration = (contract_end_date - contract_start_date).days
start_month = contract_start_date.month
start_year = contract_start_date.year
season = get_season(start_month)

job_title_encoded = _encode_label(le_jobtitle, job_title, 'Job Title')
location_encoded = _encode_label(le_location, location, 'Location')
season_encoded = _encode_label(le_season, season, 'Season')

# For 'HospitalEncoded_RF', use the target encoding mapping; a hospital that
# is missing from the mapping falls back to the mean of the mapped values.
average_hourly_rate = np.mean(list(hospital_target_mapping.values()))
hospital_encoded_rf = hospital_target_mapping.get(hospital, average_hourly_rate)

# Scale the numerical features. The column order here must match the order
# of the names in numerical_features, which label the scaled values below.
numerical_features = ['ContractDuration', 'StartMonth', 'StartYear']
numerical_values = np.array([[contract_duration, start_month, start_year]])
numerical_values_scaled = scaler.transform(numerical_values)

feature_row = {
    'JobTitleEncoded': job_title_encoded,
    'LocationEncoded': location_encoded,
    'HospitalEncoded_RF': hospital_encoded_rf,
    'SeasonEncoded': season_encoded,
}
feature_row.update(zip(numerical_features, numerical_values_scaled[0]))
input_data = pd.DataFrame([feature_row])

# Column order passed to the model.
features_rf = [
    'JobTitleEncoded',
    'LocationEncoded',
    'HospitalEncoded_RF',
    'SeasonEncoded',
    'ContractDuration',
    'StartMonth',
    'StartYear',
]

prediction = rf_model.predict(input_data[features_rf])

st.write(f"## Predicted Hourly Pay Rate: ${prediction[0]:.2f}")