Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datetime import datetime

import joblib
import numpy as np
import pandas as pd
import streamlit as st
|
6 |
+
|
7 |
+
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the serialized artifacts are loaded through a cached helper
# instead of being read from disk again on each rerun.
@st.cache_resource(show_spinner=False)
def _load_artifact(path):
    """Load a joblib-serialized artifact from ``path``.

    The returned object is cached by Streamlit and shared between reruns, so
    it must be treated as read-only by the rest of the script.

    NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    Only load artifacts that come from a trusted source.
    """
    return joblib.load(path)


# Load the trained Random Forest model
rf_model = _load_artifact('rf_model.joblib')

# Load encoders
le_jobtitle = _load_artifact('le_jobtitle.joblib')
le_location = _load_artifact('le_location.joblib')
le_season = _load_artifact('le_season.joblib')

# Load scaler
scaler = _load_artifact('scaler.joblib')

# Load target encoding mapping for Hospital
hospital_target_mapping = _load_artifact('hospital_target_mapping.joblib')

# Load lists for dropdown menus
job_titles = _load_artifact('job_titles.joblib')
locations = _load_artifact('locations.joblib')
hospitals = _load_artifact('hospitals.joblib')
|
25 |
+
|
26 |
+
# Streamlit app
st.title('Hourly Pay Rate Prediction')

# Introductory text listing the inputs the prediction is based on.
st.write("""
This application predicts the **Hourly Pay Rate** based on:
- Job Title
- Location
- Hospital
- Contract Start Date
- Contract End Date
""")
|
37 |
+
|
38 |
+
# User inputs
job_title = st.selectbox('Job Title', sorted(job_titles))
location = st.selectbox('Location', sorted(locations))
hospital = st.selectbox('Hospital', sorted(hospitals))
contract_start_date = st.date_input('Contract Start Date', datetime.today())
contract_end_date = st.date_input('Contract End Date', datetime.today())

# Validate contract dates
# Equal dates are accepted (both pickers default to today), which yields a
# contract duration of 0 days; only an end date before the start is rejected.
if contract_end_date < contract_start_date:
    st.error('Contract End Date must be on or after Contract Start Date.')
    st.stop()

# Feature Engineering
contract_duration = (contract_end_date - contract_start_date).days
start_month = contract_start_date.month
start_year = contract_start_date.year
|
54 |
+
|
55 |
+
def get_season(month):
    """Map a month number to a season label.

    Args:
        month: Month number. 12, 1 and 2 map to 'Winter'; 3-5 to 'Spring';
            6-8 to 'Summer'.

    Returns:
        'Winter', 'Spring', 'Summer' or 'Fall'. Every value not listed
        above (which includes 9-11) yields 'Fall'.
    """
    if month in (12, 1, 2):
        return 'Winter'
    if month in (3, 4, 5):
        return 'Spring'
    if month in (6, 7, 8):
        return 'Summer'
    return 'Fall'
|
64 |
+
|
65 |
+
season = get_season(start_month)

# Encode the categorical inputs with the fitted encoders.
# NOTE(review): the encoders are presumably scikit-learn LabelEncoders, whose
# transform() raises ValueError for a label not seen during fitting. The
# dropdown lists are loaded from separate files, so a mismatch is reported to
# the user instead of surfacing as an unhandled exception.
try:
    job_title_encoded = le_jobtitle.transform([job_title])[0]
    location_encoded = le_location.transform([location])[0]
    season_encoded = le_season.transform([season])[0]
except ValueError as exc:
    st.error(f'Unable to encode the selected inputs: {exc}')
    st.stop()

# For 'HospitalEncoded_RF', use the target encoding mapping
# Hospitals missing from the mapping fall back to the mean of all mapped values.
average_hourly_rate = np.mean(list(hospital_target_mapping.values()))
hospital_encoded_rf = hospital_target_mapping.get(hospital, average_hourly_rate)
|
74 |
+
|
75 |
+
# Prepare numerical features
# The order of the names here matches the column order of the array passed to
# the scaler, so scaled values can be paired with their names below.
numerical_features = ['ContractDuration', 'StartMonth', 'StartYear']
numerical_values = np.array([[contract_duration, start_month, start_year]])
numerical_values_scaled = scaler.transform(numerical_values)

# Single-row frame holding the encoded categorical values and the scaled
# numerical values.
input_data = pd.DataFrame({
    'JobTitleEncoded': [job_title_encoded],
    'LocationEncoded': [location_encoded],
    'HospitalEncoded_RF': [hospital_encoded_rf],
    'SeasonEncoded': [season_encoded],
    **{
        name: [scaled_value]
        for name, scaled_value in zip(numerical_features, numerical_values_scaled[0])
    },
})

# Columns passed to the model, in this order.
# NOTE(review): presumably the feature order used at training time - confirm.
features_rf = [
    'JobTitleEncoded',
    'LocationEncoded',
    'HospitalEncoded_RF',
    'SeasonEncoded',
    'ContractDuration',
    'StartMonth',
    'StartYear',
]
|
99 |
+
|
100 |
+
# Predict from the single-row input and show the first (only) prediction,
# formatted with two decimal places.
prediction = rf_model.predict(input_data[features_rf])
st.write(f"## Predicted Hourly Pay Rate: ${prediction[0]:.2f}")
|