mindexplorer committed on
Commit
d0cf0ec
·
verified ·
1 Parent(s): 2573183

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -0
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from datetime import datetime
5
+ import joblib
6
+
7
# ---------------------------------------------------------------------------
# Model artifacts
# All artifacts are read with joblib from relative paths, i.e. they are
# resolved against the current working directory of the Streamlit process.
# ---------------------------------------------------------------------------

# Load the trained Random Forest model
rf_model = joblib.load('rf_model.joblib')

# Load encoders for the categorical inputs (job title, location, season)
le_jobtitle = joblib.load('le_jobtitle.joblib')
le_location = joblib.load('le_location.joblib')
le_season = joblib.load('le_season.joblib')

# Load scaler applied to the numerical features before prediction
scaler = joblib.load('scaler.joblib')

# Load target encoding mapping for Hospital
hospital_target_mapping = joblib.load('hospital_target_mapping.joblib')

# Load lists for dropdown menus
job_titles = joblib.load('job_titles.joblib')
locations = joblib.load('locations.joblib')
hospitals = joblib.load('hospitals.joblib')

# ---------------------------------------------------------------------------
# Streamlit app
# ---------------------------------------------------------------------------
st.title('Hourly Pay Rate Prediction')

st.write("""
This application predicts the **Hourly Pay Rate** based on:
- Job Title
- Location
- Hospital
- Contract Start Date
- Contract End Date
""")

# User inputs; the dropdown options are shown in sorted order.
job_title = st.selectbox('Job Title', sorted(job_titles))
location = st.selectbox('Location', sorted(locations))
hospital = st.selectbox('Hospital', sorted(hospitals))
contract_start_date = st.date_input('Contract Start Date', datetime.today())
contract_end_date = st.date_input('Contract End Date', datetime.today())

# Validate contract dates.
# Only an end date strictly before the start date is rejected; equal dates
# (a zero-day contract, which is also the default state of both pickers) are
# accepted. The message therefore says "on or after" to match the check.
if contract_end_date < contract_start_date:
    st.error('Contract End Date must be on or after Contract Start Date.')
    st.stop()

# Feature Engineering: raw numerical features derived from the two dates.
contract_duration = (contract_end_date - contract_start_date).days
start_month = contract_start_date.month
start_year = contract_start_date.year
54
+
55
def get_season(month):
    """Return the season name for a month number.

    Months 12, 1 and 2 map to 'Winter', 3-5 to 'Spring' and 6-8 to
    'Summer'. Any other value (including 9, 10 and 11) maps to 'Fall'.
    """
    season_months = (
        ('Winter', (12, 1, 2)),
        ('Spring', (3, 4, 5)),
        ('Summer', (6, 7, 8)),
    )
    for season_name, months in season_months:
        if month in months:
            return season_name
    # Everything not listed above falls through to Fall.
    return 'Fall'
64
+
65
# Season label derived from the contract's start month.
season = get_season(start_month)

# Encode the categorical inputs with their loaded encoders. Each encoder is
# given a one-item list, and the first element of its result is kept.
job_title_encoded = le_jobtitle.transform([job_title])[0]
location_encoded = le_location.transform([location])[0]
season_encoded = le_season.transform([season])[0]

# For 'HospitalEncoded_RF', use the target encoding mapping. A hospital that
# is missing from the mapping falls back to the mean of all mapped values.
known_hospital_rates = list(hospital_target_mapping.values())
average_hourly_rate = np.mean(known_hospital_rates)
hospital_encoded_rf = hospital_target_mapping.get(hospital, average_hourly_rate)

# Prepare numerical features: one row, columns in the order named below.
numerical_features = ['ContractDuration', 'StartMonth', 'StartYear']
numerical_values = np.array([[contract_duration, start_month, start_year]])
numerical_values_scaled = scaler.transform(numerical_values)

# Column order handed to the model: encoded categoricals first, then the
# scaled numerical features.
features_rf = [
    'JobTitleEncoded',
    'LocationEncoded',
    'HospitalEncoded_RF',
    'SeasonEncoded',
    *numerical_features,
]

# Assemble the single-row input frame.
encoded_columns = {
    'JobTitleEncoded': [job_title_encoded],
    'LocationEncoded': [location_encoded],
    'HospitalEncoded_RF': [hospital_encoded_rf],
    'SeasonEncoded': [season_encoded],
}
scaled_row = numerical_values_scaled[0]
scaled_columns = {
    column_name: [scaled_row[position]]
    for position, column_name in enumerate(numerical_features)
}
input_data = pd.DataFrame({**encoded_columns, **scaled_columns})

# Predict and display the result formatted to two decimal places.
prediction = rf_model.predict(input_data[features_rf])
st.write(f"## Predicted Hourly Pay Rate: ${prediction[0]:.2f}")