Spaces:
Sleeping
Sleeping
File size: 4,689 Bytes
ea189f9 060d492 a14015e 060d492 5bbeebd 060d492 ea189f9 82930cd 060d492 ea189f9 060d492 a14015e ea189f9 ad5a6b0 060d492 ad5a6b0 060d492 ad5a6b0 060d492 ad5a6b0 060d492 ad5a6b0 ea189f9 402148f c874ca6 ea9d83f c874ca6 402148f ea189f9 402148f ea189f9 402148f ea189f9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import gradio as gr
import joblib
class FastAIWrapper(BaseEstimator, RegressorMixin):
    """Adapter exposing an already-trained learner as a scikit-learn regressor.

    The wrapped object must provide ``dls.test_dl(X)`` and ``get_preds(dl=...)``
    (presumably a fastai ``Learner`` -- confirm against the training code).

    NOTE(review): the class name and the ``learn`` attribute are kept unchanged
    because a pickled model may reference them -- verify before renaming.
    """

    def __init__(self, learn):
        # Stored under the same name as the constructor argument, as
        # scikit-learn's parameter introspection expects.
        self.learn = learn

    def fit(self, X, y):
        """Do nothing and return ``self``; ``X`` and ``y`` are ignored."""
        return self

    def predict(self, X):
        """Return the learner's predictions for ``X`` as a flat NumPy array."""
        test_loader = self.learn.dls.test_dl(X)
        # get_preds returns a pair here; only the first element is used.
        predictions, _unused = self.learn.get_preds(dl=test_loader)
        return predictions.numpy().flatten()
# Load the payroll records and treat +/- infinity as missing values.
df = pd.read_csv(
    'City_Employee_Payroll__Current__20240915.csv',
    low_memory=False,
).replace([np.inf, -np.inf], np.nan)

# Column names grouped as categorical vs. continuous variables.
cat_names = [
    'EMPLOYMENT_TYPE',
    'JOB_STATUS',
    'MOU',
    'GENDER',
    'ETHNICITY',
    'JOB_TITLE',
    'DEPARTMENT_NO',
]
cont_names = [
    'PAY_YEAR',
    'REGULAR_PAY',
    'OVERTIME_PAY',
    'ALL_OTHER_PAY',
    'PAY_RATIO',
    'TOTAL_NON_REGULAR_PAY',
]

# Load the trained model.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only ship a model file from a trusted source.
ensemble = joblib.load('ensemble_model.joblib')
def _most_common(frames, column):
    """Return the modal value of *column* from the first frame that has one.

    ``Series.mode()`` ignores missing values, so a frame that is empty or whose
    column is entirely missing yields an empty result; the next frame is then
    tried. Returns NaN when no frame provides a value.
    """
    for frame in frames:
        modes = frame[column].mode()
        if not modes.empty:
            return modes.iloc[0]
    return np.nan


def _average(frames, column):
    """Return the mean of *column* from the first frame where it is not NaN.

    Returns NaN when every frame is empty or all-missing in that column.
    """
    for frame in frames:
        mean = frame[column].mean()
        if pd.notna(mean):
            return mean
    return np.nan


def predict_total_pay(gender, job_title, ethnicity, *, data=None, model=None):
    """Predict total pay for a gender / job title / ethnicity combination.

    The model needs more features than the three inputs, so the remaining ones
    are estimated from reference rows, trying in order:

    1. rows matching job title, gender and ethnicity,
    2. rows matching the job title only,
    3. all rows.

    Categorical features use the most frequent value and pay features use the
    mean. The fallback is applied per column, so a group that exists but has
    only missing values in one column no longer raises ``IndexError`` (from
    ``mode().iloc[0]`` on an empty result) or feeds NaN to the model.

    Args:
        gender: Value compared against the ``GENDER`` column.
        job_title: Value compared against the ``JOB_TITLE`` column.
        ethnicity: Value compared against the ``ETHNICITY`` column.
        data: Optional payroll DataFrame; defaults to the module-level ``df``.
        model: Optional object with a ``predict(DataFrame)`` method; defaults
            to the module-level ``ensemble``.

    Returns:
        The first element of ``model.predict`` for the single-row sample.
    """
    data = df if data is None else data
    model = ensemble if model is None else model

    # The exact-match rows are a subset of the job-title rows, so filter once
    # by title and narrow from there.
    job_rows = data[data['JOB_TITLE'] == job_title]
    exact_rows = job_rows[
        (job_rows['GENDER'] == gender) & (job_rows['ETHNICITY'] == ethnicity)
    ]
    frames = (exact_rows, job_rows, data)

    # Insertion order fixes the column order of the sample handed to the model.
    features = {
        'GENDER': gender,
        'JOB_TITLE': job_title,
        'ETHNICITY': ethnicity,
    }
    for column in ('EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'DEPARTMENT_NO'):
        features[column] = _most_common(frames, column)
    for column in ('REGULAR_PAY', 'OVERTIME_PAY', 'ALL_OTHER_PAY'):
        features[column] = _average(frames, column)
    # Always predict for the most recent pay year present in the data.
    features['PAY_YEAR'] = data['PAY_YEAR'].max()

    sample = pd.DataFrame({name: [value] for name, value in features.items()})

    # Derived features; the +1 keeps the ratio finite when there is no
    # non-regular pay.
    non_regular_pay = sample['OVERTIME_PAY'] + sample['ALL_OTHER_PAY']
    sample['PAY_RATIO'] = sample['REGULAR_PAY'] / (non_regular_pay + 1)
    sample['TOTAL_NON_REGULAR_PAY'] = non_regular_pay

    # Categorical inputs are passed to the model as object dtype.
    categorical_columns = [
        'GENDER', 'JOB_TITLE', 'ETHNICITY', 'EMPLOYMENT_TYPE',
        'JOB_STATUS', 'MOU', 'DEPARTMENT_NO',
    ]
    for col in categorical_columns:
        sample[col] = sample[col].astype('object')

    return model.predict(sample)[0]
def gradio_predict(gender, ethnicity, job_title):
    """Format the pay prediction for display in the Gradio text box.

    Arguments arrive in the order of the UI inputs (gender, ethnicity, job
    title); ``predict_total_pay`` takes job title before ethnicity, so they
    are reordered in the call. A negative prediction gets an explanatory
    message instead of a plain dollar figure.
    """
    predicted_pay = predict_total_pay(gender, job_title, ethnicity)

    if predicted_pay < 0:
        return f"Predicted pay is negative (${predicted_pay:.2f} per year). May indicate financial hardship or unlikelihood of obtaining position."

    return f"${predicted_pay:.2f} per year"
# Prepare dropdown options from the distinct non-missing values in the data.
# Only job titles are sorted; genders and ethnicities keep first-seen order.
genders = df['GENDER'].dropna().unique().tolist()
ethnicities = df['ETHNICITY'].dropna().unique().tolist()
job_titles = sorted(df['JOB_TITLE'].dropna().unique().tolist())

# Create Gradio interface. The order of `inputs` must match the parameter
# order of gradio_predict (gender, ethnicity, job_title).
iface = gr.Interface(
    fn=gradio_predict,
    inputs=[
        gr.Dropdown(choices=genders, label="Gender"),
        gr.Dropdown(choices=ethnicities, label="Ethnicity"),
        gr.Dropdown(choices=job_titles, label="Job Title"),
    ],
    outputs=gr.Textbox(label="Predicted Total Pay"),
    title="LA City Employee Pay Predictor",
    description="Predict the total pay for LA City employees based on gender, ethnicity, and job title.",
)

# Launch the interface (the stray trailing "|" after this call was removed;
# it made the statement a syntax error).
iface.launch()