Spaces:
Sleeping
Sleeping
File size: 6,326 Bytes
ea189f9 a14015e 5bbeebd ea189f9 5bbeebd a14015e ea189f9 a14015e ea189f9 a14015e ea189f9 ad5a6b0 ea189f9 ad5a6b0 ea189f9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
import pandas as pd
import numpy as np
from sklearn.ensemble import VotingRegressor
from sklearn.base import BaseEstimator, RegressorMixin
import gradio as gr
import joblib
class FastAIWrapper(BaseEstimator, RegressorMixin):
    """Expose an already-trained learner through the scikit-learn regressor API.

    The wrapped object only needs ``dls.test_dl(X)`` and ``get_preds(dl=...)``.
    NOTE(review): presumably a fastai ``Learner`` -- confirm against the
    training code that produced the saved model.
    """

    def __init__(self, learn):
        # Stored under the constructor argument's own name so that
        # BaseEstimator.get_params() / clone() can find it.
        self.learn = learn

    def fit(self, X, y):
        """Do nothing and return ``self``; the wrapped learner is used as-is."""
        return self

    def predict(self, X):
        """Return the learner's predictions for ``X`` as a flat NumPy array."""
        learner = self.learn
        test_loader = learner.dls.test_dl(X)
        # get_preds returns a pair; only the first element (the predictions)
        # is used, the second is discarded.
        predictions, _unused = learner.get_preds(dl=test_loader)
        return predictions.numpy().flatten()
# Load the payroll data and the trained model once, at import time.
PAYROLL_CSV_PATH = 'City_Employee_Payroll__Current__20240915.csv'
ENSEMBLE_MODEL_PATH = 'ensemble_model.joblib'

# low_memory=False makes pandas read the file in a single pass instead of in
# chunks, so each column gets one consistently inferred dtype.
df = pd.read_csv(PAYROLL_CSV_PATH, low_memory=False)

# joblib.load unpickles arbitrary Python objects: only load model files from a
# trusted source.
# NOTE(review): the saved ensemble presumably references FastAIWrapper, which
# would then have to be defined before this line runs -- confirm.
ensemble = joblib.load(ENSEMBLE_MODEL_PATH)
# Features that are not supplied by the caller and are instead derived from
# reference rows of the payroll data: categorical ones take the most frequent
# value, numeric ones take the mean.
_CATEGORICAL_FILL_COLUMNS = ('EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'DEPARTMENT_NO')
_NUMERIC_FILL_COLUMNS = ('REGULAR_PAY', 'OVERTIME_PAY', 'ALL_OTHER_PAY')


def _select_reference_rows(gender, job_title, ethnicity):
    """Return the most specific non-empty subset of ``df`` for the inputs.

    Preference order: rows matching all three inputs, then rows matching the
    job title only, then the whole dataset.
    """
    same_job = df['JOB_TITLE'] == job_title
    exact_rows = df[(df['GENDER'] == gender) & same_job & (df['ETHNICITY'] == ethnicity)]
    if len(exact_rows) > 0:
        return exact_rows
    job_rows = df[same_job]
    if len(job_rows) > 0:
        return job_rows
    return df


def _most_frequent(rows, column):
    """Return the most frequent non-null value of ``column`` in ``rows``.

    ``Series.mode()`` is empty when every value is null; in that case the
    dataset-wide mode is used instead of failing on ``.iloc[0]``.
    """
    modes = rows[column].mode()
    if modes.empty:
        modes = df[column].mode()
    return modes.iloc[0]


def _average(rows, column):
    """Return the mean of ``column`` in ``rows``.

    Falls back to the dataset-wide mean when the subset mean is NaN (all
    values null), so NaN is not handed to the model as a feature.
    """
    value = rows[column].mean()
    if pd.isna(value):
        value = df[column].mean()
    return value


def predict_total_pay(gender, job_title, ethnicity):
    """Predict total pay for an employee profile using the loaded ensemble.

    Only three features come from the caller. The remaining model inputs are
    estimated from the payroll data, using the most specific group of rows
    available (exact match, then same job title, then the whole dataset).

    Parameters:
        gender: value compared against the ``GENDER`` column.
        job_title: value compared against the ``JOB_TITLE`` column.
        ethnicity: value compared against the ``ETHNICITY`` column.

    Returns:
        The first element of ``ensemble.predict(sample)`` for the single-row
        feature frame built here.

    Raises:
        IndexError: if a categorical fill column has no non-null value
            anywhere in the dataset.
    """
    reference_rows = _select_reference_rows(gender, job_title, ethnicity)

    # Insertion order defines the column order of the frame passed to the
    # model, so the features are added in a fixed sequence.
    features = {
        'GENDER': gender,
        'JOB_TITLE': job_title,
        'ETHNICITY': ethnicity,
    }
    for column in _CATEGORICAL_FILL_COLUMNS:
        features[column] = _most_frequent(reference_rows, column)
    for column in _NUMERIC_FILL_COLUMNS:
        features[column] = _average(reference_rows, column)
    # Set PAY_YEAR to the most recent year in the dataset.
    features['PAY_YEAR'] = df['PAY_YEAR'].max()

    sample = pd.DataFrame({name: [value] for name, value in features.items()})

    # Derived features.
    # PAY_RATIO: regular pay relative to all other pay; the +1 keeps the
    # denominator non-zero when there is no overtime/other pay.
    non_regular_pay = sample['OVERTIME_PAY'] + sample['ALL_OTHER_PAY']
    sample['PAY_RATIO'] = sample['REGULAR_PAY'] / (non_regular_pay + 1)
    # TOTAL_NON_REGULAR_PAY: sum of overtime pay and all other pay.
    sample['TOTAL_NON_REGULAR_PAY'] = non_regular_pay

    # Ensure all categorical columns are of type 'object' to prevent type
    # issues with the model (e.g. DEPARTMENT_NO may have been read as a number).
    categorical_columns = ['GENDER', 'JOB_TITLE', 'ETHNICITY', *_CATEGORICAL_FILL_COLUMNS]
    sample = sample.astype({column: 'object' for column in categorical_columns})

    return ensemble.predict(sample)[0]
# Prepare dropdown options; missing values are dropped so NaN never shows up
# as a selectable choice.
genders = df['GENDER'].dropna().unique().tolist()
ethnicities = df['ETHNICITY'].dropna().unique().tolist()
job_titles = sorted(df['JOB_TITLE'].dropna().unique().tolist())

# Create Gradio interface.
# Input values are handed to `fn` positionally, in the order the components
# are listed, so the list must follow the signature
# predict_total_pay(gender, job_title, ethnicity). Listing Ethnicity before
# Job Title would swap those two arguments.
iface = gr.Interface(
    fn=predict_total_pay,
    inputs=[
        gr.Dropdown(choices=genders, label="Gender"),
        gr.Dropdown(choices=job_titles, label="Job Title"),
        gr.Dropdown(choices=ethnicities, label="Ethnicity"),
    ],
    outputs=gr.Textbox(label="Predicted Total Pay"),
    title="LA City Employee Pay Predictor",
    description="Predict the total pay for LA City employees based on gender, ethnicity, and job title.",
)
iface.launch()