File size: 2,599 Bytes
ea189f9
 
a14015e
5bbeebd
ea189f9
 
 
5bbeebd
 
 
 
 
 
 
 
 
 
 
 
a14015e
ea189f9
a14015e
ea189f9
 
a14015e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea189f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import functools

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.ensemble import VotingRegressor

class FastAIWrapper(BaseEstimator, RegressorMixin):
    """Expose an already-trained learner through the scikit-learn regressor API.

    ``fit`` is a no-op and ``predict`` delegates to the wrapped learner, so the
    object can be used wherever a scikit-learn regressor is expected.

    NOTE(review): ``learn`` is presumably a fastai ``Learner`` (it must provide
    ``dls.test_dl`` and ``get_preds``) -- confirm against the training code.
    """

    def __init__(self, learn):
        # Stored under the same name as the constructor argument, as
        # scikit-learn's BaseEstimator parameter introspection expects.
        self.learn = learn

    def fit(self, X, y):
        """Do nothing and return ``self``; the wrapped learner is not retrained."""
        return self

    def predict(self, X):
        """Return the wrapped learner's predictions for ``X`` as a flat NumPy array.

        Args:
            X: Input rows handed unchanged to ``learn.dls.test_dl``.

        Returns:
            The predictions from ``learn.get_preds`` converted with
            ``.numpy()`` and flattened to one dimension.
        """
        test_loader = self.learn.dls.test_dl(X)
        # get_preds yields a (predictions, targets) pair; the second element
        # is not used here.
        predictions, _unused_targets = self.learn.get_preds(dl=test_loader)
        flat_predictions = predictions.numpy().flatten()
        return flat_predictions

# Load the payroll dataset and the trained ensemble once, at import time.
# Both are module-level globals that the rest of this script reads.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids mixed-type columns.
df = pd.read_csv('City_Employee_Payroll__Current__20240915.csv', low_memory=False)
# NOTE(review): joblib.load unpickles arbitrary Python objects -- only load a
# model file that comes from a trusted source.
# NOTE(review): presumably the pickled ensemble references FastAIWrapper, which
# would be why the class is defined before this call -- confirm.
ensemble = joblib.load('ensemble_model.joblib')

@functools.lru_cache(maxsize=1)
def _default_feature_values():
    """Return the dataset-derived filler values for features the UI does not ask for.

    The values depend only on the module-level ``df``, so they are computed on
    the first call and cached; recomputing them per prediction would rescan
    the whole payroll table several times for every request.

    Returns:
        A tuple of ``(column_name, value)`` pairs, in the order in which the
        columns are appended to the model input. A tuple is used so the cached
        result cannot be mutated by callers.
    """
    return (
        # Categorical-style features: most frequent value in the dataset.
        ('EMPLOYMENT_TYPE', df['EMPLOYMENT_TYPE'].mode().iloc[0]),
        ('JOB_STATUS', df['JOB_STATUS'].mode().iloc[0]),
        ('MOU', df['MOU'].mode().iloc[0]),
        ('DEPARTMENT_NO', df['DEPARTMENT_NO'].mode().iloc[0]),
        # Latest pay year present in the dataset.
        ('PAY_YEAR', df['PAY_YEAR'].max()),
        # Pay components: dataset-wide averages.
        ('REGULAR_PAY', df['REGULAR_PAY'].mean()),
        ('OVERTIME_PAY', df['OVERTIME_PAY'].mean()),
        ('ALL_OTHER_PAY', df['ALL_OTHER_PAY'].mean()),
    )


def predict_total_pay(gender, job_title, ethnicity):
    """Predict total pay for one hypothetical employee.

    Only ``gender``, ``job_title`` and ``ethnicity`` vary between calls; every
    other model feature is filled with a dataset-wide default (mode, max or
    mean of the corresponding ``df`` column).

    Args:
        gender: Value for the ``GENDER`` column.
        job_title: Value for the ``JOB_TITLE`` column.
        ethnicity: Value for the ``ETHNICITY`` column.

    Returns:
        The first element of ``ensemble.predict`` for the single-row input.
    """
    # Single-row frame holding the user-selected features.
    sample = pd.DataFrame({
        'GENDER': [gender],
        'JOB_TITLE': [job_title],
        'ETHNICITY': [ethnicity],
    })

    # Append the remaining features in a fixed order so the column layout of
    # the model input is the same on every call.
    for column, value in _default_feature_values():
        sample[column] = value

    # Derived features; the +1 in the ratio's denominator guards against
    # division by zero when there is no non-regular pay.
    non_regular_pay = sample['OVERTIME_PAY'] + sample['ALL_OTHER_PAY']
    sample['PAY_RATIO'] = sample['REGULAR_PAY'] / (non_regular_pay + 1)
    sample['TOTAL_NON_REGULAR_PAY'] = non_regular_pay

    return ensemble.predict(sample)[0]

def gradio_predict(gender, ethnicity, job_title):
    """Gradio callback: return the predicted total pay as a dollar string.

    The parameters arrive in the order of the UI inputs (gender, ethnicity,
    job title) and are passed on by keyword, because ``predict_total_pay``
    takes them in a different order.

    Returns:
        The prediction formatted with a leading ``$`` and two decimals.
    """
    predicted_pay = predict_total_pay(
        gender=gender,
        job_title=job_title,
        ethnicity=ethnicity,
    )
    return f"${predicted_pay:.2f}"

# Dropdown choices are the distinct non-null values found in the dataset.
# Gender and ethnicity keep their order of first appearance.
genders, ethnicities = (
    df[column].dropna().unique().tolist()
    for column in ('GENDER', 'ETHNICITY')
)
# Job titles are sorted so the list is easier to scan in the dropdown.
job_titles = df['JOB_TITLE'].dropna().unique().tolist()
job_titles.sort()

# Build the Gradio UI: one dropdown per user-selectable feature, listed in the
# same order as gradio_predict's parameters, and a single text output.
iface = gr.Interface(
    fn=gradio_predict,
    inputs=[
        gr.Dropdown(choices=options, label=caption)
        for caption, options in (
            ("Gender", genders),
            ("Ethnicity", ethnicities),
            ("Job Title", job_titles),
        )
    ],
    outputs=gr.Textbox(label="Predicted Total Pay"),
    title="LA City Employee Pay Predictor",
    description="Predict the total pay for LA City employees based on gender, ethnicity, and job title."
)

# Start the web server; this runs as soon as the module is executed.
iface.launch()