Spaces:

huntrezz
/

LACityEmployeePayPredictor

Sleeping

File size: 2,599 Bytes

ea189f9
 
a14015e
5bbeebd
ea189f9
 
 
5bbeebd
 
 
 
 
 
 
 
 
 
 
 
a14015e
ea189f9
a14015e
ea189f9
 
a14015e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea189f9

import pandas as pd
import numpy as np
from sklearn.ensemble import VotingRegressor
from sklearn.base import BaseEstimator, RegressorMixin
import gradio as gr
import joblib

class FastAIWrapper(BaseEstimator, RegressorMixin):
    """Adapter that exposes an already-built learner through the scikit-learn regressor API.

    The wrapped object is presumably a fastai ``Learner`` (it must provide
    ``dls.test_dl`` and ``get_preds``) -- TODO confirm against the training code.
    """

    def __init__(self, learn):
        # Kept under the same name as the constructor argument.
        self.learn = learn

    def fit(self, X, y):
        """Do nothing and return ``self``; ``X`` and ``y`` are ignored."""
        return self

    def predict(self, X):
        """Return the learner's predictions for ``X``, flattened to one dimension."""
        # Build a test data loader for X, then run inference on it.
        test_loader = self.learn.dls.test_dl(X)
        predictions, _targets = self.learn.get_preds(dl=test_loader)
        return predictions.numpy().flatten()

# Read the payroll CSV and the serialized ensemble model; both paths are
# relative, so they resolve against the current working directory.
# NOTE(review): joblib.load unpickles the file -- only load model files from a
# trusted source.
df = pd.read_csv(
    'City_Employee_Payroll__Current__20240915.csv',
    low_memory=False,
)
ensemble = joblib.load('ensemble_model.joblib')

def predict_total_pay(gender, job_title, ethnicity):
    """Predict total pay for one hypothetical employee.

    Only ``gender``, ``job_title`` and ``ethnicity`` come from the caller.
    Every other feature is filled with a dataset-wide default taken from the
    module-level ``df``: the most frequent value for the categorical columns,
    the maximum for ``PAY_YEAR`` and the mean for the pay components.

    Returns:
        The first element of ``ensemble.predict`` for the one-row frame.
    """
    # One-row frame seeded with the caller-supplied features.
    sample = pd.DataFrame({
        'GENDER': [gender],
        'JOB_TITLE': [job_title],
        'ETHNICITY': [ethnicity],
    })

    # Categorical features the caller does not provide: use the most common value.
    for column in ('EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'DEPARTMENT_NO'):
        sample[column] = df[column].mode().iloc[0]

    # Latest pay year present in the dataset.
    sample['PAY_YEAR'] = df['PAY_YEAR'].max()

    # Pay components: use the dataset average.
    for column in ('REGULAR_PAY', 'OVERTIME_PAY', 'ALL_OTHER_PAY'):
        sample[column] = df[column].mean()

    # Derived features; the +1 in the denominator avoids division by zero.
    non_regular_pay = sample['OVERTIME_PAY'] + sample['ALL_OTHER_PAY']
    sample['PAY_RATIO'] = sample['REGULAR_PAY'] / (non_regular_pay + 1)
    sample['TOTAL_NON_REGULAR_PAY'] = non_regular_pay

    # The frame has a single row, so only the first prediction is returned.
    return ensemble.predict(sample)[0]

def gradio_predict(gender, ethnicity, job_title):
    """Gradio callback: return the predicted total pay as a dollar string.

    Args:
        gender: Selected gender, or ``None`` when nothing is selected.
        ethnicity: Selected ethnicity, or ``None`` when nothing is selected.
        job_title: Selected job title, or ``None`` when nothing is selected.

    Returns:
        The prediction formatted as ``$`` followed by the amount with two
        decimals, or a prompt message when any selection is missing.
    """
    # A dropdown with no selection passes None; feeding that to the model
    # would fail or yield a meaningless number, so ask for input instead.
    if gender is None or ethnicity is None or job_title is None:
        return "Please select a gender, ethnicity, and job title."

    # predict_total_pay takes job_title before ethnicity, so pass by keyword.
    predicted_pay = predict_total_pay(
        gender=gender,
        job_title=job_title,
        ethnicity=ethnicity,
    )
    return f"${predicted_pay:.2f}"

# Dropdown choices: the distinct non-missing values of each column.
# Gender and ethnicity keep the order in which values first appear in the data.
genders, ethnicities = (
    df[column].dropna().unique().tolist()
    for column in ('GENDER', 'ETHNICITY')
)
# Job titles are sorted.
job_titles = df['JOB_TITLE'].dropna().unique().tolist()
job_titles.sort()

# Build the web UI: three dropdowns feed gradio_predict (in this order:
# gender, ethnicity, job title) and the result is shown in a text box.
iface = gr.Interface(
    fn=gradio_predict,
    inputs=[
        gr.Dropdown(choices=options, label=caption)
        for options, caption in (
            (genders, "Gender"),
            (ethnicities, "Ethnicity"),
            (job_titles, "Job Title"),
        )
    ],
    outputs=gr.Textbox(label="Predicted Total Pay"),
    title="LA City Employee Pay Predictor",
    description="Predict the total pay for LA City employees based on gender, ethnicity, and job title."
)

# Start the Gradio server.
iface.launch()