Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from sklearn.ensemble import VotingRegressor | |
| from sklearn.base import BaseEstimator, RegressorMixin | |
| import gradio as gr | |
| import joblib | |
class FastAIWrapper(BaseEstimator, RegressorMixin):
    """Expose a learner object through the scikit-learn regressor interface.

    The wrapped ``learn`` object only needs to provide ``dls.test_dl(X)`` and
    ``get_preds(dl=...)``; both are used by :meth:`predict`.
    """

    def __init__(self, learn):
        # Kept under the same name as the constructor argument, which is the
        # scikit-learn convention for estimator parameters.
        self.learn = learn

    def fit(self, X, y):
        """Do nothing and return ``self``; the wrapped learner is left as-is."""
        return self

    def predict(self, X):
        """Return the learner's predictions for ``X`` as a flat NumPy array."""
        test_loader = self.learn.dls.test_dl(X)
        # get_preds returns a pair; only the first element (predictions) is used.
        outputs, _unused = self.learn.get_preds(dl=test_loader)
        return outputs.numpy().flatten()
# Load the payroll data and the trained ensemble once, at import time.
PAYROLL_CSV_PATH = 'City_Employee_Payroll__Current__20240915.csv'
ENSEMBLE_MODEL_PATH = 'ensemble_model.joblib'

# low_memory=False: parse the file in one pass so each column gets a single,
# consistently inferred dtype.
df = pd.read_csv(PAYROLL_CSV_PATH, low_memory=False)

# NOTE(review): the saved ensemble presumably contains FastAIWrapper instances,
# which would require that class to be defined before this call - confirm.
ensemble = joblib.load(ENSEMBLE_MODEL_PATH)
def predict_total_pay(gender, job_title, ethnicity):
    """Predict total pay using dataset-wide defaults for every other feature.

    NOTE: a second function named ``predict_total_pay`` is defined further down
    in this file; once the module has been imported that later definition is
    the one bound to the name, so this version is not the one that gets called.

    Parameters:
        gender: value placed in the ``GENDER`` column.
        job_title: value placed in the ``JOB_TITLE`` column.
        ethnicity: value placed in the ``ETHNICITY`` column.

    Returns:
        The first element of ``ensemble.predict`` for the one-row input.
    """
    row = pd.DataFrame({
        'GENDER': [gender],
        'JOB_TITLE': [job_title],
        'ETHNICITY': [ethnicity],
    })

    # Remaining categorical features: most frequent value in the whole dataset.
    for column in ('EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'DEPARTMENT_NO'):
        row[column] = df[column].mode().iloc[0]

    # Most recent pay year present in the data.
    row['PAY_YEAR'] = df['PAY_YEAR'].max()

    # Pay components: dataset-wide averages.
    for column in ('REGULAR_PAY', 'OVERTIME_PAY', 'ALL_OTHER_PAY'):
        row[column] = df[column].mean()

    # Derived features; the +1 keeps the ratio's denominator away from zero
    # when there is no non-regular pay.
    non_regular_pay = row['OVERTIME_PAY'] + row['ALL_OTHER_PAY']
    row['PAY_RATIO'] = row['REGULAR_PAY'] / (non_regular_pay + 1)
    row['TOTAL_NON_REGULAR_PAY'] = non_regular_pay

    return ensemble.predict(row)[0]
# Features filled in from reference rows, listed in the order in which they are
# appended to the model input (the column order is kept exactly as before).
_MODE_FILLED_COLUMNS = ('EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'DEPARTMENT_NO')
_MEAN_FILLED_COLUMNS = ('REGULAR_PAY', 'OVERTIME_PAY', 'ALL_OTHER_PAY')


def _most_common(values):
    """Return the most frequent entry of a non-empty, NaN-free Series."""
    return values.mode().iloc[0]


def _average(values):
    """Return the mean of a non-empty, NaN-free Series."""
    return values.mean()


def _first_available(populations, column, summarize):
    """Summarize ``column`` using the first population that has data for it.

    ``populations`` is an ordered iterable of DataFrames, most specific first.
    Populations that are empty, or whose ``column`` holds only missing values,
    are skipped. Returns NaN when no population has a usable value.
    """
    for rows in populations:
        observed = rows[column].dropna()
        if not observed.empty:
            return summarize(observed)
    return np.nan


def predict_total_pay(gender, job_title, ethnicity):
    """Predict total pay for a gender / job title / ethnicity combination.

    The model needs more features than the three supplied, so the remaining
    ones are estimated from the payroll data. For each feature the most
    specific reference rows that actually contain a value are used:

    1. rows matching gender, job title and ethnicity exactly;
    2. otherwise rows with the same job title;
    3. otherwise the whole dataset.

    Categorical features take the most frequent value, pay components take the
    mean. A matching group whose column is entirely missing falls through to
    the next level instead of raising ``IndexError`` (empty mode) or feeding
    NaN (empty mean) into the model.

    Parameters (positional order matters to callers such as a UI binding):
        gender: str - The gender of the employee
        job_title: str - The job title of the employee
        ethnicity: str - The ethnicity of the employee

    Returns:
        The first element of ``ensemble.predict`` for the one-row input.
    """
    # Narrow by job title first; the exact-match group is a subset of it.
    same_title = df[df['JOB_TITLE'] == job_title]
    exact_match = same_title[
        (same_title['GENDER'] == gender) & (same_title['ETHNICITY'] == ethnicity)
    ]
    populations = (exact_match, same_title, df)

    sample = pd.DataFrame({
        'GENDER': [gender],
        'JOB_TITLE': [job_title],
        'ETHNICITY': [ethnicity],
    })

    # For categorical variables, use the mode (most frequent value)
    for column in _MODE_FILLED_COLUMNS:
        sample[column] = [_first_available(populations, column, _most_common)]

    # For numerical variables, use the mean
    for column in _MEAN_FILLED_COLUMNS:
        sample[column] = [_first_available(populations, column, _average)]

    # Set PAY_YEAR to the most recent year in the dataset
    sample['PAY_YEAR'] = [df['PAY_YEAR'].max()]

    # Derived features.
    # PAY_RATIO: regular pay relative to other pay; the +1 keeps the
    # denominator away from zero when there is no non-regular pay.
    sample['PAY_RATIO'] = sample['REGULAR_PAY'] / (sample['OVERTIME_PAY'] + sample['ALL_OTHER_PAY'] + 1)
    # TOTAL_NON_REGULAR_PAY: Sum of overtime pay and all other pay
    sample['TOTAL_NON_REGULAR_PAY'] = sample['OVERTIME_PAY'] + sample['ALL_OTHER_PAY']

    # Ensure all categorical columns are of type 'object' to prevent type
    # issues with the model (e.g. a numeric DEPARTMENT_NO).
    categorical_columns = ['GENDER', 'JOB_TITLE', 'ETHNICITY', 'EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'DEPARTMENT_NO']
    for col in categorical_columns:
        sample[col] = sample[col].astype('object')

    # The model takes the one-row DataFrame and returns an array of predictions.
    return ensemble.predict(sample)[0]
# Prepare dropdown options
genders = df['GENDER'].dropna().unique().tolist()
ethnicities = df['ETHNICITY'].dropna().unique().tolist()
job_titles = sorted(df['JOB_TITLE'].dropna().unique().tolist())

# Create Gradio interface.
# Gradio hands the component values to `fn` positionally, so the inputs must be
# listed in the same order as predict_total_pay(gender, job_title, ethnicity).
# Listing Ethnicity before Job Title would silently swap those two arguments.
iface = gr.Interface(
    fn=predict_total_pay,
    inputs=[
        gr.Dropdown(choices=genders, label="Gender"),
        gr.Dropdown(choices=job_titles, label="Job Title"),
        gr.Dropdown(choices=ethnicities, label="Ethnicity"),
    ],
    outputs=gr.Textbox(label="Predicted Total Pay"),
    title="LA City Employee Pay Predictor",
    description="Predict the total pay for LA City employees based on gender, ethnicity, and job title."
)
iface.launch()