Spaces:

huntrezz
/

LACityEmployeePayPredictor

Sleeping

App Files Files Community

LACityEmployeePayPredictor / app.py

huntrezz

Update app.py

ea9d83f verified about 1 year ago

raw

history blame contribute delete

4.69 kB

	import pandas as pd
	import numpy as np
	from sklearn.preprocessing import StandardScaler, OneHotEncoder
	from sklearn.ensemble import VotingRegressor
	from sklearn.linear_model import LinearRegression
	from sklearn.tree import DecisionTreeRegressor
	from sklearn.base import BaseEstimator, RegressorMixin
	from sklearn.compose import ColumnTransformer
	from sklearn.pipeline import Pipeline
	import gradio as gr
	import joblib

	class FastAIWrapper(BaseEstimator, RegressorMixin):
	    """Scikit-learn style regressor facade around a pre-built learner object.

	    The wrapped ``learn`` object is used as-is: ``fit`` performs no training
	    and ``predict`` delegates to ``learn.dls.test_dl`` and ``learn.get_preds``.
	    NOTE(review): ``learn`` is presumably a fastai ``Learner`` -- confirm.
	    """

	    def __init__(self, learn):
	        # Kept under the same name as the constructor argument.
	        self.learn = learn

	    def fit(self, X, y):
	        """Accept ``X`` and ``y`` without using them and return ``self``."""
	        return self

	    def predict(self, X):
	        """Return the learner's predictions for ``X`` as a flat NumPy array."""
	        test_loader = self.learn.dls.test_dl(X)
	        # get_preds is unpacked as a pair; only the first element is used.
	        outputs, _unused = self.learn.get_preds(dl=test_loader)
	        flat_outputs = outputs.numpy().flatten()
	        return flat_outputs

	# Payroll records: read the CSV and turn +/- infinity into NaN in one pass.
	df = pd.read_csv(
	    'City_Employee_Payroll__Current__20240915.csv',
	    low_memory=False,
	).replace([np.inf, -np.inf], np.nan)

	# Column groupings: categorical variables first, then continuous ones.
	cat_names = [
	    'EMPLOYMENT_TYPE',
	    'JOB_STATUS',
	    'MOU',
	    'GENDER',
	    'ETHNICITY',
	    'JOB_TITLE',
	    'DEPARTMENT_NO',
	]
	cont_names = [
	    'PAY_YEAR',
	    'REGULAR_PAY',
	    'OVERTIME_PAY',
	    'ALL_OTHER_PAY',
	    'PAY_RATIO',
	    'TOTAL_NON_REGULAR_PAY',
	]

	# Trained ensemble used for every prediction.
	# NOTE(review): joblib.load unpickles the file, so it must only ever be
	# pointed at a trusted artifact. The pickle presumably references
	# FastAIWrapper, which would explain why that class is defined in this
	# module -- confirm before moving or renaming it.
	ensemble = joblib.load('ensemble_model.joblib')

	def _reference_frames(gender, job_title, ethnicity):
	    """Return the non-empty reference subsets of ``df``, narrowest first.

	    Order: rows matching gender + job title + ethnicity, then rows matching
	    the job title only, then the whole of ``df`` (always included, last).
	    """
	    same_job = df[df['JOB_TITLE'] == job_title]
	    exact = same_job[(same_job['GENDER'] == gender) & (same_job['ETHNICITY'] == ethnicity)]
	    candidates = [frame for frame in (exact, same_job) if len(frame) > 0]
	    candidates.append(df)
	    return candidates


	def _first_mode(frames, column):
	    """Return the most frequent non-null ``column`` value from the first frame that has one.

	    ``Series.mode`` drops NaN, so a frame whose ``column`` is entirely NaN
	    yields an empty result; such frames are skipped instead of indexing into
	    the empty result.

	    Raises:
	        ValueError: if no frame holds a non-null value for ``column``.
	    """
	    for frame in frames:
	        modes = frame[column].mode()
	        if not modes.empty:
	            return modes.iloc[0]
	    raise ValueError(f"No non-null values available for column {column!r}")


	def predict_total_pay(gender, job_title, ethnicity):
	    """Build a one-row feature frame for the given person and run the ensemble on it.

	    Features the caller does not supply are filled from the closest matching
	    rows of ``df``: the exact gender/job title/ethnicity group when it has
	    rows, otherwise all rows with the same job title, otherwise every row.
	    Categorical features use the most frequent value and pay features use the
	    mean of that closest group. If the closest group has only missing values
	    for a categorical feature, the next broader group supplies it.

	    Args:
	        gender: value compared against the ``GENDER`` column.
	        job_title: value compared against the ``JOB_TITLE`` column.
	        ethnicity: value compared against the ``ETHNICITY`` column.

	    Returns:
	        The first element of ``ensemble.predict`` for the constructed row.

	    Raises:
	        ValueError: if a categorical feature has no non-null value anywhere
	            in ``df``.
	    """
	    frames = _reference_frames(gender, job_title, ethnicity)
	    closest = frames[0]

	    # NOTE(review): columns are inserted in the same order as before, in case
	    # the fitted pipeline is sensitive to feature order.
	    sample = pd.DataFrame({
	        'GENDER': [gender],
	        'JOB_TITLE': [job_title],
	        'ETHNICITY': [ethnicity],
	    })
	    for column in ('EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'DEPARTMENT_NO'):
	        sample[column] = [_first_mode(frames, column)]
	    for column in ('REGULAR_PAY', 'OVERTIME_PAY', 'ALL_OTHER_PAY'):
	        sample[column] = [closest[column].mean()]

	    # Derived features: latest pay year in the data, plus the two pay aggregates.
	    sample['PAY_YEAR'] = [df['PAY_YEAR'].max()]
	    non_regular = sample['OVERTIME_PAY'] + sample['ALL_OTHER_PAY']
	    # The +1 keeps the denominator non-zero when there is no non-regular pay.
	    sample['PAY_RATIO'] = sample['REGULAR_PAY'] / (non_regular + 1)
	    sample['TOTAL_NON_REGULAR_PAY'] = non_regular

	    categorical_columns = ['GENDER', 'JOB_TITLE', 'ETHNICITY', 'EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'DEPARTMENT_NO']
	    for col in categorical_columns:
	        sample[col] = sample[col].astype('object')

	    return ensemble.predict(sample)[0]

	def gradio_predict(gender, ethnicity, job_title):
	    """Format the predicted yearly pay for display in the Gradio textbox.

	    The arguments arrive in UI order (gender, ethnicity, job title) and are
	    passed to ``predict_total_pay`` in its own order (gender, job title,
	    ethnicity). A negative prediction gets an explanatory message; anything
	    else is shown as a plain dollar amount.
	    """
	    annual_pay = predict_total_pay(gender, job_title, ethnicity)
	    amount_text = f"${annual_pay:.2f} per year"
	    if annual_pay < 0:
	        return f"Predicted pay is negative ({amount_text}). May indicate financial hardship or unlikelihood of obtaining position."
	    return amount_text

	# Dropdown choices are the distinct non-null values found in the data;
	# only the job titles are put in sorted order.
	genders = df['GENDER'].dropna().unique().tolist()
	ethnicities = df['ETHNICITY'].dropna().unique().tolist()
	job_titles = df['JOB_TITLE'].dropna().unique().tolist()
	job_titles.sort()

	# Input widgets, listed in the positional order gradio_predict takes them:
	# gender, ethnicity, job title.
	input_widgets = [
	    gr.Dropdown(choices=genders, label="Gender"),
	    gr.Dropdown(choices=ethnicities, label="Ethnicity"),
	    gr.Dropdown(choices=job_titles, label="Job Title"),
	]
	output_widget = gr.Textbox(label="Predicted Total Pay")

	# Assemble the Gradio interface around gradio_predict.
	iface = gr.Interface(
	    fn=gradio_predict,
	    inputs=input_widgets,
	    outputs=output_widget,
	    title="LA City Employee Pay Predictor",
	    description="Predict the total pay for LA City employees based on gender, ethnicity, and job title.",
	)

	# Start serving the interface as soon as the module runs.
	iface.launch()