huntrezz committed on
Commit
0dcbe04
·
verified ·
1 Parent(s): e1a6bd1

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -127
app.py DELETED
@@ -1,127 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
- from sklearn.preprocessing import StandardScaler, OneHotEncoder
4
- from sklearn.model_selection import train_test_split
5
- from fastai.tabular.all import *
6
- from sklearn.ensemble import VotingRegressor
7
- from sklearn.linear_model import LinearRegression
8
- from sklearn.tree import DecisionTreeRegressor
9
- from sklearn.base import BaseEstimator, RegressorMixin
10
- from sklearn.compose import ColumnTransformer
11
- from sklearn.pipeline import Pipeline
12
- import gradio as gr
13
-
14
# Load the payroll export; +/-inf values are converted to NaN so that the
# downstream steps see them as ordinary missing values.
df = pd.read_csv(
    'City_Employee_Payroll__Current__20240915.csv', low_memory=False
).replace([np.inf, -np.inf], np.nan)

# Column roles shared by the fastai model and the scikit-learn pipelines.
cat_names = ['EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'GENDER', 'ETHNICITY', 'JOB_TITLE', 'DEPARTMENT_NO']
cont_names = ['PAY_YEAR', 'REGULAR_PAY', 'OVERTIME_PAY', 'ALL_OTHER_PAY']

# Two engineered features derived from the pay columns. The "+ 1" in the
# ratio's denominator keeps it away from zero when there is no extra pay.
_non_regular_pay = df['OVERTIME_PAY'] + df['ALL_OTHER_PAY']
df['PAY_RATIO'] = df['REGULAR_PAY'] / (_non_regular_pay + 1)
df['TOTAL_NON_REGULAR_PAY'] = _non_regular_pay
cont_names += ['PAY_RATIO', 'TOTAL_NON_REGULAR_PAY']

# Feature matrix / target for the scikit-learn side; missing categorical
# values are replaced by the literal label 'Unknown'.
X = df[[*cat_names, *cont_names]].copy()
y = df['TOTAL_PAY'].copy()
for col in cat_names:
    X[col] = X[col].fillna('Unknown')

# Hold out 20% of the rows, then keep a 30% sample of the remaining training
# rows; that sample is what the ensemble is fitted on later.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_sample, _, y_train_sample, _ = train_test_split(
    X_train, y_train, train_size=0.3, random_state=42
)

# fastai tabular model, built from the full frame with its own random
# 80/20 train/validation split.
_valid_splits = RandomSplitter(valid_pct=0.2)(range_of(df))
to = TabularPandas(
    df,
    procs=[Categorify, FillMissing, Normalize],
    cat_names=cat_names,
    cont_names=cont_names,
    y_names='TOTAL_PAY',
    splits=_valid_splits,
)
dls = to.dataloaders(bs=64)

learn = tabular_learner(dls, layers=[200, 100, 50], metrics=rmse)
learn.fit_one_cycle(9)
37
-
38
class FastAIWrapper(BaseEstimator, RegressorMixin):
    """Expose an existing fastai learner through the scikit-learn regressor API.

    The wrapper only forwards prediction requests to the learner it holds;
    it performs no training of its own.
    """

    def __init__(self, learn):
        # Stored under the same name as the constructor argument, which is
        # what BaseEstimator's parameter introspection expects.
        self.learn = learn

    def fit(self, X, y):
        """Do nothing and return ``self``; ``X`` and ``y`` are ignored."""
        return self

    def predict(self, X):
        """Return the learner's predictions for ``X`` as a flat NumPy array."""
        test_loader = self.learn.dls.test_dl(X)
        # get_preds returns a pair; only the first element (predictions) is used.
        predictions, _unused = self.learn.get_preds(dl=test_loader)
        return predictions.numpy().flatten()
47
-
48
# Dense one-hot encoder for the categorical columns.
# scikit-learn 1.2 renamed the ``sparse`` keyword to ``sparse_output`` and
# 1.4 removed the old spelling, so try the current name first and fall back
# to the legacy one on older installations (which reject the new keyword
# with a TypeError).
try:
    _one_hot_encoder = OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore')
except TypeError:
    _one_hot_encoder = OneHotEncoder(drop='first', sparse=False, handle_unknown='ignore')

# Scale the continuous columns and one-hot encode the categorical ones.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), cont_names),
        ('cat', _one_hot_encoder, cat_names)
    ])

# The three ensemble members: the fastai network (already trained) and two
# scikit-learn pipelines that share the preprocessing definition above.
model1 = FastAIWrapper(learn)
model2 = Pipeline([('preprocessor', preprocessor), ('regressor', LinearRegression())])
model3 = Pipeline([('preprocessor', preprocessor), ('regressor', DecisionTreeRegressor())])

# Weighted average of the three predictions; the fastai model counts double.
ensemble = VotingRegressor(
    estimators=[('fastai', model1), ('lr', model2), ('dt', model3)],
    weights=[2, 1, 1]
)

ensemble.fit(X_train_sample, y_train_sample)
64
-
65
def _reference_rows(gender, job_title, ethnicity):
    """Return the most specific non-empty slice of ``df`` for the given inputs.

    Preference order: rows matching gender, job title and ethnicity; then rows
    matching the job title only; then the whole frame.
    """
    exact_match = df[
        (df['GENDER'] == gender)
        & (df['JOB_TITLE'] == job_title)
        & (df['ETHNICITY'] == ethnicity)
    ]
    if len(exact_match) > 0:
        return exact_match
    same_job = df[df['JOB_TITLE'] == job_title]
    if len(same_job) > 0:
        return same_job
    return df


def _most_common(column, fallback='Unknown'):
    """Return the first mode of ``column``, or ``fallback`` if it has none."""
    # Series.mode() is empty when every value is missing; indexing it with
    # .iloc[0] would raise IndexError.
    modes = column.mode()
    return modes.iloc[0] if len(modes) > 0 else fallback


def _mean_or_overall(column, overall):
    """Return the mean of ``column``; use ``overall``'s mean if that is NaN."""
    average = column.mean()
    return overall.mean() if pd.isna(average) else average


def predict_total_pay(gender, job_title, ethnicity):
    """Predict total pay for one gender / job title / ethnicity combination.

    The caller supplies only three attributes, so the remaining model inputs
    are imputed from the module-level ``df``: categorical inputs take the most
    frequent value, and pay inputs take the mean, of the most specific
    matching group of rows (exact match, else same job title, else all rows).
    ``PAY_YEAR`` is set to the latest year present in ``df``.

    Args:
        gender: Value compared against ``df['GENDER']``.
        job_title: Value compared against ``df['JOB_TITLE']``.
        ethnicity: Value compared against ``df['ETHNICITY']``.

    Returns:
        The first element of ``ensemble.predict`` for the assembled one-row
        frame.
    """
    rows = _reference_rows(gender, job_title, ethnicity)

    # Insertion order fixes the column order of the one-row frame below.
    record = {'GENDER': gender, 'JOB_TITLE': job_title, 'ETHNICITY': ethnicity}
    for col in ('EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'DEPARTMENT_NO'):
        # 'Unknown' is the same placeholder the training features use for
        # missing categorical values.
        record[col] = _most_common(rows[col])
    for col in ('REGULAR_PAY', 'OVERTIME_PAY', 'ALL_OTHER_PAY'):
        # A group whose pay column is entirely missing would yield NaN; fall
        # back to the frame-wide mean in that case.
        record[col] = _mean_or_overall(rows[col], df[col])
    record['PAY_YEAR'] = df['PAY_YEAR'].max()

    sample = pd.DataFrame({name: [value] for name, value in record.items()})

    # Same engineered features as the ones added to ``df`` before training.
    non_regular_pay = sample['OVERTIME_PAY'] + sample['ALL_OTHER_PAY']
    sample['PAY_RATIO'] = sample['REGULAR_PAY'] / (non_regular_pay + 1)
    sample['TOTAL_NON_REGULAR_PAY'] = non_regular_pay

    categorical_columns = ['GENDER', 'JOB_TITLE', 'ETHNICITY', 'EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'DEPARTMENT_NO']
    for col in categorical_columns:
        sample[col] = sample[col].astype('object')

    return ensemble.predict(sample)[0]
106
-
107
def gradio_predict(gender, ethnicity, job_title):
    """Gradio callback: return the predicted total pay as a dollar string.

    The UI passes (gender, ethnicity, job_title); ``predict_total_pay`` takes
    job title before ethnicity, so the arguments are forwarded by keyword.
    The result is formatted with a leading ``$`` and two decimals.
    """
    amount = predict_total_pay(gender=gender, job_title=job_title, ethnicity=ethnicity)
    return f"${amount:.2f}"
110
-
111
# Dropdown choices are the distinct non-missing values found in the data;
# job titles are additionally sorted.
genders = df['GENDER'].dropna().unique().tolist()
ethnicities = df['ETHNICITY'].dropna().unique().tolist()
job_titles = df['JOB_TITLE'].dropna().unique().tolist()
job_titles.sort()

# Input widgets, in the positional order expected by gradio_predict.
_input_widgets = [
    gr.Dropdown(choices=genders, label="Gender"),
    gr.Dropdown(choices=ethnicities, label="Ethnicity"),
    gr.Dropdown(choices=job_titles, label="Job Title"),
]
_output_widget = gr.Textbox(label="Predicted Total Pay")

iface = gr.Interface(
    fn=gradio_predict,
    inputs=_input_widgets,
    outputs=_output_widget,
    title="LA City Employee Pay Predictor",
    description="Predict the total pay for LA City employees based on gender, ethnicity, and job title.",
)

# Start the web UI.
iface.launch()