huntrezz committed on
Commit
ad5a6b0
·
verified ·
1 Parent(s): 85e5e78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -4
app.py CHANGED
@@ -47,9 +47,78 @@ def predict_total_pay(gender, job_title, ethnicity):
47
  prediction = ensemble.predict(sample)[0]
48
  return prediction
49
 
50
- def gradio_predict(gender, ethnicity, job_title):
51
- predicted_pay = predict_total_pay(gender, job_title, ethnicity)
52
- return f"${predicted_pay:.2f}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  # Prepare dropdown options
55
  genders = df['GENDER'].dropna().unique().tolist()
@@ -58,7 +127,7 @@ job_titles = sorted(df['JOB_TITLE'].dropna().unique().tolist())
58
 
59
  # Create Gradio interface
60
  iface = gr.Interface(
61
- fn=gradio_predict,
62
  inputs=[
63
  gr.Dropdown(choices=genders, label="Gender"),
64
  gr.Dropdown(choices=ethnicities, label="Ethnicity"),
 
47
  prediction = ensemble.predict(sample)[0]
48
  return prediction
49
 
50
def _first_available_mode(scopes, column):
    """Return the most frequent non-null value of *column* from the first scope that has one.

    ``Series.mode()`` drops nulls, so a scope whose values for *column* are all
    null yields an empty result; in that case the next (broader) scope is tried.

    Raises:
        IndexError: if no scope contains a non-null value for *column*.
    """
    for scope in scopes:
        modes = scope[column].mode()
        if not modes.empty:
            # With ties, mode() returns several values; keep the first one.
            return modes.iloc[0]
    raise IndexError(f"no non-null values in column {column!r} to take a mode from")


def predict_total_pay(gender, job_title, ethnicity):
    """Predict total pay for a gender / job title / ethnicity combination.

    The model needs more features than the three inputs, so the remaining ones
    are imputed from a reference population taken from the module-level ``df``,
    using the narrowest non-empty of:

    1. rows matching gender, job title and ethnicity exactly,
    2. rows matching the job title only,
    3. the whole dataset.

    Categorical features use the mode of the reference population (falling back
    to a broader population if the column has no non-null values there);
    numeric pay features use its mean.

    NOTE(review): the Gradio interface appears to pass its inputs positionally
    in the order Gender, Ethnicity, <third input>, while this signature is
    (gender, job_title, ethnicity). If the third input is the job title, the
    last two arguments arrive swapped -- confirm the wiring at the call site.

    Args:
        gender: Value compared against the ``GENDER`` column.
        job_title: Value compared against the ``JOB_TITLE`` column.
        ethnicity: Value compared against the ``ETHNICITY`` column.

    Returns:
        The first element of ``ensemble.predict(sample)`` for the assembled
        one-row sample.

    Raises:
        IndexError: if a categorical feature has no non-null value anywhere in
            the dataset.
    """
    mode_features = ('EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'DEPARTMENT_NO')
    mean_features = ('REGULAR_PAY', 'OVERTIME_PAY', 'ALL_OTHER_PAY')
    categorical_columns = ['GENDER', 'JOB_TITLE', 'ETHNICITY', *mode_features]

    # Candidate reference populations, narrowest first; the full dataset is
    # always kept as the last resort.
    exact_match = df[
        (df['GENDER'] == gender)
        & (df['JOB_TITLE'] == job_title)
        & (df['ETHNICITY'] == ethnicity)
    ]
    same_job = df[df['JOB_TITLE'] == job_title]
    scopes = [subset for subset in (exact_match, same_job) if len(subset) > 0]
    scopes.append(df)
    reference = scopes[0]

    # One-row frame seeded with the caller's inputs.
    sample = pd.DataFrame({
        'GENDER': [gender],
        'JOB_TITLE': [job_title],
        'ETHNICITY': [ethnicity],
    })

    # Categorical features: most frequent value in the reference population.
    for column in mode_features:
        sample[column] = [_first_available_mode(scopes, column)]

    # Numeric pay features: mean of the reference population. A NaN mean
    # (all-null column) is passed through unchanged.
    for column in mean_features:
        sample[column] = [reference[column].mean()]

    # Use the most recent pay year present in the dataset.
    sample['PAY_YEAR'] = [df['PAY_YEAR'].max()]

    # Derived features. The +1 in the denominator keeps the ratio finite when
    # there is no overtime or other pay.
    sample['PAY_RATIO'] = sample['REGULAR_PAY'] / (sample['OVERTIME_PAY'] + sample['ALL_OTHER_PAY'] + 1)
    sample['TOTAL_NON_REGULAR_PAY'] = sample['OVERTIME_PAY'] + sample['ALL_OTHER_PAY']

    # Force categorical columns to 'object' dtype before handing the frame to
    # the model.
    for column in categorical_columns:
        sample[column] = sample[column].astype('object')

    prediction = ensemble.predict(sample)[0]
    return prediction
122
 
123
  # Prepare dropdown options
124
  genders = df['GENDER'].dropna().unique().tolist()
 
127
 
128
  # Create Gradio interface
129
  iface = gr.Interface(
130
+ fn=predict_total_pay,
131
  inputs=[
132
  gr.Dropdown(choices=genders, label="Gender"),
133
  gr.Dropdown(choices=ethnicities, label="Ethnicity"),