Zeyadd-Mostaffa committed on
Commit
de496ae
·
verified ·
1 Parent(s): 071143b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -57
app.py CHANGED
@@ -1,23 +1,18 @@
1
  import gradio as gr
2
- import xgboost as xgb
3
  import numpy as np
4
  import pandas as pd
5
  import joblib
6
  import os
7
  import warnings
8
- import shap
9
- import matplotlib.pyplot as plt
10
 
11
- # Suppress XGBoost warnings
12
- warnings.filterwarnings("ignore", category=UserWarning, message=".*WARNING.*")
13
 
14
- # Load your model (automatically detect XGBoost or joblib model)
15
  def load_model():
16
- model_path = "xgboost_model.json" # Ensure this matches your file name
17
- if os.path.exists(model_path):
18
- model = xgb.Booster()
19
- model.load_model(model_path)
20
- print("✅ Model loaded successfully.")
21
  return model
22
  else:
23
  print("❌ Model file not found.")
@@ -25,12 +20,11 @@ def load_model():
25
 
26
  model = load_model()
27
 
28
- # Prediction function with consistent feature names
29
  def predict_employee_status(satisfaction_level, last_evaluation, number_project,
30
- average_monthly_hours, time_spend_company,
31
  work_accident, promotion_last_5years, salary, department, threshold=0.5):
32
-
33
- # One-hot encode the department
34
  departments = [
35
  'RandD', 'accounting', 'hr', 'management', 'marketing',
36
  'product_mng', 'sales', 'support', 'technical'
@@ -39,67 +33,56 @@ def predict_employee_status(satisfaction_level, last_evaluation, number_project,
39
  if department in departments:
40
  department_features[f"department_{department}"] = 1
41
 
42
- # Automatically Generate Interaction Features
43
  satisfaction_evaluation = satisfaction_level * last_evaluation
44
  work_balance = average_monthly_hours / number_project
45
 
46
- # Prepare the input with all expected features as a DataFrame with column names
47
  input_data = {
48
  "satisfaction_level": [satisfaction_level],
49
  "last_evaluation": [last_evaluation],
50
  "number_project": [number_project],
51
  "average_monthly_hours": [average_monthly_hours],
52
- "time_spend_company": [time_spend_company], # Corrected to match training data
53
  "Work_accident": [work_accident],
54
  "promotion_last_5years": [promotion_last_5years],
55
  "salary": [salary],
56
- "satisfaction_evaluation": [satisfaction_evaluation], # Added for model compatibility
57
- "work_balance": [work_balance], # Added for model compatibility
58
  **department_features
59
  }
60
 
61
  input_df = pd.DataFrame(input_data)
62
 
63
- # Predict using the model
64
  if model is None:
65
- return "❌ No model found. Please upload the model file."
66
 
67
  try:
68
- dmatrix = xgb.DMatrix(input_df)
69
- prediction = model.predict(dmatrix)
70
- prediction_prob = prediction[0]
71
-
72
- # Apply the dynamic threshold
73
- result = "✅ Employee is likely to quit." if prediction_prob >= threshold else "✅ Employee is likely to stay."
74
- return f"{result} (Probability: {prediction_prob:.2%})"
75
  except Exception as e:
76
- return f"❌ Error: {str(e)}"
77
 
78
- # Gradio interface with consistent feature names
79
- def gradio_interface():
80
- interface = gr.Interface(
81
- fn=predict_employee_status,
82
- inputs=[
83
- gr.Number(label="Satisfaction Level (0.0 - 1.0)"),
84
- gr.Number(label="Last Evaluation (0.0 - 1.0)"),
85
- gr.Number(label="Number of Projects (1 - 10)"),
86
- gr.Number(label="Average Monthly Hours (80 - 320)"),
87
- gr.Number(label="Time Spend at Company (Years)"), # Corrected to match the notebook
88
- gr.Radio([0, 1], label="Work Accident (0 = No, 1 = Yes)"),
89
- gr.Radio([0, 1], label="Promotion in Last 5 Years (0 = No, 1 = Yes)"),
90
- gr.Radio([0, 1, 2], label="Salary (0 = Low, 1 = Medium, 2 = High)"),
91
- gr.Dropdown(
92
- ['RandD', 'accounting', 'hr', 'management', 'marketing',
93
- 'product_mng', 'sales', 'support', 'technical'],
94
- label="Department"
95
- ),
96
- gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Prediction Threshold")
97
- ],
98
- outputs="text",
99
- title="Employee Retention Prediction System (With SHAP & ROC Threshold)",
100
- description="Predict whether an employee is likely to stay or quit based on their profile. Adjust the threshold for accurate predictions.",
101
- theme="dark"
102
- )
103
- interface.launch()
104
 
105
- gradio_interface()
 
1
  import gradio as gr
 
2
  import numpy as np
3
  import pandas as pd
4
  import joblib
5
  import os
6
  import warnings
 
 
7
 
8
+ warnings.filterwarnings("ignore")
 
9
 
10
+ # Load Ensemble Model
11
  def load_model():
12
+ model_path = "final_ensemble_model.pkl" # Must match your saved model name
13
+ if os.path.exists(model_path):
14
+ model = joblib.load(model_path)
15
+ print("✅ Ensemble model loaded successfully.")
 
16
  return model
17
  else:
18
  print("❌ Model file not found.")
 
20
 
21
  model = load_model()
22
 
23
+ # Prediction function
24
  def predict_employee_status(satisfaction_level, last_evaluation, number_project,
25
+ average_monthly_hours, time_spend_company,
26
  work_accident, promotion_last_5years, salary, department, threshold=0.5):
27
+
 
28
  departments = [
29
  'RandD', 'accounting', 'hr', 'management', 'marketing',
30
  'product_mng', 'sales', 'support', 'technical'
 
33
  if department in departments:
34
  department_features[f"department_{department}"] = 1
35
 
36
+ # Feature engineering
37
  satisfaction_evaluation = satisfaction_level * last_evaluation
38
  work_balance = average_monthly_hours / number_project
39
 
40
+ # Construct DataFrame
41
  input_data = {
42
  "satisfaction_level": [satisfaction_level],
43
  "last_evaluation": [last_evaluation],
44
  "number_project": [number_project],
45
  "average_monthly_hours": [average_monthly_hours],
46
+ "time_spend_company": [time_spend_company],
47
  "Work_accident": [work_accident],
48
  "promotion_last_5years": [promotion_last_5years],
49
  "salary": [salary],
50
+ "satisfaction_evaluation": [satisfaction_evaluation],
51
+ "work_balance": [work_balance],
52
  **department_features
53
  }
54
 
55
  input_df = pd.DataFrame(input_data)
56
 
57
+ # Prediction
58
  if model is None:
59
+ return "❌ No model loaded."
60
 
61
  try:
62
+ prob = model.predict_proba(input_df)[0][1]
63
+ label = "✅ Employee is likely to quit." if prob >= threshold else "✅ Employee is likely to stay."
64
+ return f"{label} (Probability: {prob:.2%})"
 
 
 
 
65
  except Exception as e:
66
+ return f"❌ Error during prediction: {str(e)}"
67
 
68
+ # Launch Gradio Interface
69
+ gr.Interface(
70
+ fn=predict_employee_status,
71
+ inputs=[
72
+ gr.Number(label="Satisfaction Level (0.0 - 1.0)"),
73
+ gr.Number(label="Last Evaluation (0.0 - 1.0)"),
74
+ gr.Number(label="Number of Projects (1 - 10)"),
75
+ gr.Number(label="Average Monthly Hours (80 - 320)"),
76
+ gr.Number(label="Time Spend at Company (Years)"),
77
+ gr.Radio([0, 1], label="Work Accident (0 = No, 1 = Yes)"),
78
+ gr.Radio([0, 1], label="Promotion in Last 5 Years (0 = No, 1 = Yes)"),
79
+ gr.Radio([0, 1, 2], label="Salary (0 = Low, 1 = Medium, 2 = High)"),
80
+ gr.Dropdown(departments, label="Department"),
81
+ gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Prediction Threshold")
82
+ ],
83
+ outputs="text",
84
+ title="Employee Retention Prediction System (Voting Ensemble)",
85
+ description="Predict whether an employee will stay or quit. Adjust threshold for sensitivity.",
86
+ theme="dark"
87
+ ).launch()
 
 
 
 
 
 
88