Spaces:

Zeyadd-Mostaffa
/

NTI_ML_Project

Sleeping

App Files Community

Zeyadd-Mostaffa committed on May 21

Commit

de496ae

verified ·

1 Parent(s): 071143b

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -57

app.py CHANGED Viewed

@@ -1,23 +1,18 @@
 import gradio as gr
-import xgboost as xgb
 import numpy as np
 import pandas as pd
 import joblib
 import os
 import warnings
-import shap
-import matplotlib.pyplot as plt
-# Suppress XGBoost warnings
-warnings.filterwarnings("ignore", category=UserWarning, message=".*WARNING.*")
-# Load your model (automatically detect XGBoost or joblib model)
 def load_model():
-    model_path = "xgboost_model.json"  # Ensure this matches your file name
-    if os.path.exists(model_path):
-        model = xgb.Booster()
-        model.load_model(model_path)
-        print("✅ Model loaded successfully.")
         return model
     else:
         print("❌ Model file not found.")
@@ -25,12 +20,11 @@ def load_model():
 model = load_model()
-# Prediction function with consistent feature names
 def predict_employee_status(satisfaction_level, last_evaluation, number_project,
-                            average_monthly_hours, time_spend_company,
                             work_accident, promotion_last_5years, salary, department, threshold=0.5):
-    # One-hot encode the department
     departments = [
         'RandD', 'accounting', 'hr', 'management', 'marketing',
         'product_mng', 'sales', 'support', 'technical'
@@ -39,67 +33,56 @@ def predict_employee_status(satisfaction_level, last_evaluation, number_project,
     if department in departments:
         department_features[f"department_{department}"] = 1
-    # Automatically Generate Interaction Features
     satisfaction_evaluation = satisfaction_level * last_evaluation
     work_balance = average_monthly_hours / number_project
-    # Prepare the input with all expected features as a DataFrame with column names
     input_data = {
         "satisfaction_level": [satisfaction_level],
         "last_evaluation": [last_evaluation],
         "number_project": [number_project],
         "average_monthly_hours": [average_monthly_hours],
-        "time_spend_company": [time_spend_company],  # Corrected to match training data
         "Work_accident": [work_accident],
         "promotion_last_5years": [promotion_last_5years],
         "salary": [salary],
-        "satisfaction_evaluation": [satisfaction_evaluation],  # Added for model compatibility
-        "work_balance": [work_balance],  # Added for model compatibility
         **department_features
     }
     input_df = pd.DataFrame(input_data)
-    # Predict using the model
     if model is None:
-        return "❌ No model found. Please upload the model file."
     try:
-        dmatrix = xgb.DMatrix(input_df)
-        prediction = model.predict(dmatrix)
-        prediction_prob = prediction[0]
-        # Apply the dynamic threshold
-        result = "✅ Employee is likely to quit." if prediction_prob >= threshold else "✅ Employee is likely to stay."
-        return f"{result} (Probability: {prediction_prob:.2%})"
     except Exception as e:
-        return f"❌ Error: {str(e)}"
-# Gradio interface with consistent feature names
-def gradio_interface():
-    interface = gr.Interface(
-        fn=predict_employee_status,
-        inputs=[
-            gr.Number(label="Satisfaction Level (0.0 - 1.0)"),
-            gr.Number(label="Last Evaluation (0.0 - 1.0)"),
-            gr.Number(label="Number of Projects (1 - 10)"),
-            gr.Number(label="Average Monthly Hours (80 - 320)"),
-            gr.Number(label="Time Spend at Company (Years)"),  # Corrected to match the notebook
-            gr.Radio([0, 1], label="Work Accident (0 = No, 1 = Yes)"),
-            gr.Radio([0, 1], label="Promotion in Last 5 Years (0 = No, 1 = Yes)"),
-            gr.Radio([0, 1, 2], label="Salary (0 = Low, 1 = Medium, 2 = High)"),
-            gr.Dropdown(
-                ['RandD', 'accounting', 'hr', 'management', 'marketing',
-                 'product_mng', 'sales', 'support', 'technical'],
-                label="Department"
-            ),
-            gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Prediction Threshold")
-        ],
-        outputs="text",
-        title="Employee Retention Prediction System (With SHAP & ROC Threshold)",
-        description="Predict whether an employee is likely to stay or quit based on their profile. Adjust the threshold for accurate predictions.",
-        theme="dark"
-    )
-    interface.launch()
-gradio_interface()

 import gradio as gr
 import numpy as np
 import pandas as pd
 import joblib
 import os
 import warnings
+warnings.filterwarnings("ignore")
+# Load Ensemble Model
 def load_model():
+    model_path = "final_ensemble_model.pkl"  # Must match your saved model name
+    if os.path.exists(model_path):
+        model = joblib.load(model_path)
+        print("✅ Ensemble model loaded successfully.")
         return model
     else:
         print("❌ Model file not found.")
 model = load_model()
+# Prediction function
 def predict_employee_status(satisfaction_level, last_evaluation, number_project,
+                            average_monthly_hours, time_spend_company,
                             work_accident, promotion_last_5years, salary, department, threshold=0.5):
     departments = [
         'RandD', 'accounting', 'hr', 'management', 'marketing',
         'product_mng', 'sales', 'support', 'technical'
     if department in departments:
         department_features[f"department_{department}"] = 1
+    # Feature engineering
     satisfaction_evaluation = satisfaction_level * last_evaluation
     work_balance = average_monthly_hours / number_project
+    # Construct DataFrame
     input_data = {
         "satisfaction_level": [satisfaction_level],
         "last_evaluation": [last_evaluation],
         "number_project": [number_project],
         "average_monthly_hours": [average_monthly_hours],
+        "time_spend_company": [time_spend_company],
         "Work_accident": [work_accident],
         "promotion_last_5years": [promotion_last_5years],
         "salary": [salary],
+        "satisfaction_evaluation": [satisfaction_evaluation],
+        "work_balance": [work_balance],
         **department_features
     }
     input_df = pd.DataFrame(input_data)
+    # Prediction
     if model is None:
+        return "❌ No model loaded."
     try:
+        prob = model.predict_proba(input_df)[0][1]
+        label = "✅ Employee is likely to quit." if prob >= threshold else "✅ Employee is likely to stay."
+        return f"{label} (Probability: {prob:.2%})"
     except Exception as e:
+        return f"❌ Error during prediction: {str(e)}"
+# Launch Gradio Interface
+# The input widgets are listed in the same order as the positional parameters
+# of predict_employee_status, which is how each widget value reaches the
+# matching argument.
+gr.Interface(
+    fn=predict_employee_status,
+    inputs=[
+        gr.Number(label="Satisfaction Level (0.0 - 1.0)"),
+        gr.Number(label="Last Evaluation (0.0 - 1.0)"),
+        gr.Number(label="Number of Projects (1 - 10)"),
+        gr.Number(label="Average Monthly Hours (80 - 320)"),
+        gr.Number(label="Time Spend at Company (Years)"),
+        gr.Radio([0, 1], label="Work Accident (0 = No, 1 = Yes)"),
+        gr.Radio([0, 1], label="Promotion in Last 5 Years (0 = No, 1 = Yes)"),
+        gr.Radio([0, 1, 2], label="Salary (0 = Low, 1 = Medium, 2 = High)"),
+        # The choices are spelled out here because the `departments` list is a
+        # local variable of predict_employee_status; it does not exist at
+        # module level, so referencing it here raised NameError at startup.
+        # Keep these values in sync with the list inside that function.
+        gr.Dropdown(
+            ['RandD', 'accounting', 'hr', 'management', 'marketing',
+             'product_mng', 'sales', 'support', 'technical'],
+            label="Department"
+        ),
+        gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Prediction Threshold")
+    ],
+    outputs="text",
+    title="Employee Retention Prediction System (Voting Ensemble)",
+    description="Predict whether an employee will stay or quit. Adjust threshold for sensitivity.",
+    theme="dark"
+).launch()