Spaces:

Zeyadd-Mostaffa
/

NTI_ML_Project

Sleeping

App Files Files Community

Zeyadd-Mostaffa committed on May 17

Commit

2ab8b05

verified ·

1 Parent(s): cdda9e4

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -12

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import warnings
 import shap
 import matplotlib.pyplot as plt
 from sklearn.metrics import roc_curve, precision_recall_curve
 # Suppress XGBoost warnings
 warnings.filterwarnings("ignore", category=UserWarning, message=".*WARNING.*")
@@ -26,17 +27,7 @@ def load_model():
 model = load_model()
-# Automatically find the best threshold using ROC
-def optimize_threshold(X_test, y_test):
-    dmatrix = xgb.DMatrix(X_test)
-    y_prob = model.predict(dmatrix)
-    fpr, tpr, thresholds = roc_curve(y_test, y_prob)
-    optimal_idx = np.argmax(tpr - fpr)
-    optimal_threshold = thresholds[optimal_idx]
-    return optimal_threshold
-# Prediction function with dynamic threshold
 def predict_employee_status(satisfaction_level, last_evaluation, number_project,
                             average_monthly_hours, time_spent_company,
                             work_accident, promotion_last_5years, salary, department, threshold=0.5):
@@ -50,7 +41,11 @@ def predict_employee_status(satisfaction_level, last_evaluation, number_project,
     if department in departments:
         department_features[f"department_{department}"] = 1
-    # Prepare the input with all 17 features as a DataFrame with column names
     input_data = {
         "satisfaction_level": [satisfaction_level],
         "last_evaluation": [last_evaluation],
@@ -60,6 +55,8 @@ def predict_employee_status(satisfaction_level, last_evaluation, number_project,
         "Work_accident": [work_accident],
         "promotion_last_5years": [promotion_last_5years],
         "salary": [salary],
         **department_features
     }
@@ -127,3 +124,4 @@ def gradio_interface():
     interface.launch()
 gradio_interface()

 import shap
 import matplotlib.pyplot as plt
 from sklearn.metrics import roc_curve, precision_recall_curve
+from imblearn.over_sampling import SMOTE
 # Suppress XGBoost warnings
 warnings.filterwarnings("ignore", category=UserWarning, message=".*WARNING.*")
 model = load_model()
+# Prediction function with dynamic threshold and balanced data
 def predict_employee_status(satisfaction_level, last_evaluation, number_project,
                             average_monthly_hours, time_spent_company,
                             work_accident, promotion_last_5years, salary, department, threshold=0.5):
     if department in departments:
         department_features[f"department_{department}"] = 1
+    # Automatically Generate Interaction Features
+    satisfaction_evaluation = satisfaction_level * last_evaluation
+    work_balance = average_monthly_hours / number_project
+    # Prepare the input with all expected features as a DataFrame with column names
     input_data = {
         "satisfaction_level": [satisfaction_level],
         "last_evaluation": [last_evaluation],
         "Work_accident": [work_accident],
         "promotion_last_5years": [promotion_last_5years],
         "salary": [salary],
+        "satisfaction_evaluation": [satisfaction_evaluation],
+        "work_balance": [work_balance],
         **department_features
     }
     interface.launch()
 gradio_interface()