import warnings

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from huggingface_hub import hf_hub_download

warnings.filterwarnings("ignore")

# Hugging Face repository that hosts both the ensemble model and the scaler.
_REPO_ID = "Zeyadd-Mostaffa/final_ensemble_model"

# Department names offered in the UI; each maps to a one-hot
# ``department_<name>`` column in the frame passed to the model.
_DEPARTMENTS = (
    "IT",
    "RandD",
    "accounting",
    "hr",
    "management",
    "marketing",
    "product_mng",
    "sales",
    "support",
    "technical",
)

# Exact column order of the frame handed to ``model.predict_proba``.
_EXPECTED_COLUMNS = [
    "satisfaction_level",
    "last_evaluation",
    "number_project",
    "average_monthly_hours",
    "time_spend_company",
    "Work_accident",
    "promotion_last_5years",
    "salary",
    "satisfaction_evaluation",
    "work_balance",
    *(f"department_{name}" for name in _DEPARTMENTS),
]

# Columns passed through the scaler, in this order.
# NOTE(review): presumably the same columns/order the scaler was fitted on
# during training -- confirm against the training pipeline.
_SCALED_COLUMNS = [
    "satisfaction_level",
    "last_evaluation",
    "average_monthly_hours",
    "number_project",
    "work_balance",
]


def _load_artifact(filename):
    """Download *filename* from the model repository and unpickle it.

    NOTE: ``joblib.load`` unpickles the file, which can execute arbitrary
    code. Only point ``_REPO_ID`` at a repository you trust.
    """
    artifact_path = hf_hub_download(repo_id=_REPO_ID, filename=filename)
    return joblib.load(artifact_path)


# Load ensemble model
def load_model():
    """Fetch ``final_ensemble_model.pkl`` from the Hub and return the model."""
    model = _load_artifact("final_ensemble_model.pkl")
    print("✅ Ensemble model loaded successfully.")
    return model


# Load scaler
def load_scaler():
    """Fetch ``scaler.pkl`` from the Hub and return the loaded scaler."""
    scaler = _load_artifact("scaler.pkl")
    print("✅ Scaler loaded successfully.")
    return scaler


model = load_model()
scaler = load_scaler()


# Define prediction function
def predict_employee_status(
    satisfaction_level,
    last_evaluation,
    number_project,
    average_monthly_hours,
    time_spend_company,
    work_accident,
    promotion_last_5years,
    salary,
    department,
    threshold=0.5,
):
    """Return a human-readable stay/quit prediction for one employee.

    Builds a single-row frame with the columns in ``_EXPECTED_COLUMNS``
    (raw inputs, two derived features and one-hot department flags), scales
    ``_SCALED_COLUMNS`` with the module-level ``scaler`` and asks the
    module-level ``model`` for the probability of class index 1.

    Args:
        satisfaction_level, last_evaluation, number_project,
        average_monthly_hours: Numeric inputs used to derive
            ``satisfaction_evaluation`` (product of the first two) and
            ``work_balance`` (hours divided by projects). They must be
            provided, and ``number_project`` must be non-zero.
        time_spend_company, work_accident, promotion_last_5years, salary:
            Passed to the model unchanged.
        department: Department name; an unrecognised value leaves every
            ``department_*`` flag at 0.
        threshold: Probability at or above which the employee is reported
            as likely to quit.

    Returns:
        A message string: the prediction with its probability, or a
        message starting with "❌" when the input is unusable or the
        scaler/model raises.
    """
    # Empty gr.Number fields arrive as None; without this guard the derived
    # feature arithmetic below would raise an uncaught TypeError.
    arithmetic_inputs = {
        "satisfaction_level": satisfaction_level,
        "last_evaluation": last_evaluation,
        "number_project": number_project,
        "average_monthly_hours": average_monthly_hours,
    }
    missing = [name for name, value in arithmetic_inputs.items() if value is None]
    if missing:
        return f"❌ Invalid input: missing value for {', '.join(missing)}."

    # work_balance divides by number_project; reject zero explicitly rather
    # than crashing with ZeroDivisionError.
    if number_project == 0:
        return "❌ Invalid input: number_project must be non-zero."

    department_features = {f"department_{name}": 0 for name in _DEPARTMENTS}
    dept_key = f"department_{department}"
    if dept_key in department_features:
        department_features[dept_key] = 1

    satisfaction_evaluation = satisfaction_level * last_evaluation
    work_balance = average_monthly_hours / number_project

    input_data = {
        "satisfaction_level": [satisfaction_level],
        "last_evaluation": [last_evaluation],
        "number_project": [number_project],
        "average_monthly_hours": [average_monthly_hours],
        "time_spend_company": [time_spend_company],
        "Work_accident": [work_accident],
        "promotion_last_5years": [promotion_last_5years],
        "salary": [salary],
        "satisfaction_evaluation": [satisfaction_evaluation],
        "work_balance": [work_balance],
        **department_features,
    }

    # Put the columns in the expected order, filling any absent one with 0.
    input_df = pd.DataFrame(input_data).reindex(
        columns=_EXPECTED_COLUMNS, fill_value=0
    )

    try:
        # Scaling lives inside the try so a scaler failure is reported the
        # same way as a model failure instead of escaping to the UI.
        input_df[_SCALED_COLUMNS] = scaler.transform(input_df[_SCALED_COLUMNS])
        prob = model.predict_proba(input_df)[0][1]
        if prob >= threshold:
            result = "✅ Employee is likely to quit."
        else:
            result = "✅ Employee is likely to stay."
        return f"{result} (Probability: {prob:.2%})"
    except Exception as e:  # UI boundary: surface any failure as text
        return f"❌ Prediction error: {e}"


# Gradio UI
def gradio_interface():
    """Build the Gradio form around ``predict_employee_status`` and launch it."""
    interface = gr.Interface(
        fn=predict_employee_status,
        inputs=[
            gr.Number(label="Satisfaction Level (0.0 - 1.0)"),
            gr.Number(label="Last Evaluation (0.0 - 1.0)"),
            gr.Number(label="Number of Projects (1 - 10)"),
            gr.Number(label="Average Monthly Hours (80 - 320)"),
            gr.Number(label="Time Spend at Company (Years)"),
            gr.Radio([0, 1], label="Work Accident (0 = No, 1 = Yes)"),
            gr.Radio([0, 1], label="Promotion in Last 5 Years (0 = No, 1 = Yes)"),
            gr.Radio([0, 1, 2], label="Salary (0 = Low, 1 = Medium, 2 = High)"),
            gr.Dropdown(list(_DEPARTMENTS), label="Department"),
            gr.Slider(0.1, 0.9, value=0.5, step=0.05, label="Prediction Threshold"),
        ],
        outputs="text",
        title="Employee Retention Prediction System (Voting Ensemble)",
        description="Predict whether an employee is likely to stay or quit based on their profile. Supports threshold adjustment.",
        theme="dark",
    )
    interface.launch()


gradio_interface()