import os
HF_TOKEN = os.getenv("HF_TOKEN")
import numpy as np
import pandas as pd
import sklearn
import sklearn.metrics
from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve, auc, precision_score, recall_score, f1_score, classification_report, accuracy_score, confusion_matrix, ConfusionMatrixDisplay, matthews_corrcoef
from sklearn.model_selection import train_test_split
from sklearn.calibration import calibration_curve
from math import sqrt
from scipy import stats as st
from random import randrange
from matplotlib import pyplot as plt
import xgboost as xgb
import lightgbm as lgb
import catboost as cb
from catboost import Pool
from sklearn.ensemble import RandomForestClassifier
import optuna
from optuna.samplers import TPESampler
import shap
import gradio as gr
import random
import re
import textwrap
from datasets import load_dataset
#Read data.
# Every outcome-specific training file is reduced to the same feature columns
# plus the 'OUTCOME' target, so the column list is declared once.
_TQP_COLUMNS = (
    'Age', 'Sex', 'Ethnicity', 'Weight', 'Height',
    'Systolic_Blood_Pressure', 'Pulse_Rate', 'Supplemental_Oxygen',
    'Pulse_Oximetry', 'Respiratory_Assistance', 'Respiratory_Rate',
    'Temperature', 'GCS__Eye', 'GCS__Verbal', 'GCS__Motor', 'Total_GCS',
    'Pupillary_Response', 'Midline_Shift', 'Bleeding_Localization',
    'Bleeding_Size', 'Current_Smoker', 'Alcohol_Use_Disorder',
    'Substance_Abuse_Disorder', 'Diabetes_Mellitus', 'Hypertension',
    'Congestive_Heart_Failure', 'History_of_Myocardial_Infarction',
    'Angina_Pectoris', 'History_of_Cerebrovascular_Accident',
    'Peripheral_Arterial_Disease', 'Chronic_Obstructive_Pulmonary_Disease',
    'Chronic_Renal_Failure', 'Cirrhosis', 'Bleeding_Disorder',
    'Disseminated_Cancer', 'Currently_Receiving_Chemotherapy_for_Cancer',
    'Dementia',
    'Attention_Deficit_Disorder_or_Attention_Deficit_Hyperactivity_Disorder',
    'Mental_or_Personality_Disorder', 'Ability_to_Complete_AgeAppropriate_ADL',
    'Pregnancy', 'Anticoagulant_Therapy', 'Steroid_Use',
    'Days_from_Incident_to_ED_or_Hospital_Arrival', 'Transport_Mode',
    'InterFacility_Transfer', 'Trauma_Type', 'Injury_Intent',
    'Mechanism_of_Injury', 'WorkRelated', 'Blood_Transfusion',
    'Neurosurgical_Intervention', 'Alcohol_Screen', 'Alcohol_Screen_Result',
    'Drug_Screen__Amphetamine', 'Drug_Screen__Barbiturate',
    'Drug_Screen__Benzodiazepines', 'Drug_Screen__Cannabinoid',
    'Drug_Screen__Cocaine', 'Drug_Screen__MDMA_or_Ecstasy',
    'Drug_Screen__Methadone', 'Drug_Screen__Methamphetamine',
    'Drug_Screen__Opioid', 'Drug_Screen__Oxycodone',
    'Drug_Screen__Phencyclidine', 'Drug_Screen__Tricyclic_Antidepressant',
    'ACS_Verification_Level', 'Hospital_Type', 'Facility_Bed_Size',
    'Primary_Method_of_Payment', 'Race', 'Protective_Device',
    'Cerebral_Monitoring', 'OUTCOME',
)


def _read_training_frame(csv_name, columns):
    """Load one CSV of the TQP-atEDH dataset and keep only ``columns``.

    The file is fetched with ``load_dataset`` using ``HF_TOKEN``; its 'train'
    split is converted to a pandas DataFrame and restricted to ``columns``
    (in the given order).
    """
    # NOTE(review): recent `datasets` releases replace `use_auth_token` with
    # `token` -- confirm against the version this app is pinned to.
    dataset = load_dataset("mertkarabacak/TQP-atEDH", data_files=csv_name, use_auth_token = HF_TOKEN)
    return pd.DataFrame(dataset['train'])[columns]


# Each outcome keeps its own independent list object, as before.
variables1 = list(_TQP_COLUMNS)
x1 = _read_training_frame("mortality_data_train.csv", variables1)
variables2 = list(_TQP_COLUMNS)
x2 = _read_training_frame("discharge_data_train.csv", variables2)
variables3 = list(_TQP_COLUMNS)
x3 = _read_training_frame("los_data_train.csv", variables3)
variables4 = list(_TQP_COLUMNS)
x4 = _read_training_frame("iculos_data_train.csv", variables4)
variables5 = list(_TQP_COLUMNS)
x5 = _read_training_frame("complications_data_train.csv", variables5)
#Define feature names.
def _to_display_names(columns):
    """Return readable labels: '__' becomes ' - ', then every '_' becomes ' '."""
    return [column.replace('__', ' - ').replace('_', ' ') for column in columns]


# 'OUTCOME' has not been popped from the frames yet, so it is included as the
# last label of every list.
f1_names = _to_display_names(x1.columns)
f2_names = _to_display_names(x2.columns)
f3_names = _to_display_names(x3.columns)
f4_names = _to_display_names(x4.columns)
f5_names = _to_display_names(x5.columns)
#Assign unique values as answer options.
# NOTE(review): the features are label-encoded further down and LabelEncoder
# numbers classes in sorted order -- confirm the order of these option lists
# matches whatever value/index mapping the UI relies on.
_NO_YES_UNKNOWN = ('No', 'Yes', 'Unknown')
_YES_NO_UNKNOWN = ('Yes', 'No', 'Unknown')
_NO_YES = ('No', 'Yes')
_DRUG_SCREEN = ('Not tested', 'No', 'Yes')

# Demographics and presentation.
unique_SEX = ['Male', 'Female', 'Unknown']
unique_RACE = [
    'White', 'Black', 'Asian', 'American Indian', 'Pacific Islander',
    'Other', 'Unknown',
]
unique_ETHNICITY = ['Not Hispanic or Latino', 'Hispanic or Latino', 'Unknown']
unique_SUPPLEMENTALOXYGEN = ['No supplemental oxygen', 'Supplemental oxygen', 'Unknown']
unique_RESPIRATORYASSISTANCE = ['Unassisted respiratory rate', 'Assisted respiratory rate', 'Unknown']
unique_TBIPUPILLARYRESPONSE = ['Both reactive', 'One reactive', 'Neither reactive', 'Unknown']
unique_TBIMIDLINESHIFT = ['No', 'Yes', 'Not imaged/unknown']
unique_LOCALIZATION = ['Supratentorial', 'Infratentorial']
# NOTE(review): the first option lacks its closing parenthesis; kept verbatim
# in case it has to match stored data -- confirm before correcting.
unique_SIZE = [
    'Large, massive, or extensive (more than 30cc, more than 1cm thick',
    'Small or moderate (less than 30cc or 0.6-1cm thick)',
    'Tiny (less than 0.6cm thick)',
    'Bilateral small or moderate (less than 30cc or 0.6-1cm thick)',
    'Bilateral large, massive, or extensive (more than 30cc, more than 1cm thick)',
]

# Comorbidities: each name gets its own list object.
unique_CC_SMOKING = list(_NO_YES_UNKNOWN)
unique_CC_ALCOHOLISM = list(_NO_YES_UNKNOWN)
unique_CC_SUBSTANCEABUSE = list(_NO_YES_UNKNOWN)
unique_CC_DIABETES = list(_NO_YES_UNKNOWN)
unique_CC_HYPERTENSION = list(_NO_YES_UNKNOWN)
unique_CC_CHF = list(_NO_YES_UNKNOWN)
unique_CC_MI = list(_NO_YES_UNKNOWN)
unique_CC_ANGINAPECTORIS = list(_NO_YES_UNKNOWN)
unique_CC_CVA = list(_NO_YES_UNKNOWN)
unique_CC_PAD = list(_NO_YES_UNKNOWN)
unique_CC_COPD = list(_NO_YES_UNKNOWN)
unique_CC_RENAL = list(_NO_YES_UNKNOWN)
unique_CC_CIRRHOSIS = list(_NO_YES_UNKNOWN)
unique_CC_BLEEDING = list(_NO_YES_UNKNOWN)
unique_CC_DISCANCER = list(_NO_YES_UNKNOWN)
unique_CC_CHEMO = list(_NO_YES_UNKNOWN)
unique_CC_DEMENTIA = list(_NO_YES_UNKNOWN)
unique_CC_ADHD = list(_NO_YES_UNKNOWN)
unique_CC_MENTALPERSONALITY = list(_NO_YES_UNKNOWN)
unique_CC_FUNCTIONAL = list(_NO_YES_UNKNOWN)
unique_CC_PREGNANCY = list(_NO_YES_UNKNOWN) + ['Not applicable (male patient)']
unique_CC_ANTICOAGULANT = list(_NO_YES_UNKNOWN)
unique_CC_STEROID = list(_NO_YES_UNKNOWN)

# Injury circumstances and in-hospital events.
unique_TRANSPORTMODE = [
    'Ground ambulance', 'Private vehicle/public vehicle/walk-in',
    'Air ambulance', 'Other/police/unknown/etc.',
]
unique_INTERFACILITYTRANSFER = list(_NO_YES)
unique_TRAUMATYPE = ['Blunt', 'Penetrating', 'Other/unknown']
unique_INTENT = ['Unintentional', 'Assault', 'Self-inflicted', 'Other/unknown']
unique_MECHANISM = [
    'Fall', 'Struck by or against', 'MVT occupant', 'MVT pedestrian',
    'MVT motorcyclist', 'MVT pedal cyclist', 'Other MVT', 'Other transport',
    'Other pedestrian', 'Other pedal cyclist', 'Firearm', 'Cut/pierce',
    'Natural/environmental', 'Machinery', 'Overexertion',
    'Other/unspecified/unknown',
]
unique_PROTDEV = [
    'None', 'Belt', 'Airbag present', 'Helmet', 'Protective clothing',
    'Protective non-clothing gear', 'Eye protection', 'Other',
]
unique_WORKRELATED = list(_NO_YES)
unique_INTERVENTION = list(_NO_YES)
unique_ICP = [
    'None', 'Intraventricular drain/catheter',
    'Intraparenchymal oxygen/pressure monitor', 'Jugular venous bulb',
    'Unknown',
]
unique_ALCOHOLSCREEN = list(_YES_NO_UNKNOWN)
unique_ANTIBIOTICTHERAPY = list(_YES_NO_UNKNOWN)

# Drug screens.
unique_DRGSCR_AMPHETAMINE = list(_DRUG_SCREEN)
unique_DRGSCR_BARBITURATE = list(_DRUG_SCREEN)
unique_DRGSCR_BENZODIAZEPINES = list(_DRUG_SCREEN)
unique_DRGSCR_CANNABINOID = list(_DRUG_SCREEN)
unique_DRGSCR_COCAINE = list(_DRUG_SCREEN)
unique_DRGSCR_ECSTASY = list(_DRUG_SCREEN)
unique_DRGSCR_METHADONE = list(_DRUG_SCREEN)
unique_DRGSCR_METHAMPHETAMINE = list(_DRUG_SCREEN)
unique_DRGSCR_OPIOID = list(_DRUG_SCREEN)
unique_DRGSCR_OXYCODONE = list(_DRUG_SCREEN)
unique_DRGSCR_PHENCYCLIDINE = list(_DRUG_SCREEN)
unique_DRGSCR_TRICYCLICDEPRESS = list(_DRUG_SCREEN)

# Facility and payer.
unique_VERIFICATIONLEVEL = [
    'Level I Trauma Center', 'Level II Trauma Center',
    'Level III Trauma Center', 'Unknown',
]
unique_HOSPITALTYPE = ['Non-profit', 'For profit', 'Government', 'Unknown']
unique_BEDSIZE = ['More than 600', '401 to 600', '201 to 400', '200 or fewer']
unique_PRIMARYMETHODPAYMENT = [
    'Private/commercial insurance', 'Medicaid', 'Medicare',
    'Other government', 'Self-pay', 'Other/Unknown',
]
#Prepare data for the five outcomes.
def _split_outcome_and_encode(frame):
    """Pop 'OUTCOME' from ``frame`` and label-encode its object columns in place.

    Returns the popped outcome series, the list of object-dtype column names,
    and the LabelEncoder that was used. The encoder is refit on each column in
    turn, so afterwards it only reflects the last column it was applied to.
    """
    outcome = frame.pop('OUTCOME')
    object_columns = list(frame.select_dtypes('object').columns)
    encoder = sklearn.preprocessing.LabelEncoder()
    frame[object_columns] = frame[object_columns].apply(encoder.fit_transform)
    return outcome, object_columns, encoder


# Outcome 1 (mortality).
y1, categorical_columns1, le = _split_outcome_and_encode(x1)
# Outcome 2 (discharge).
y2, categorical_columns2, le = _split_outcome_and_encode(x2)
# Outcome 3 (LOS).
y3, categorical_columns3, le = _split_outcome_and_encode(x3)
# Outcome 4 (ICU LOS).
y4, categorical_columns4, le = _split_outcome_and_encode(x4)
# Outcome 5 (complications).
y5, categorical_columns5, le = _split_outcome_and_encode(x5)
#Assign hyperparameters.
# Each dict is unpacked as keyword arguments into the estimator built for the
# matching outcome in the training section below.
# y1 (mortality): passed to LGBMClassifier.
y1_params = {'objective': 'binary', 'boosting_type': 'gbdt', 'lambda_l1': 0.002295655265750986, 'lambda_l2': 1.2775068086216506, 'num_leaves': 247, 'feature_fraction': 0.8441006608783833, 'bagging_fraction': 0.43832387797183897, 'bagging_freq': 1, 'min_child_samples': 100, 'metric': 'binary_logloss', 'verbosity': -1, 'random_state': 31}
# y2 (discharge): passed to RandomForestClassifier.
y2_params = {'criterion': 'gini', 'max_features': None, 'max_depth': 5, 'n_estimators': 1700, 'min_samples_leaf': 2, 'min_samples_split': 2, 'random_state': 31}
# y3 (LOS): passed to CatBoostClassifier.
y3_params = {'objective': 'CrossEntropy', 'colsample_bylevel': 0.055268360804468515, 'depth': 12, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bayesian', 'bagging_temperature': 6.4211945230724465, 'used_ram_limit': '3gb', 'eval_metric': 'AUC', 'random_state': 31}
# y4 (ICU LOS): passed to LGBMClassifier.
y4_params = {'objective': 'binary', 'boosting_type': 'gbdt', 'lambda_l1': 0.00026039217865088984, 'lambda_l2': 3.4197219088950787e-08, 'num_leaves': 90, 'feature_fraction': 0.4993893704314224, 'bagging_fraction': 0.45102286812852366, 'bagging_freq': 7, 'min_child_samples': 91, 'metric': 'binary_logloss', 'verbosity': -1, 'random_state': 31}
# y5 (complications): passed to LGBMClassifier.
y5_params = {'objective': 'binary', 'boosting_type': 'gbdt', 'lambda_l1': 0.0016190622681086678, 'lambda_l2': 0.00041749233000407354, 'num_leaves': 2, 'feature_fraction': 0.5730231365909909, 'bagging_fraction': 0.6964002116636187, 'bagging_freq': 6, 'min_child_samples': 44, 'metric': 'binary_logloss', 'verbosity': -1, 'random_state': 31}
#Train one model per outcome and build a SHAP TreeExplainer for each.
# NOTE(review): the names `lgb` and `cb` assigned below rebind the module
# aliases created by `import lightgbm as lgb` / `import catboost as cb` at the
# top of the file; from here on they refer to estimator instances.
# Outcome 1 (mortality): LightGBM.
from lightgbm import LGBMClassifier
lgb = LGBMClassifier(**y1_params)
y1_model_lgb = lgb.fit(x1, y1)
y1_explainer_lgb = shap.TreeExplainer(y1_model_lgb)
# Outcome 2 (discharge): random forest.
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(**y2_params)
y2_model_rf = rf.fit(x2, y2)
y2_explainer_rf = shap.TreeExplainer(y2_model_rf)
# Outcome 3 (LOS): CatBoost, fitted on the label-encoded frame without a
# cat_features argument.
from catboost import CatBoostClassifier
cb = CatBoostClassifier(**y3_params)
y3_model_cb = cb.fit(x3, y3)
y3_explainer_cb = shap.TreeExplainer(y3_model_cb)
# Outcome 4 (ICU LOS): LightGBM (the import is repeated; it is already in scope).
from lightgbm import LGBMClassifier
lgb = LGBMClassifier(**y4_params)
y4_model_lgb = lgb.fit(x4, y4)
y4_explainer_lgb = shap.TreeExplainer(y4_model_lgb)
# Outcome 5 (complications): LightGBM.
from lightgbm import LGBMClassifier
lgb = LGBMClassifier(**y5_params)
y5_model_lgb = lgb.fit(x5, y5)
y5_explainer_lgb = shap.TreeExplainer(y5_model_lgb)
#Define predict for y1 (mortality).
def y1_predict_xgb(*args):
    """Return mortality class probabilities for one input row via ``y1_model_xgb``.

    ``args`` supplies one value per column of ``x1``, in column order. The
    result maps "Mortality" to column 1 of ``predict_proba`` and
    "No Mortality" to column 0.

    NOTE(review): ``y1_model_xgb`` is not defined in the visible part of this
    file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x1.columns)
    row = row.astype(dict.fromkeys(categorical_columns1, "category"))
    # Narrow any int64 columns to int32 before scoring.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    proba = y1_model_xgb.predict_proba(row)[0]
    return {"Mortality": float(proba[1]), "No Mortality": float(proba[0])}
def y1_predict_lgb(*args):
    """Return mortality class probabilities for one input row via ``y1_model_lgb``.

    ``args`` supplies one value per column of ``x1``, in column order. The
    result maps "Mortality" to column 1 of ``predict_proba`` and
    "No Mortality" to column 0.

    NOTE(review): the model is fitted on label-encoded integer columns, while
    this function casts those columns to pandas "category" -- confirm the
    estimator accepts that dtype difference at prediction time.
    """
    row = pd.DataFrame([args], columns=x1.columns)
    row = row.astype(dict.fromkeys(categorical_columns1, "category"))
    # Narrow any int64 columns to int32 before scoring.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    proba = y1_model_lgb.predict_proba(row)[0]
    return {"Mortality": float(proba[1]), "No Mortality": float(proba[0])}
def y1_predict_cb(*args):
    """Return mortality class probabilities for one input row via ``y1_model_cb``.

    ``args`` supplies one value per column of ``x1``, in column order. The row
    is wrapped in a CatBoost ``Pool`` with ``categorical_columns1`` declared as
    categorical features and scored with ``prediction_type='Probability'``.

    NOTE(review): ``y1_model_cb`` is not defined in the visible part of this
    file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x1.columns)
    row = row.astype(dict.fromkeys(categorical_columns1, "category"))
    pool = Pool(row, cat_features=categorical_columns1)
    proba = y1_model_cb.predict(pool, prediction_type='Probability')[0]
    return {"Mortality": float(proba[1]), "No Mortality": float(proba[0])}
def y1_predict_rf(*args):
    """Return mortality class probabilities for one input row via ``y1_model_rf``.

    ``args`` supplies one value per column of ``x1_rf``, in column order. The
    result maps "Mortality" to column 1 of ``predict_proba`` and
    "No Mortality" to column 0.

    NOTE(review): neither ``x1_rf`` nor ``y1_model_rf`` is defined in the
    visible part of this file (the sibling predictors use ``x1``) -- confirm
    both exist before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x1_rf.columns)
    row = row.astype(dict.fromkeys(categorical_columns1, "category"))
    # Narrow any int64 columns to int32 before scoring.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    proba = y1_model_rf.predict_proba(row)[0]
    return {"Mortality": float(proba[1]), "No Mortality": float(proba[0])}
#Define predict for y2 (discharge).
def y2_predict_xgb(*args):
    """Return discharge class probabilities for one input row via ``y2_model_xgb``.

    ``args`` supplies one value per column of ``x2``, in column order. The
    result maps "Non-home Discharge" to column 1 of ``predict_proba`` and
    "Home Discharge" to column 0.

    NOTE(review): ``y2_model_xgb`` is not defined in the visible part of this
    file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x2.columns)
    row = row.astype(dict.fromkeys(categorical_columns2, "category"))
    # Narrow any int64 columns to int32 before scoring.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    proba = y2_model_xgb.predict_proba(row)[0]
    return {"Non-home Discharge": float(proba[1]), "Home Discharge": float(proba[0])}
def y2_predict_lgb(*args):
    """Return discharge class probabilities for one input row via ``y2_model_lgb``.

    ``args`` supplies one value per column of ``x2``, in column order. The
    result maps "Non-home Discharge" to column 1 of ``predict_proba`` and
    "Home Discharge" to column 0.

    NOTE(review): ``y2_model_lgb`` is not defined in the visible part of this
    file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x2.columns)
    row = row.astype(dict.fromkeys(categorical_columns2, "category"))
    # Narrow any int64 columns to int32 before scoring.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    proba = y2_model_lgb.predict_proba(row)[0]
    return {"Non-home Discharge": float(proba[1]), "Home Discharge": float(proba[0])}
def y2_predict_cb(*args):
    """Return discharge class probabilities for one input row via ``y2_model_cb``.

    ``args`` supplies one value per column of ``x2``, in column order. The row
    is wrapped in a CatBoost ``Pool`` with ``categorical_columns2`` declared as
    categorical features and scored with ``prediction_type='Probability'``.

    NOTE(review): ``y2_model_cb`` is not defined in the visible part of this
    file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x2.columns)
    row = row.astype(dict.fromkeys(categorical_columns2, "category"))
    pool = Pool(row, cat_features=categorical_columns2)
    proba = y2_model_cb.predict(pool, prediction_type='Probability')[0]
    return {"Non-home Discharge": float(proba[1]), "Home Discharge": float(proba[0])}
def y2_predict_rf(*args):
    """Return discharge class probabilities for one input row via ``y2_model_rf``.

    ``args`` supplies one value per column of ``x2``, in column order. The
    result maps "Non-home Discharge" to column 1 of ``predict_proba`` and
    "Home Discharge" to column 0.

    NOTE(review): the forest is fitted on label-encoded integer columns, while
    this function casts those columns to pandas "category" -- confirm the
    estimator handles that dtype as intended.
    """
    row = pd.DataFrame([args], columns=x2.columns)
    row = row.astype(dict.fromkeys(categorical_columns2, "category"))
    # Narrow any int64 columns to int32 before scoring.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    proba = y2_model_rf.predict_proba(row)[0]
    return {"Non-home Discharge": float(proba[1]), "Home Discharge": float(proba[0])}
#Define predict for y3 (LOS).
def y3_predict_xgb(*args):
    """Return length-of-stay class probabilities for one input row via ``y3_model_xgb``.

    ``args`` supplies one value per column of ``x3``, in column order. The
    result maps "Prolonged LOS" to column 1 of ``predict_proba`` and
    "No Prolonged LOS" to column 0.

    NOTE(review): ``y3_model_xgb`` is not defined in the visible part of this
    file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x3.columns)
    row = row.astype(dict.fromkeys(categorical_columns3, "category"))
    # Narrow any int64 columns to int32 before scoring.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    proba = y3_model_xgb.predict_proba(row)[0]
    return {"Prolonged LOS": float(proba[1]), "No Prolonged LOS": float(proba[0])}
def y3_predict_lgb(*args):
    """Return length-of-stay class probabilities for one input row via ``y3_model_lgb``.

    ``args`` supplies one value per column of ``x3``, in column order. The
    result maps "Prolonged LOS" to column 1 of ``predict_proba`` and
    "No Prolonged LOS" to column 0.

    NOTE(review): ``y3_model_lgb`` is not defined in the visible part of this
    file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x3.columns)
    row = row.astype(dict.fromkeys(categorical_columns3, "category"))
    # Narrow any int64 columns to int32 before scoring.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    proba = y3_model_lgb.predict_proba(row)[0]
    return {"Prolonged LOS": float(proba[1]), "No Prolonged LOS": float(proba[0])}
def y3_predict_cb(*args):
    """Return length-of-stay class probabilities for one input row via ``y3_model_cb``.

    ``args`` supplies one value per column of ``x3``, in column order. The row
    is wrapped in a CatBoost ``Pool`` with ``categorical_columns3`` declared as
    categorical features and scored with ``prediction_type='Probability'``.

    NOTE(review): the model is fitted on the label-encoded frame without a
    ``cat_features`` argument, whereas this Pool declares categorical
    features -- confirm CatBoost accepts that difference.
    """
    row = pd.DataFrame([args], columns=x3.columns)
    row = row.astype(dict.fromkeys(categorical_columns3, "category"))
    pool = Pool(row, cat_features=categorical_columns3)
    proba = y3_model_cb.predict(pool, prediction_type='Probability')[0]
    return {"Prolonged LOS": float(proba[1]), "No Prolonged LOS": float(proba[0])}
def y3_predict_rf(*args):
    """Return length-of-stay class probabilities for one input row via ``y3_model_rf``.

    ``args`` supplies one value per column of ``x3``, in column order. The
    result maps "Prolonged LOS" to column 1 of ``predict_proba`` and
    "No Prolonged LOS" to column 0.

    NOTE(review): ``y3_model_rf`` is not defined in the visible part of this
    file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x3.columns)
    row = row.astype(dict.fromkeys(categorical_columns3, "category"))
    # Narrow any int64 columns to int32 before scoring.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    proba = y3_model_rf.predict_proba(row)[0]
    return {"Prolonged LOS": float(proba[1]), "No Prolonged LOS": float(proba[0])}
#Define predict for y4 (ICU LOS).
def y4_predict_xgb(*args):
    """Return ICU length-of-stay class probabilities for one input row via ``y4_model_xgb``.

    ``args`` supplies one value per column of ``x4``, in column order. The
    result maps "Prolonged ICU-LOS" to column 1 of ``predict_proba`` and
    "No Prolonged ICU-LOS" to column 0.

    NOTE(review): ``y4_model_xgb`` is not defined in the visible part of this
    file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x4.columns)
    row = row.astype(dict.fromkeys(categorical_columns4, "category"))
    # Narrow any int64 columns to int32 before scoring.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    proba = y4_model_xgb.predict_proba(row)[0]
    return {"Prolonged ICU-LOS": float(proba[1]), "No Prolonged ICU-LOS": float(proba[0])}
def y4_predict_lgb(*args):
    """Return ICU length-of-stay class probabilities for one input row via ``y4_model_lgb``.

    ``args`` supplies one value per column of ``x4``, in column order. The
    result maps "Prolonged ICU-LOS" to column 1 of ``predict_proba`` and
    "No Prolonged ICU-LOS" to column 0.

    NOTE(review): the model is fitted on label-encoded integer columns, while
    this function casts those columns to pandas "category" -- confirm the
    estimator accepts that dtype difference at prediction time.
    """
    row = pd.DataFrame([args], columns=x4.columns)
    row = row.astype(dict.fromkeys(categorical_columns4, "category"))
    # Narrow any int64 columns to int32 before scoring.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    proba = y4_model_lgb.predict_proba(row)[0]
    return {"Prolonged ICU-LOS": float(proba[1]), "No Prolonged ICU-LOS": float(proba[0])}
def y4_predict_cb(*args):
    """Return ICU length-of-stay class probabilities for one input row via ``y4_model_cb``.

    ``args`` supplies one value per column of ``x4``, in column order. The row
    is wrapped in a CatBoost ``Pool`` with ``categorical_columns4`` declared as
    categorical features and scored with ``prediction_type='Probability'``.

    NOTE(review): ``y4_model_cb`` is not defined in the visible part of this
    file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x4.columns)
    row = row.astype(dict.fromkeys(categorical_columns4, "category"))
    pool = Pool(row, cat_features=categorical_columns4)
    proba = y4_model_cb.predict(pool, prediction_type='Probability')[0]
    return {"Prolonged ICU-LOS": float(proba[1]), "No Prolonged ICU-LOS": float(proba[0])}
def y4_predict_rf(*args):
    """Return ICU length-of-stay class probabilities for one input row via ``y4_model_rf``.

    ``args`` supplies one value per column of ``x4``, in column order. The
    result maps "Prolonged ICU-LOS" to column 1 of ``predict_proba`` and
    "No Prolonged ICU-LOS" to column 0.

    NOTE(review): ``y4_model_rf`` is not defined in the visible part of this
    file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x4.columns)
    row = row.astype(dict.fromkeys(categorical_columns4, "category"))
    # Narrow any int64 columns to int32 before scoring.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    proba = y4_model_rf.predict_proba(row)[0]
    return {"Prolonged ICU-LOS": float(proba[1]), "No Prolonged ICU-LOS": float(proba[0])}
#Define predict for y5 (complications).
def y5_predict_xgb(*args):
    """Return complication class probabilities for one input row via ``y5_model_xgb``.

    ``args`` supplies one value per column of ``x5``, in column order. The
    result maps "Major Complications" to column 1 of ``predict_proba`` and
    "No Major Complications" to column 0. The positive label was previously
    misspelled "Major Complicatons".

    NOTE(review): ``y5_model_xgb`` is not defined in the visible part of this
    file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x5.columns)
    row = row.astype(dict.fromkeys(categorical_columns5, "category"))
    # Narrow any int64 columns to int32 before scoring.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    proba = y5_model_xgb.predict_proba(row)[0]
    return {"Major Complications": float(proba[1]), "No Major Complications": float(proba[0])}
def y5_predict_lgb(*args):
    """Return complication class probabilities for one input row via ``y5_model_lgb``.

    ``args`` supplies one value per column of ``x5``, in column order. The
    result maps "Major Complications" to column 1 of ``predict_proba`` and
    "No Major Complications" to column 0. The positive label was previously
    misspelled "Major Complicatons".

    NOTE(review): the model is fitted on label-encoded integer columns, while
    this function casts those columns to pandas "category" -- confirm the
    estimator accepts that dtype difference at prediction time.
    """
    row = pd.DataFrame([args], columns=x5.columns)
    row = row.astype(dict.fromkeys(categorical_columns5, "category"))
    # Narrow any int64 columns to int32 before scoring.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    proba = y5_model_lgb.predict_proba(row)[0]
    return {"Major Complications": float(proba[1]), "No Major Complications": float(proba[0])}
def y5_predict_cb(*args):
    """Return complication class probabilities for one input row via ``y5_model_cb``.

    ``args`` supplies one value per column of ``x5``, in column order. The row
    is wrapped in a CatBoost ``Pool`` with ``categorical_columns5`` declared as
    categorical features and scored with ``prediction_type='Probability'``.
    The positive label was previously misspelled "Major Complicatons".

    NOTE(review): ``y5_model_cb`` is not defined in the visible part of this
    file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x5.columns)
    row = row.astype(dict.fromkeys(categorical_columns5, "category"))
    pool = Pool(row, cat_features=categorical_columns5)
    proba = y5_model_cb.predict(pool, prediction_type='Probability')[0]
    return {"Major Complications": float(proba[1]), "No Major Complications": float(proba[0])}
def y5_predict_rf(*args):
    """Return complication class probabilities for one input row via ``y5_model_rf``.

    ``args`` supplies one value per column of ``x5_rf``, in column order. The
    result maps "Major Complications" to column 1 of ``predict_proba`` and
    "No Major Complications" to column 0. The positive label was previously
    misspelled "Major Complicatons".

    NOTE(review): neither ``x5_rf`` nor ``y5_model_rf`` is defined in the
    visible part of this file (the sibling predictors use ``x5``) -- confirm
    both exist before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x5_rf.columns)
    row = row.astype(dict.fromkeys(categorical_columns5, "category"))
    # Narrow any int64 columns to int32 before scoring.
    int64_columns = row.select_dtypes(np.int64).columns
    row = row.astype({name: np.int32 for name in int64_columns})
    proba = y5_model_rf.predict_proba(row)[0]
    return {"Major Complications": float(proba[1]), "No Major Complications": float(proba[0])}
#Define function for wrapping feature labels.
def wrap_labels(ax, width, break_long_words=False):
    """Re-wrap the y-axis tick labels of ``ax`` to at most ``width`` characters per line.

    Each current tick label's text is passed through ``textwrap.fill`` and the
    wrapped strings are written back with ``set_yticklabels(..., rotation=0)``.
    ``break_long_words`` is forwarded to ``textwrap.fill``. Returns ``None``.
    """
    wrapped = [
        textwrap.fill(tick.get_text(), width=width, break_long_words=break_long_words)
        for tick in ax.get_yticklabels()
    ]
    ax.set_yticklabels(wrapped, rotation=0)
#Define interpret for y1 (mortality).
def y1_interpret_xgb(*args):
    """Plot absolute SHAP values for one mortality input row (XGBoost explainer).

    ``args`` supplies one value per column of ``x1``. The row is wrapped in an
    ``xgb.DMatrix`` with ``enable_categorical=True``, explained with
    ``y1_explainer_xgb``, and drawn as a SHAP bar plot (``max_display=10``,
    labelled with ``f1_names``) on pyplot's current figure, which is returned.

    NOTE(review): ``y1_explainer_xgb`` is not defined in the visible part of
    this file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x1.columns)
    row = row.astype(dict.fromkeys(categorical_columns1, "category"))
    magnitudes = np.abs(y1_explainer_xgb.shap_values(xgb.DMatrix(row, enable_categorical=True)))
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f1_names)
    fig, ax = plt.gcf(), plt.gca()
    wrap_labels(ax, 20)
    # Layout is tightened before the figure is resized to 9 x 7 inches.
    plt.tight_layout()
    fig.set_figwidth(9)
    fig.set_figheight(7)
    plt.xlabel("SHAP value (impact on model output)", fontsize=12, fontweight="heavy", labelpad=8)
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y1_interpret_lgb(*args):
    """Plot absolute SHAP values for one mortality input row (LightGBM explainer).

    ``args`` supplies one value per column of ``x1``. The row is explained
    with ``y1_explainer_lgb`` and drawn as a SHAP bar plot (``max_display=10``,
    labelled with ``f1_names``) on pyplot's current figure, which is returned.

    NOTE(review): the ``[0][0]`` indexing assumes ``shap_values`` yields one
    array per class (class, row, feature) -- confirm against the installed
    shap version.
    """
    row = pd.DataFrame([args], columns=x1.columns)
    row = row.astype(dict.fromkeys(categorical_columns1, "category"))
    magnitudes = np.abs(y1_explainer_lgb.shap_values(row))
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f1_names)
    fig, ax = plt.gcf(), plt.gca()
    wrap_labels(ax, 20)
    # Layout is tightened before the figure is resized to 9 x 7 inches.
    plt.tight_layout()
    fig.set_figwidth(9)
    fig.set_figheight(7)
    plt.xlabel("SHAP value (impact on model output)", fontsize=12, fontweight="heavy", labelpad=8)
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y1_interpret_cb(*args):
    """Plot absolute SHAP values for one mortality input row (CatBoost explainer).

    ``args`` supplies one value per column of ``x1``. The row is wrapped in a
    CatBoost ``Pool`` with ``categorical_columns1`` as categorical features,
    explained with ``y1_explainer_cb``, and drawn as a SHAP bar plot
    (``max_display=10``, labelled with ``f1_names``) on pyplot's current
    figure, which is returned.

    A stray ``scores_desc = sorted(scores_desc)`` statement used to follow the
    plot call; it read a local name before assignment and therefore raised
    ``UnboundLocalError`` on every call, so it has been removed.

    NOTE(review): ``y1_explainer_cb`` is not defined in the visible part of
    this file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x1.columns)
    row = row.astype(dict.fromkeys(categorical_columns1, "category"))
    magnitudes = np.abs(y1_explainer_cb.shap_values(Pool(row, cat_features=categorical_columns1)))
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f1_names)
    fig, ax = plt.gcf(), plt.gca()
    wrap_labels(ax, 20)
    # Layout is tightened before the figure is resized to 9 x 7 inches.
    plt.tight_layout()
    fig.set_figwidth(9)
    fig.set_figheight(7)
    plt.xlabel("SHAP value (impact on model output)", fontsize=12, fontweight="heavy", labelpad=8)
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y1_interpret_rf(*args):
    """Plot absolute SHAP values for one mortality input row (random-forest explainer).

    ``args`` supplies one value per column of ``x1``. The row is explained
    with ``y1_explainer_rf`` and drawn as a SHAP bar plot (``max_display=10``,
    labelled with ``f1_names``) on pyplot's current figure, which is returned.

    NOTE(review): ``y1_explainer_rf`` is not defined in the visible part of
    this file, and the ``[0][0]`` indexing assumes ``shap_values`` yields one
    array per class -- confirm both.
    """
    row = pd.DataFrame([args], columns=x1.columns)
    row = row.astype(dict.fromkeys(categorical_columns1, "category"))
    magnitudes = np.abs(y1_explainer_rf.shap_values(row))
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f1_names)
    fig, ax = plt.gcf(), plt.gca()
    wrap_labels(ax, 20)
    # Layout is tightened before the figure is resized to 9 x 7 inches.
    plt.tight_layout()
    fig.set_figwidth(9)
    fig.set_figheight(7)
    plt.xlabel("SHAP value (impact on model output)", fontsize=12, fontweight="heavy", labelpad=8)
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
#Define interpret for y2 (discharge).
def y2_interpret_xgb(*args):
    """Plot absolute SHAP values for one discharge input row (XGBoost explainer).

    ``args`` supplies one value per column of ``x2``. The row is wrapped in an
    ``xgb.DMatrix`` with ``enable_categorical=True``, explained with
    ``y2_explainer_xgb``, and drawn as a SHAP bar plot (``max_display=10``,
    labelled with ``f2_names``) on pyplot's current figure, which is returned.

    NOTE(review): ``y2_explainer_xgb`` is not defined in the visible part of
    this file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x2.columns)
    row = row.astype(dict.fromkeys(categorical_columns2, "category"))
    magnitudes = np.abs(y2_explainer_xgb.shap_values(xgb.DMatrix(row, enable_categorical=True)))
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f2_names)
    fig, ax = plt.gcf(), plt.gca()
    wrap_labels(ax, 20)
    # Layout is tightened before the figure is resized to 9 x 7 inches.
    plt.tight_layout()
    fig.set_figwidth(9)
    fig.set_figheight(7)
    plt.xlabel("SHAP value (impact on model output)", fontsize=12, fontweight="heavy", labelpad=8)
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y2_interpret_lgb(*args):
    """Plot absolute SHAP values for one discharge input row (LightGBM explainer).

    ``args`` supplies one value per column of ``x2``. The row is explained
    with ``y2_explainer_lgb`` and drawn as a SHAP bar plot (``max_display=10``,
    labelled with ``f2_names``) on pyplot's current figure, which is returned.

    NOTE(review): ``y2_explainer_lgb`` is not defined in the visible part of
    this file, and the ``[0][0]`` indexing assumes ``shap_values`` yields one
    array per class -- confirm both.
    """
    row = pd.DataFrame([args], columns=x2.columns)
    row = row.astype(dict.fromkeys(categorical_columns2, "category"))
    magnitudes = np.abs(y2_explainer_lgb.shap_values(row))
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f2_names)
    fig, ax = plt.gcf(), plt.gca()
    wrap_labels(ax, 20)
    # Layout is tightened before the figure is resized to 9 x 7 inches.
    plt.tight_layout()
    fig.set_figwidth(9)
    fig.set_figheight(7)
    plt.xlabel("SHAP value (impact on model output)", fontsize=12, fontweight="heavy", labelpad=8)
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y2_interpret_cb(*args):
    """Plot absolute SHAP values for one discharge input row (CatBoost explainer).

    ``args`` supplies one value per column of ``x2``. The row is wrapped in a
    CatBoost ``Pool`` with ``categorical_columns2`` as categorical features,
    explained with ``y2_explainer_cb``, and drawn as a SHAP bar plot
    (``max_display=10``, labelled with ``f2_names``) on pyplot's current
    figure, which is returned.

    NOTE(review): ``y2_explainer_cb`` is not defined in the visible part of
    this file -- confirm it exists before wiring this function up.
    """
    row = pd.DataFrame([args], columns=x2.columns)
    row = row.astype(dict.fromkeys(categorical_columns2, "category"))
    magnitudes = np.abs(y2_explainer_cb.shap_values(Pool(row, cat_features=categorical_columns2)))
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f2_names)
    fig, ax = plt.gcf(), plt.gca()
    wrap_labels(ax, 20)
    # Layout is tightened before the figure is resized to 9 x 7 inches.
    plt.tight_layout()
    fig.set_figwidth(9)
    fig.set_figheight(7)
    plt.xlabel("SHAP value (impact on model output)", fontsize=12, fontweight="heavy", labelpad=8)
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
def y2_interpret_rf(*args):
    """Plot absolute SHAP values for one discharge input row (random-forest explainer).

    ``args`` supplies one value per column of ``x2``. The row is explained
    with ``y2_explainer_rf`` and drawn as a SHAP bar plot (``max_display=10``,
    labelled with ``f2_names``) on pyplot's current figure, which is returned.

    NOTE(review): the ``[0][0]`` indexing assumes ``shap_values`` yields one
    array per class (class, row, feature) -- confirm against the installed
    shap version.
    """
    row = pd.DataFrame([args], columns=x2.columns)
    row = row.astype(dict.fromkeys(categorical_columns2, "category"))
    magnitudes = np.abs(y2_explainer_rf.shap_values(row))
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f2_names)
    fig, ax = plt.gcf(), plt.gca()
    wrap_labels(ax, 20)
    # Layout is tightened before the figure is resized to 9 x 7 inches.
    plt.tight_layout()
    fig.set_figwidth(9)
    fig.set_figheight(7)
    plt.xlabel("SHAP value (impact on model output)", fontsize=12, fontweight="heavy", labelpad=8)
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return fig
#Define interpret for y3 (LOS).
def y3_interpret_xgb(*args):
    """Plot absolute SHAP values for one input row (outcome y3 / LOS, XGBoost).

    The positional arguments are the feature values of a single row, in the
    same order as ``x3.columns``. The row is explained with
    ``y3_explainer_xgb`` and at most ten features are displayed.

    Returns the current matplotlib figure after it has been resized.
    """
    # One-row frame; categorical columns are cast to the pandas "category" dtype.
    category_dtypes = dict.fromkeys(categorical_columns3, "category")
    sample = pd.DataFrame(data=[args], columns=x3.columns).astype(category_dtypes)

    # The XGBoost explainer is fed a DMatrix with categorical support enabled.
    dmatrix = xgb.DMatrix(sample, enable_categorical=True)
    magnitudes = np.abs(y3_explainer_xgb.shap_values(dmatrix))

    # Index 0 picks the single row; only magnitudes are plotted, so the sign
    # of each contribution is discarded.
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f3_names)

    figure, axes = plt.gcf(), plt.gca()
    # wrap_labels is defined elsewhere in this file; presumably it wraps the
    # tick labels at 20 characters -- confirm against its definition.
    wrap_labels(axes, 20)
    plt.tight_layout()
    figure.set_figheight(7)
    figure.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return figure
def y3_interpret_lgb(*args):
    """Plot absolute SHAP values for one input row (outcome y3 / LOS, LightGBM).

    The positional arguments are the feature values of a single row, in the
    same order as ``x3.columns``. The row is explained with
    ``y3_explainer_lgb`` and at most ten features are displayed.

    Returns the current matplotlib figure after it has been resized.
    """
    # One-row frame; categorical columns are cast to the pandas "category" dtype.
    category_dtypes = dict.fromkeys(categorical_columns3, "category")
    sample = pd.DataFrame(data=[args], columns=x3.columns).astype(category_dtypes)

    magnitudes = np.abs(y3_explainer_lgb.shap_values(sample))

    # [0][0]: first entry of the explainer output, then its first element.
    # NOTE(review): assumes the explainer returns one array per class, so
    # this is the single row's values for class 0 -- confirm for the
    # installed shap version.
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f3_names)

    figure, axes = plt.gcf(), plt.gca()
    # wrap_labels is defined elsewhere in this file; presumably it wraps the
    # tick labels at 20 characters -- confirm against its definition.
    wrap_labels(axes, 20)
    plt.tight_layout()
    figure.set_figheight(7)
    figure.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return figure
def y3_interpret_cb(*args):
    """Plot absolute SHAP values for one input row (outcome y3 / LOS, CatBoost).

    The positional arguments are the feature values of a single row, in the
    same order as ``x3.columns``. The row is explained with
    ``y3_explainer_cb`` and at most ten features are displayed.

    Returns the current matplotlib figure after it has been resized.
    """
    # One-row frame; categorical columns are cast to the pandas "category" dtype.
    category_dtypes = dict.fromkeys(categorical_columns3, "category")
    sample = pd.DataFrame(data=[args], columns=x3.columns).astype(category_dtypes)

    # The CatBoost explainer is fed a Pool that names the categorical features.
    pool = Pool(sample, cat_features=categorical_columns3)
    magnitudes = np.abs(y3_explainer_cb.shap_values(pool))

    # Index 0 picks the single row; only magnitudes are plotted, so the sign
    # of each contribution is discarded.
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f3_names)

    figure, axes = plt.gcf(), plt.gca()
    # wrap_labels is defined elsewhere in this file; presumably it wraps the
    # tick labels at 20 characters -- confirm against its definition.
    wrap_labels(axes, 20)
    plt.tight_layout()
    figure.set_figheight(7)
    figure.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return figure
def y3_interpret_rf(*args):
    """Plot absolute SHAP values for one input row (outcome y3 / LOS, random forest).

    The positional arguments are the feature values of a single row, in the
    same order as ``x3.columns``. The row is explained with
    ``y3_explainer_rf`` and at most ten features are displayed.

    Returns the current matplotlib figure after it has been resized.
    """
    # One-row frame; categorical columns are cast to the pandas "category" dtype.
    category_dtypes = dict.fromkeys(categorical_columns3, "category")
    sample = pd.DataFrame(data=[args], columns=x3.columns).astype(category_dtypes)

    magnitudes = np.abs(y3_explainer_rf.shap_values(sample))

    # [0][0]: first entry of the explainer output, then its first element.
    # NOTE(review): assumes the explainer returns one array per class, so
    # this is the single row's values for class 0 -- confirm for the
    # installed shap version.
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f3_names)

    figure, axes = plt.gcf(), plt.gca()
    # wrap_labels is defined elsewhere in this file; presumably it wraps the
    # tick labels at 20 characters -- confirm against its definition.
    wrap_labels(axes, 20)
    plt.tight_layout()
    figure.set_figheight(7)
    figure.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return figure
#Define interpret for y4 (ICU LOS).
def y4_interpret_xgb(*args):
    """Plot absolute SHAP values for one input row (outcome y4 / ICU LOS, XGBoost).

    The positional arguments are the feature values of a single row, in the
    same order as ``x4.columns``. The row is explained with
    ``y4_explainer_xgb`` and at most ten features are displayed.

    Returns the current matplotlib figure after it has been resized.
    """
    # One-row frame; categorical columns are cast to the pandas "category" dtype.
    category_dtypes = dict.fromkeys(categorical_columns4, "category")
    sample = pd.DataFrame(data=[args], columns=x4.columns).astype(category_dtypes)

    # The XGBoost explainer is fed a DMatrix with categorical support enabled.
    dmatrix = xgb.DMatrix(sample, enable_categorical=True)
    magnitudes = np.abs(y4_explainer_xgb.shap_values(dmatrix))

    # Index 0 picks the single row; only magnitudes are plotted, so the sign
    # of each contribution is discarded.
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f4_names)

    figure, axes = plt.gcf(), plt.gca()
    # wrap_labels is defined elsewhere in this file; presumably it wraps the
    # tick labels at 20 characters -- confirm against its definition.
    wrap_labels(axes, 20)
    plt.tight_layout()
    figure.set_figheight(7)
    figure.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return figure
def y4_interpret_lgb(*args):
    """Plot absolute SHAP values for one input row (outcome y4 / ICU LOS, LightGBM).

    The positional arguments are the feature values of a single row, in the
    same order as ``x4.columns``. The row is explained with
    ``y4_explainer_lgb`` and at most ten features are displayed.

    Returns the current matplotlib figure after it has been resized.
    """
    # One-row frame; categorical columns are cast to the pandas "category" dtype.
    category_dtypes = dict.fromkeys(categorical_columns4, "category")
    sample = pd.DataFrame(data=[args], columns=x4.columns).astype(category_dtypes)

    magnitudes = np.abs(y4_explainer_lgb.shap_values(sample))

    # [0][0]: first entry of the explainer output, then its first element.
    # NOTE(review): assumes the explainer returns one array per class, so
    # this is the single row's values for class 0 -- confirm for the
    # installed shap version.
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f4_names)

    figure, axes = plt.gcf(), plt.gca()
    # wrap_labels is defined elsewhere in this file; presumably it wraps the
    # tick labels at 20 characters -- confirm against its definition.
    wrap_labels(axes, 20)
    plt.tight_layout()
    figure.set_figheight(7)
    figure.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return figure
def y4_interpret_cb(*args):
    """Plot absolute SHAP values for one input row (outcome y4 / ICU LOS, CatBoost).

    The positional arguments are the feature values of a single row, in the
    same order as ``x4.columns``. The row is explained with
    ``y4_explainer_cb`` and at most ten features are displayed.

    Returns the current matplotlib figure after it has been resized.
    """
    # One-row frame; categorical columns are cast to the pandas "category" dtype.
    category_dtypes = dict.fromkeys(categorical_columns4, "category")
    sample = pd.DataFrame(data=[args], columns=x4.columns).astype(category_dtypes)

    # The CatBoost explainer is fed a Pool that names the categorical features.
    pool = Pool(sample, cat_features=categorical_columns4)
    magnitudes = np.abs(y4_explainer_cb.shap_values(pool))

    # Index 0 picks the single row; only magnitudes are plotted, so the sign
    # of each contribution is discarded.
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f4_names)

    figure, axes = plt.gcf(), plt.gca()
    # wrap_labels is defined elsewhere in this file; presumably it wraps the
    # tick labels at 20 characters -- confirm against its definition.
    wrap_labels(axes, 20)
    plt.tight_layout()
    figure.set_figheight(7)
    figure.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return figure
def y4_interpret_rf(*args):
    """Plot absolute SHAP values for one input row (outcome y4 / ICU LOS, random forest).

    The positional arguments are the feature values of a single row, in the
    same order as ``x4.columns``. The row is explained with
    ``y4_explainer_rf`` and at most ten features are displayed.

    Returns the current matplotlib figure after it has been resized.
    """
    # One-row frame; categorical columns are cast to the pandas "category" dtype.
    df4 = pd.DataFrame([args], columns=x4.columns)
    df4 = df4.astype({col: "category" for col in categorical_columns4})

    # Use the random-forest explainer, following the yN_explainer_<algo>
    # naming that every sibling interpret function relies on.
    shap_values4 = np.abs(y4_explainer_rf.shap_values(df4))

    # [0][0]: first entry of the explainer output, then its first element.
    # NOTE(review): assumes the explainer returns one array per class, so
    # this is the single row's values for class 0 -- confirm for the
    # installed shap version.
    shap.bar_plot(shap_values4[0][0], max_display=10, show=False, feature_names=f4_names)

    fig = plt.gcf()
    ax = plt.gca()
    # wrap_labels is defined elsewhere in this file; presumably it wraps the
    # tick labels at 20 characters -- confirm against its definition.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel("SHAP value (impact on model output)", fontsize=12, fontweight='heavy', labelpad=8)
    plt.tick_params(axis="y", direction="out", labelsize=12)
    plt.tick_params(axis="x", direction="out", labelsize=12)
    return fig
#Define interpret for y5 (complications).
def y5_interpret_xgb(*args):
    """Plot absolute SHAP values for one input row (outcome y5 / complications, XGBoost).

    The positional arguments are the feature values of a single row, in the
    same order as ``x5.columns``. The row is explained with
    ``y5_explainer_xgb`` and at most ten features are displayed.

    Returns the current matplotlib figure after it has been resized.
    """
    # One-row frame; categorical columns are cast to the pandas "category" dtype.
    category_dtypes = dict.fromkeys(categorical_columns5, "category")
    sample = pd.DataFrame(data=[args], columns=x5.columns).astype(category_dtypes)

    # The XGBoost explainer is fed a DMatrix with categorical support enabled.
    dmatrix = xgb.DMatrix(sample, enable_categorical=True)
    magnitudes = np.abs(y5_explainer_xgb.shap_values(dmatrix))

    # Index 0 picks the single row; only magnitudes are plotted, so the sign
    # of each contribution is discarded.
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f5_names)

    figure, axes = plt.gcf(), plt.gca()
    # wrap_labels is defined elsewhere in this file; presumably it wraps the
    # tick labels at 20 characters -- confirm against its definition.
    wrap_labels(axes, 20)
    plt.tight_layout()
    figure.set_figheight(7)
    figure.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return figure
def y5_interpret_lgb(*args):
    """Plot absolute SHAP values for one input row (outcome y5 / complications, LightGBM).

    The positional arguments are the feature values of a single row, in the
    same order as ``x5.columns``. The row is explained with
    ``y5_explainer_lgb`` and at most ten features are displayed.

    Returns the current matplotlib figure after it has been resized.
    """
    # One-row frame; categorical columns are cast to the pandas "category" dtype.
    category_dtypes = dict.fromkeys(categorical_columns5, "category")
    sample = pd.DataFrame(data=[args], columns=x5.columns).astype(category_dtypes)

    magnitudes = np.abs(y5_explainer_lgb.shap_values(sample))

    # [0][0]: first entry of the explainer output, then its first element.
    # NOTE(review): assumes the explainer returns one array per class, so
    # this is the single row's values for class 0 -- confirm for the
    # installed shap version.
    shap.bar_plot(magnitudes[0][0], max_display=10, show=False, feature_names=f5_names)

    figure, axes = plt.gcf(), plt.gca()
    # wrap_labels is defined elsewhere in this file; presumably it wraps the
    # tick labels at 20 characters -- confirm against its definition.
    wrap_labels(axes, 20)
    plt.tight_layout()
    figure.set_figheight(7)
    figure.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return figure
def y5_interpret_cb(*args):
    """Plot absolute SHAP values for one input row (outcome y5 / complications, CatBoost).

    The positional arguments are the feature values of a single row, in the
    same order as ``x5.columns``. The row is explained with
    ``y5_explainer_cb`` and at most ten features are displayed.

    Returns the current matplotlib figure after it has been resized.
    """
    # One-row frame; categorical columns are cast to the pandas "category" dtype.
    category_dtypes = dict.fromkeys(categorical_columns5, "category")
    sample = pd.DataFrame(data=[args], columns=x5.columns).astype(category_dtypes)

    # The CatBoost explainer is fed a Pool that names the categorical features.
    pool = Pool(sample, cat_features=categorical_columns5)
    magnitudes = np.abs(y5_explainer_cb.shap_values(pool))

    # Index 0 picks the single row; only magnitudes are plotted, so the sign
    # of each contribution is discarded.
    shap.bar_plot(magnitudes[0], max_display=10, show=False, feature_names=f5_names)

    figure, axes = plt.gcf(), plt.gca()
    # wrap_labels is defined elsewhere in this file; presumably it wraps the
    # tick labels at 20 characters -- confirm against its definition.
    wrap_labels(axes, 20)
    plt.tight_layout()
    figure.set_figheight(7)
    figure.set_figwidth(9)
    plt.xlabel(
        "SHAP value (impact on model output)",
        fontsize=12,
        fontweight="heavy",
        labelpad=8,
    )
    for axis_name in ("y", "x"):
        plt.tick_params(axis=axis_name, direction="out", labelsize=12)
    return figure
def y5_interpret_rf(*args):
    """Plot absolute SHAP values for one input row (outcome y5 / complications, random forest).

    The positional arguments are the feature values of a single row, in the
    same order as ``x5.columns``. The row is explained with
    ``y5_explainer_rf`` and at most ten features are displayed.

    Returns the current matplotlib figure after it has been resized.
    """
    # One-row frame; categorical columns are cast to the pandas "category" dtype.
    df5 = pd.DataFrame([args], columns=x5.columns)
    df5 = df5.astype({col: "category" for col in categorical_columns5})

    # Bind the explainer output to the same name that is plotted below, and
    # take absolute values so only the magnitude of each contribution is shown.
    shap_values5 = np.abs(y5_explainer_rf.shap_values(df5))

    # [0][0]: first entry of the explainer output, then its first element.
    # NOTE(review): assumes the explainer returns one array per class, so
    # this is the single row's values for class 0 -- confirm for the
    # installed shap version.
    shap.bar_plot(shap_values5[0][0], max_display=10, show=False, feature_names=f5_names)

    fig = plt.gcf()
    ax = plt.gca()
    # wrap_labels is defined elsewhere in this file; presumably it wraps the
    # tick labels at 20 characters -- confirm against its definition.
    wrap_labels(ax, 20)
    plt.tight_layout()
    fig.set_figheight(7)
    fig.set_figwidth(9)
    plt.xlabel("SHAP value (impact on model output)", fontsize=12, fontweight='heavy', labelpad=8)
    plt.tick_params(axis="y", direction="out", labelsize=12)
    plt.tick_params(axis="x", direction="out", labelsize=12)
    return fig
with gr.Blocks(title = "TQP-atEDH") as demo:
gr.Markdown(
"""
Outcome | Algorithm | Weighted Precision | Weighted Recall | Weighted AUPRC | Balanced Accuracy | AUROC | Brier Score |
---|---|---|---|---|---|---|---|
Mortality | LightGBM | 0.982 (0.973 - 0.991) | 0.974 (0.963 - 0.985) | 0.394 (0.361 - 0.427) | 0.760 (0.731 - 0.789) | 0.924 (0.888 - 0.987) | 0.014 (0.006 - 0.022) |
Non-home Discharges | Random Forest | 0.758 (0.728 - 0.788) | 0.764 (0.734 - 0.794) | 0.510 (0.475 - 0.545) | 0.673 (0.640 - 0.706) | 0.798 (0.749 - 0.818) | 0.159 (0.133 - 0.185) |
Prolonged LOS | CatBoost | 0.811 (0.785 - 0.837) | 0.827 (0.802 - 0.852) | 0.308 (0.277 - 0.339) | 0.653 (0.621 - 0.685) | 0.751 (0.711 - 0.798) | 0.124 (0.102 - 0.146) |
Prolonged ICU-LOS | LightGBM | 0.82 (0.789 - 0.851) | 0.818 (0.787 - 0.849) | 0.303 (0.266 - 0.34) | 0.629 (0.590 - 0.668) | 0.774 (0.689 - 0.801) | 0.111 (0.086 - 0.136) |
Major Complications | LightGBM | 0.946 (0.930 - 0.962) | 0.821 (0.795 - 0.847) | 0.075 (0.057 - 0.093) | 0.578 (0.544 - 0.612) | 0.733 (0.610 - 0.801) | 0.030 (0.018 - 0.042) |