Spaces:
Runtime error
Runtime error
import streamlit as st | |
st.set_page_config(layout="wide", page_icon=":hospital:") | |
st.set_option('deprecation.showPyplotGlobalUse', False) | |
import pandas as pd | |
import numpy as np | |
import seaborn as sns | |
import time | |
import matplotlib.pyplot as plt | |
plt.style.use('fivethirtyeight') | |
plt.style.use('default') | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.tree import DecisionTreeClassifier | |
from sklearn.svm import SVC | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.ensemble import GradientBoostingClassifier | |
from xgboost import XGBClassifier | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, StandardScaler | |
from sklearn.metrics import precision_recall_fscore_support as score, mean_squared_error | |
from sklearn.metrics import confusion_matrix, accuracy_score | |
from sklearn.decomposition import PCA | |
######################################################################################################################## | |
# Reference point for the "Total execution time" banner rendered further down.
start_time = time.time()

# ``st.beta_columns`` no longer exists in current Streamlit releases (the API
# graduated to ``st.columns``); use whichever one this installation provides.
_make_columns = getattr(st, "columns", None) or st.beta_columns

# Title for the webpage (tit2 is not written to in the visible code)
tit1, tit2 = _make_columns((4, 1))
tit1.markdown(
    "<h1 style='text-align: center;'><u>Activity/ Pain Prediction With Wearable Technology Data</u> </h1>",
    unsafe_allow_html=True,
)

# Sidebar: dataset and classifier pickers that drive the rest of the script
st.sidebar.title("Dataset and ML Classifiers")
dataset_select = st.sidebar.selectbox("Select Dataset: ", ('AppleWatch Data', "Fitbit Data"))
classifier_select = st.sidebar.selectbox(
    "Select ML Classifier: ",
    ("Logistic Regression", "KNN", "SVM", "Decision Trees",
     "Random Forest", "Gradient Boosting", "XGBoost"),
)

# NOTE(review): LE is not referenced anywhere else in the visible code.
LE = LabelEncoder()
def get_dataset(dataset_select):
    """Load the CSV for the chosen dataset and render a matching page header.

    "AppleWatch Data" selects the Apple Watch file; any other value selects
    the Fitbit file. Returns the DataFrame produced by ``pd.read_csv``.
    """
    if dataset_select == "AppleWatch Data":
        csv_url = "https://raw.githubusercontent.com/ajinkyalahade/PainPredictionProject/main/Data/data_applewatch.csv"
        heading = "Activity Data Apple Watch"
    else:
        csv_url = "https://raw.githubusercontent.com/ajinkyalahade/PainPredictionProject/main/Data/data_fitbit.csv"
        heading = "Activity Data Fitbit Watch"

    # Download first, then announce: the header only appears if the read succeeded.
    frame = pd.read_csv(csv_url)
    st.header(heading)
    return frame


data = get_dataset(dataset_select)
def selected_dataset(dataset_select):
    """Split the module-level ``data`` frame into features and target.

    The target column is "activitytag" for "AppleWatch Data" and "tag" for
    "Fitbit Data". Returns ``(X, Y)`` for those two selections and ``None``
    for anything else.
    """
    if dataset_select == "AppleWatch Data":
        target_column = "activitytag"
    elif dataset_select == "Fitbit Data":
        target_column = "tag"
    else:
        return None

    features = data.drop([target_column], axis=1)
    labels = data[target_column]
    return features, labels


X, Y = selected_dataset(dataset_select)
# Charts
def plot_op(dataset_select):
    """Show the target column next to a bar chart of its class counts.

    Reads the module-level ``Y``. The narrow left column lists the raw target
    values and the wide right column shows the count plot. Nothing is rendered
    for a selection other than "AppleWatch Data" or "Fitbit Data".
    """
    # ``st.beta_columns`` was removed from Streamlit; fall back to it only on
    # old releases that do not have ``st.columns`` yet.
    make_columns = getattr(st, "columns", None) or st.beta_columns
    col1, col2 = make_columns((1, 5))

    fig = plt.figure(figsize=(12, 3))
    plt.title("Classes in 'Y'")
    if dataset_select in ("AppleWatch Data", "Fitbit Data"):
        col1.write(Y)
        # Pass the series as ``x=``: the first positional argument of
        # countplot is ``data`` in current seaborn, not the variable to count.
        sns.countplot(x=Y, palette='colorblind')
        # Hand the figure over explicitly instead of relying on the deprecated
        # global-figure form of ``pyplot()``.
        col2.pyplot(fig)
    # Streamlit has already rasterized the figure; close it so repeated
    # script reruns do not accumulate open matplotlib figures.
    plt.close(fig)


########################################################################################################################
# Dataset overview: raw table, shape, class count, and the class-balance chart
st.write(data)
st.write("Shape of dataset: ", data.shape)
st.write("Number of classes: ", Y.nunique())
plot_op(dataset_select)
######################################################################################################################## | |
def add_parameter_ui(clf_name):
    """Render the sidebar hyper-parameter widgets for one classifier.

    Args:
        clf_name: Classifier label as offered by the classifier selectbox.

    Returns:
        dict mapping short keys (e.g. "R", "MI", "K", "C", "kernel", "M",
        "SS", "N", "LR", "L", "O", "G", "A", "CS") to the chosen values,
        plus "RS" (random state), which is collected for every classifier.
    """
    sidebar = st.sidebar
    params = {}
    sidebar.write("Select Parameters: ")

    if clf_name == "Logistic Regression":
        params["R"] = sidebar.slider("Regularization", 0.1, 10.0, step=0.1)
        params["MI"] = sidebar.slider("max_iter", 50, 400, step=50)
    elif clf_name == "KNN":
        params["K"] = sidebar.slider("n_neighbors", 1, 20)
    elif clf_name == "SVM":
        params["C"] = sidebar.slider("Regularization", 0.01, 10.0, step=0.01)
        # "precomputed" is deliberately not offered: it requires a square
        # kernel matrix as input, but this app always fits on the raw
        # (samples x features) table, so selecting it could only fail.
        params["kernel"] = sidebar.selectbox("Kernel", ("linear", "poly", "rbf", "sigmoid"))
    elif clf_name == "Decision Trees":
        params["M"] = sidebar.slider("max_depth", 2, 20)
        params["C"] = sidebar.selectbox("Criterion", ("gini", "entropy"))
        # An integer min_samples_split must be at least 2 in scikit-learn,
        # so the slider starts at 2 rather than 1.
        params["SS"] = sidebar.slider("min_samples_split", 2, 10)
    elif clf_name == "Random Forest":
        params["N"] = sidebar.slider("n_estimators", 50, 500, step=50, value=100)
        params["M"] = sidebar.slider("max_depth", 2, 20)
        params["C"] = sidebar.selectbox("Criterion", ("gini", "entropy"))
    elif clf_name == "Gradient Boosting":
        params["N"] = sidebar.slider("n_estimators", 50, 500, step=50, value=100)
        params["LR"] = sidebar.slider("Learning Rate", 0.01, 0.5)
        # NOTE(review): recent scikit-learn releases renamed 'deviance' to
        # 'log_loss' -- confirm against the pinned scikit-learn version.
        params["L"] = sidebar.selectbox("Loss", ('deviance', 'exponential'))
        params["M"] = sidebar.slider("max_depth", 2, 20)
    elif clf_name == "XGBoost":
        params["N"] = sidebar.slider("n_estimators", 50, 500, step=50, value=50)
        params["LR"] = sidebar.slider("Learning Rate", 0.01, 0.5, value=0.1)
        params["O"] = sidebar.selectbox(
            "Objective", ('binary:logistic', 'reg:logistic', 'reg:squarederror', "reg:gamma"))
        params["M"] = sidebar.slider("max_depth", 1, 20, value=6)
        params["G"] = sidebar.slider("Gamma", 0, 10, value=5)
        params["L"] = sidebar.slider("reg_lambda", 1.0, 5.0, step=0.1)
        params["A"] = sidebar.slider("reg_alpha", 0.0, 5.0, step=0.1)
        params["CS"] = sidebar.slider("colsample_bytree", 0.5, 1.0, step=0.1)

    # Shown below the classifier-specific widgets, whatever was selected.
    params["RS"] = sidebar.slider("Random State", 0, 100)
    return params


params = add_parameter_ui(classifier_select)
# get classifier by selections above
def get_classifier(clf_name, params):
    """Build the estimator matching ``clf_name`` from the sidebar parameters.

    Args:
        clf_name: Classifier label as offered by the classifier selectbox.
        params: Dict produced by ``add_parameter_ui`` for the same label.

    Returns:
        The unfitted estimator. It is also stored in the module-level ``clf``.

    Raises:
        ValueError: If ``clf_name`` is not one of the supported labels.
        KeyError: If ``params`` lacks a key the chosen classifier needs.
    """
    global clf  # kept: the function has always published its result globally

    # The "Random State" slider value was previously collected but never
    # used. ``.get`` keeps dictionaries without "RS" working (seed = None).
    seed = params.get("RS")

    if clf_name == "Logistic Regression":
        clf = LogisticRegression(C=params["R"], max_iter=params["MI"], random_state=seed)
    elif clf_name == "KNN":
        # KNeighborsClassifier has no random_state parameter.
        clf = KNeighborsClassifier(n_neighbors=params["K"])
    elif clf_name == "SVM":
        clf = SVC(kernel=params["kernel"], C=params["C"], random_state=seed)
    elif clf_name == "Decision Trees":
        # "SS" comes from the "min_samples_split" slider; it used to be passed
        # as ``min_impurity_split``, a different parameter that scikit-learn
        # has since removed. Clamp to 2, the smallest valid integer value.
        clf = DecisionTreeClassifier(max_depth=params["M"], criterion=params["C"],
                                     min_samples_split=max(2, params["SS"]), random_state=seed)
    elif clf_name == "Random Forest":
        clf = RandomForestClassifier(n_estimators=params["N"], max_depth=params["M"],
                                     criterion=params["C"], random_state=seed)
    elif clf_name == "Gradient Boosting":
        clf = GradientBoostingClassifier(n_estimators=params["N"], learning_rate=params["LR"],
                                         loss=params["L"], max_depth=params["M"], random_state=seed)
    elif clf_name == "XGBoost":
        clf = XGBClassifier(booster="gbtree", n_estimators=params["N"], max_depth=params["M"],
                            learning_rate=params["LR"],
                            objective=params["O"], gamma=params["G"], reg_alpha=params["A"],
                            reg_lambda=params["L"], colsample_bytree=params["CS"], random_state=seed)
    else:
        # Previously an unknown name fell through and returned whatever
        # ``clf`` happened to hold (or failed with NameError).
        raise ValueError(f"Unknown classifier: {clf_name!r}")
    return clf


clf = get_classifier(classifier_select, params)
######################################################################################################################## | |
# get model trained
def model():
    """Fit the module-level ``clf`` and predict the held-out split.

    Uses the module-level ``X`` and ``Y`` with a fixed 67/33 split
    (``random_state=42``). Features are standardized with statistics taken
    from the training rows only.

    Returns:
        ``(Y_pred, Y_test)``: predictions for the test rows and their true labels.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

    # Standardization (zero mean / unit variance). Fit on the training rows
    # only so test-set statistics do not leak into training.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    clf.fit(X_train, Y_train)
    Y_pred = clf.predict(X_test)
    return Y_pred, Y_test


Y_pred, Y_test = model()
######################################################################################################################## | |
# Plot Output
def compute(Y_pred, Y_test):
    """Render the evaluation report for one train/test run.

    Draws, in order: a 2-component PCA scatter of the module-level ``X``
    coloured by ``Y``, a predictions-vs-actual scatter, a confusion-matrix
    heatmap, and a text block with precision, recall, F1, accuracy and MSE.

    Args:
        Y_pred: Predicted labels for the test rows.
        Y_test: True labels for the same rows.
    """
    # ``st.beta_columns`` was removed from Streamlit; fall back to it only on
    # old releases that do not have ``st.columns`` yet.
    make_columns = getattr(st, "columns", None) or st.beta_columns

    # Plot PCA
    pca = PCA(2)
    X_projected = pca.fit_transform(X)
    x1 = X_projected[:, 0]
    x2 = X_projected[:, 1]
    pca_fig = plt.figure(figsize=(16, 8))
    plt.scatter(x1, x2, c=Y, alpha=0.8, cmap="cividis")
    plt.xlabel("Principal Component 1")
    plt.ylabel("Principal Component 2")
    plt.colorbar()
    # Figures are passed explicitly (the global-figure form of ``pyplot()`` is
    # deprecated) and closed afterwards so reruns do not pile up open figures.
    st.pyplot(pca_fig)
    plt.close(pca_fig)

    c1, c2 = make_columns((4, 3))

    # Output plot
    pred_fig = plt.figure(figsize=(12, 6))
    plt.scatter(range(len(Y_pred)), Y_pred, color="blue", lw=5, label="Predictions")
    plt.scatter(range(len(Y_test)), Y_test, color="red", label="Actual")
    plt.title("Prediction Values vs Real Values")
    plt.legend()
    plt.grid(True)
    c1.pyplot(pred_fig)
    plt.close(pred_fig)

    # Confusion Matrix
    # Rows/columns follow ``labels``: index 0 is class 0 (low risk) and index 1
    # is class 1 (high risk). The display names were previously listed in the
    # opposite order, which mislabelled every cell. Fixing ``labels`` also
    # keeps the matrix 2x2 when a class is absent from the test split.
    cm = confusion_matrix(Y_test, Y_pred, labels=[0, 1])
    class_label = ["Low-Pain-risk", "High-Pain-risk"]
    df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)
    cm_fig = plt.figure(figsize=(12, 7.5))
    sns.heatmap(df_cm, annot=True, cmap='Set1', linewidths=2, fmt='d')
    plt.title("Confusion Matrix", fontsize=15)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    c2.pyplot(cm_fig)
    plt.close(cm_fig)

    # Calculate Metrics
    acc = accuracy_score(Y_test, Y_pred)
    mse = mean_squared_error(Y_test, Y_pred)
    precision, recall, fscore, _support = score(Y_test, Y_pred, pos_label=1, average='binary')
    st.subheader("Metrics of the model: ")
    st.text('Precision: {} \nRecall: {} \nF1-Score: {} \nAccuracy: {} %\nMean Squared Error: {}'.format(
        round(precision, 3), round(recall, 3), round(fscore, 3), round((acc * 100), 3), round((mse), 3)))


# Section 1: model report for the current dataset/classifier selection
st.markdown("<hr>", unsafe_allow_html=True)
st.header(f"1) Model for Prediction of {dataset_select}")
st.subheader(f"Classifier Used: {classifier_select}")
compute(Y_pred, Y_test)
# Execution Time
end_time = time.time()
st.info(f"Total execution time: {round((end_time - start_time), 4)} seconds")
# Get user values
def user_inputs_ui(da, data):
    """Render one numeric input per feature column and collect the entries.

    Args:
        da: Dataset label ("Fitbit Data" or "AppleWatch Data"). This used to
            be ignored in favour of the global ``dataset_select``.
        data: DataFrame holding the features plus the dataset's target column.

    Returns:
        dict mapping feature column name to the entered value. Fitbit values
        are rounded to 4 decimals; Apple Watch values are stored as entered.
        The dict is empty when ``da`` is not a known dataset label.
    """
    target_by_dataset = {"Fitbit Data": "tag", "AppleWatch Data": "activitytag"}
    user_val = {}

    target_column = target_by_dataset.get(da)
    if target_column is None:
        return user_val

    features = data.drop([target_column], axis=1)
    for name in features.columns:
        column = features[name]
        spread = round(column.std())  # computed once, used for both bounds
        # NOTE(review): the bounds are |min - std| and |max + std|, as in the
        # original code; the abs() on the lower bound can place it above the
        # column minimum -- confirm that this is intended.
        entered = st.number_input(name, abs(column.min() - spread), abs(column.max() + spread))
        # Rounding differs per dataset in the original code; preserved as-is.
        user_val[name] = round(entered, 4) if da == "Fitbit Data" else entered
    return user_val
# User values
st.markdown("<hr>", unsafe_allow_html=True)
st.header("2) User Values")
# ``st.beta_expander`` no longer exists in current Streamlit releases (the API
# graduated to ``st.expander``); use whichever one this installation provides.
_make_expander = getattr(st, "expander", None) or st.beta_expander
with _make_expander("Learn More"):
    st.markdown("""
Please fill in your data to see the results.<br>
<p style='color: red;'> 1 - High Risk </p> <p style='color: green;'> 0 - Low Risk </p>
""", unsafe_allow_html=True)
# One number input per feature column of the selected dataset
user_val = user_inputs_ui(dataset_select, data)
# @st.cache(suppress_st_warning=True) | |
def user_predict():
    """Classify the values entered by the user and display the verdict.

    Reads the module-level ``data``, ``dataset_select``, ``user_val`` and the
    already-fitted ``clf``; stores the prediction array in the module-level
    ``U_pred``.

    Raises:
        ValueError: If ``dataset_select`` is not a known dataset label.
        KeyError: If ``user_val`` lacks an entry for a feature column.
    """
    global U_pred  # kept: the function has always published its result globally

    target_by_dataset = {"AppleWatch Data": "activitytag", "Fitbit Data": "tag"}
    target_column = target_by_dataset.get(dataset_select)
    if target_column is None:
        # Previously this path left U_pred unbound and failed with NameError.
        raise ValueError(f"Unknown dataset: {dataset_select!r}")
    features = data.drop([target_column], axis=1)

    # ``model()`` trains ``clf`` on standardized features, so raw user input
    # must go through the same transformation before prediction. That scaler
    # is local to ``model()``; it is rebuilt here from the identical split
    # (same rows, test_size and random_state), which yields the same
    # statistics. Keep this call in sync with the split in ``model()``.
    train_rows, _ = train_test_split(features, test_size=0.33, random_state=42)
    scaler = StandardScaler().fit(train_rows)

    # Build a one-row frame with the training column names and order.
    user_row = pd.DataFrame([[user_val[col] for col in features.columns]], columns=features.columns)
    U_pred = clf.predict(scaler.transform(user_row))

    st.subheader("Your Status: ")
    # Compare the scalar prediction, not the whole array.
    if U_pred[0] == 0:
        st.write(U_pred[0],
                 " - NOT A PAIN EVENT -- THIS IS NOT A PROFESSIONAL MEDICAL ADVISE - CONTACT YOUR PRIMARY CARE PROVIDER")
    else:
        st.write(U_pred[0],
                 "- POTENTIAL PAIN EVENT; PLEASE SEE YOUR DOCTOR -- THIS IS NOT A PROFESSIONAL MEDICAL ADVISE")


user_predict()  # Predict the status of user.