File size: 3,510 Bytes
5306193
32965bb
 
0489425
 
 
 
 
5306193
 
 
 
ecec68f
5306193
0489425
 
 
 
 
 
 
 
 
 
 
 
5306193
 
0489425
ecec68f
 
 
 
 
5306193
 
 
 
 
0489425
ecec68f
 
 
 
 
5306193
 
ecec68f
0489425
ecec68f
5306193
0489425
ecec68f
5306193
0489425
ecec68f
5306193
 
 
0489425
ecec68f
 
 
32965bb
5306193
 
 
 
32965bb
5306193
ecec68f
5306193
32965bb
ecec68f
5306193
 
ecec68f
 
5306193
 
ecec68f
0489425
5306193
0489425
 
ecec68f
 
0489425
 
ecec68f
0489425
 
 
 
5306193
 
 
 
 
ecec68f
 
5306193
 
ecec68f
5306193
 
 
ecec68f
eb57486
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import io
import os
import shutil

import streamlit as st
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from concrete.ml.sklearn.tree import DecisionTreeClassifier as FHEDecisionTreeClassifier
from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer

# Scratch location where the FHE client/server artifacts are written.
fhe_directory = '/tmp/fhe_client_server_files/'

# Always start from an empty directory: remove whatever a previous run left
# behind (if anything), then create the directory afresh.
if os.path.exists(fhe_directory):
    shutil.rmtree(fhe_directory)
os.makedirs(fhe_directory)

# Load the data. The file has an .xls extension but is parsed with read_csv,
# i.e. as delimited text -- NOTE(review): presumably it is a CSV that was
# saved under the wrong extension; confirm.
data = pd.read_csv('data/heart.xls')

st.write("### Data Overview")
st.write(data.head())

# DataFrame.info() prints to stdout and returns None, so calling it bare shows
# nothing in the Streamlit page. Capture the report in a text buffer and render
# it as preformatted text instead.
info_buffer = io.StringIO()
data.info(buf=info_buffer)
st.text(info_buffer.getvalue())

# Correlation analysis: pairwise correlation between every pair of columns.
data_corr = data.corr()

# Draw on an explicit Figure/Axes rather than pyplot's implicit "current
# figure": Streamlit re-executes this script on every rerun, and handing
# st.pyplot a concrete figure avoids relying on shared global pyplot state.
fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(data=data_corr, annot=True, ax=ax)
st.write("### Correlation Heatmap")
st.pyplot(fig)
# The figure has been rendered into the page; release it so figures do not
# accumulate in pyplot's registry across reruns.
plt.close(fig)

# Rank every column by the magnitude of its correlation with the 'output'
# column. The sign is irrelevant for ranking, so take absolute values in one
# vectorized step instead of flipping negatives element by element.
output_corr = data_corr['output']
feature_value = np.abs(np.array(output_corr))

features_corr = pd.DataFrame(feature_value, index=output_corr.index, columns=['correlation'])
feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
# Column names ordered from most to least correlated with 'output'. 'output'
# itself is part of this index (it is a column of the correlation matrix).
feature_selected = feature_sorted.index

st.write("### Selected Features")
st.write(feature_selected)

# Reorder the columns by correlation strength with the target.
clean_data = data[feature_selected]

# Prepare the dataset for training.
# Remove the label by name rather than by position: slicing off "column 0"
# only works while 'output' happens to sort first, and a tie in the ranking
# would silently leave the label among the features.
X = clean_data.drop(columns=['output'])
Y = clean_data['output']
# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=0)

st.write("### Training Data Shape")
st.write(f"X Train Shape: {x_train.shape}, Y Train Shape: {y_train.shape}")
st.write(f"X Test Shape: {x_test.shape}, Y Test Shape: {y_test.shape}")

# Standardize the features. The scaler learns its statistics from the training
# split only and is then applied unchanged to both splits.
sc = StandardScaler().fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

# Fit an entropy-criterion decision tree limited to depth 6.
dt = DecisionTreeClassifier(criterion='entropy', max_depth=6).fit(x_train, y_train)

# Evaluate on the held-out split.
y_pred = dt.predict(x_test)
accuracy = dt.score(x_test, y_test)
conf_mat = confusion_matrix(y_test, y_pred)

st.write("### Confusion Matrix")
st.write(conf_mat)
accuracy_pct = round(accuracy * 100, 2)
st.write(f"### Accuracy: {accuracy_pct}%")

# Persist the fitted plain-text model next to the script.
joblib.dump(dt, 'heart_disease_dt_model.pkl')

# Convert the trained scikit-learn tree into its Concrete ML counterpart.
# The scaled training matrix is passed alongside the model with n_bits=10.
# NOTE(review): presumably x_train acts as the calibration set for
# quantization and n_bits is the quantization bit-width -- confirm against the
# Concrete ML documentation.
fhe_compatible = FHEDecisionTreeClassifier.from_sklearn_model(dt, x_train, n_bits=10)
# Compile the converted model, again using the scaled training data as input.
fhe_compatible.compile(x_train)

# Setup the server: save the compiled model's deployment files into
# fhe_directory, then have the server load from that same directory.
# The save must happen before the load.
dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
dev.save()
server = FHEModelServer(path_dir=fhe_directory)
server.load()

# Setup the client from the same deployment directory, with its keys kept
# under /tmp/keys_client. The serialized evaluation keys obtained here are
# later handed to the server together with the encrypted input.
client = FHEModelClient(path_dir=fhe_directory, key_dir="/tmp/keys_client")
serialized_evaluation_keys = client.get_serialized_evaluation_keys()

# Build the single-row input for the encrypted prediction from the first row
# of the feature table. The tree was fitted (and the FHE model compiled) on
# StandardScaler-transformed features, so the raw row must go through the same
# fitted scaler before it is quantized and encrypted; feeding unscaled values
# would evaluate the tree on a different feature scale than it was trained on.
# X.iloc[[0]] keeps the row two-dimensional, so the result has one row.
sample_data = sc.transform(X.iloc[[0]])
encrypted_data = client.quantize_encrypt_serialize(sample_data)

# Run the server on the encrypted input; it only needs the evaluation keys.
encrypted_result = server.run(encrypted_data, serialized_evaluation_keys)
# Decrypt and de-quantize on the client side.
# NOTE(review): for a classifier this is presumably per-class scores rather
# than a class label -- confirm before presenting it as a diagnosis.
result = client.deserialize_decrypt_dequantize(encrypted_result)

st.write("### Encrypted Prediction Result")
st.write(result)