Spaces:

ppaihack
/

ZamaKlinikV2

Sleeping

App Files Community

lukalafaye committed on Sep 28, 2024

Commit

5306193

1 Parent(s): cd05ce2

fix

Browse files

Files changed (2) hide show

app.py +83 -93
server2.py +0 -150

app.py CHANGED Viewed

@@ -1,13 +1,18 @@
 import numpy as np
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
 import joblib
-from sklearn.tree import DecisionTreeClassifier, XGBClassifier                   #using sklearn decisiontreeclassifier
-from sklearn.model_selection import train_test_split
 import os
 import shutil
 # Define the directory for FHE client/server files
 fhe_directory = '/tmp/fhe_client_server_files/'
@@ -20,113 +25,68 @@ else:
     shutil.rmtree(fhe_directory)
     os.makedirs(fhe_directory)
-data=pd.read_csv('data/heart.xls')
-data.info()   #checking the info
-data_corr=data.corr()
-plt.figure(figsize=(20,20))
-sns.heatmap(data=data_corr,annot=True)
-#Heatmap for data
-"""
-# Get the Data
-X_train, y_train, X_val, y_val = train_test_split()
-classifier = XGBClassifier()
-# Training the Model
-classifier = classifier.fit(X_train, y_train)
-# Trained Model Evaluation on Validation Dataset
-confidence = classifier.score(X_val, y_val)
-# Validation Data Prediction
-y_pred = classifier.predict(X_val)
-# Model Validation Accuracy
-accuracy = accuracy_score(y_val, y_pred)
-# Model Confusion Matrix
-conf_mat = confusion_matrix(y_val, y_pred)
-# Model Classification Report
-clf_report = classification_report(y_val, y_pred)
-# Model Cross Validation Score
-score = cross_val_score(classifier, X_val, y_val, cv=3)
-try:
-    # Load Trained Model
-    clf = load(str(self.model_save_path + saved_model_name + ".joblib"))
-except Exception as e:
-    print("Model not found...")
-if test_data is not None:
-    result = clf.predict(test_data)
-    print(result)
-else:
-    result = clf.predict(self.test_features)
-accuracy = accuracy_score(self.test_labels, result)
-clf_report = classification_report(self.test_labels, result)
-print(accuracy, clf_report)
-"""
-####################
-feature_value=np.array(data_corr['output'])
-for i in range(len(feature_value)):
-    if feature_value[i]<0:
-        feature_value[i]=-feature_value[i]
-print(feature_value)
-features_corr=pd.DataFrame(feature_value,index=data_corr['output'].index,columns=['correalation'])
-feature_sorted=features_corr.sort_values(by=['correalation'],ascending=False)
-feature_selected=feature_sorted.index
-feature_selected     #selected features which are very much correalated
-clean_data=data[feature_selected]
-#making input and output dataset
-X=clean_data.iloc[:,1:]
-Y=clean_data['output']
-x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.25,random_state=0)
-print(x_train.shape,y_train.shape,x_test.shape,y_test.shape)     #data is splited in traing and testing dataset
-# feature scaling
-from sklearn.preprocessing import StandardScaler
-sc=StandardScaler()
-x_train=sc.fit_transform(x_train)
-x_test=sc.transform(x_test)
-#training our model
-dt=XGBClassifier(criterion='entropy',max_depth=6)
-dt.fit(x_train,y_train)
-#dt.compile(x_trqin)
-#predicting the value on testing data
-y_pred=dt.predict(x_test)
-#ploting the data
-from sklearn.metrics import confusion_matrix
-conf_mat=confusion_matrix(y_test,y_pred)
-print(conf_mat)
-accuracy=dt.score(x_test,y_test)
-print("\nThe accuracy of decisiontreelassifier on Heart disease prediction dataset is "+str(round(accuracy*100,2))+"%")
 joblib.dump(dt, 'heart_disease_dt_model.pkl')
-from concrete.ml.sklearn import DecisionTreeClassifier as ConcreteDecisionTreeClassifier
-from concrete.ml.sklearn import XGBClassifier as ConcreteXGBClassifier
-fhe_compatible = ConcreteXGBClassifier.from_sklearn_model(dt, x_train, n_bits = 10) #de FHE
 fhe_compatible.compile(x_train)
-#### server
-from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
 # Setup the development environment
 dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
 dev.save()
@@ -134,3 +94,33 @@ dev.save()
 # Setup the server
 server = FHEModelServer(path_dir=fhe_directory)
 server.load()

+import streamlit as st
 import numpy as np
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
 import joblib
 import os
 import shutil
+from xgboost import XGBClassifier
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import confusion_matrix
+from concrete.ml.sklearn.tree import XGBClassifier as ConcreteXGBClassifier
+from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
 # Define the directory for FHE client/server files
 fhe_directory = '/tmp/fhe_client_server_files/'
     shutil.rmtree(fhe_directory)
     os.makedirs(fhe_directory)
+# Streamlit title
+st.title("Heart Disease Prediction Model")
+# Load the data
+data = pd.read_csv('data/heart.xls')
+st.write("### Dataset Information")
+st.write(data.info())
+# Correlation matrix
+data_corr = data.corr()
+plt.figure(figsize=(20, 20))
+sns.heatmap(data=data_corr, annot=True)
+st.write("### Correlation Heatmap")
+st.pyplot(plt)
+# Feature selection based on correlation
+feature_value = np.abs(data_corr['output'])  # Use absolute values for correlation
+features_corr = pd.DataFrame(feature_value, index=data_corr['output'].index, columns=['correlation'])
+feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
+feature_selected = feature_sorted.index.tolist()
+st.write("### Selected Features Based on Correlation")
+st.write(feature_selected)
+clean_data = data[feature_selected]
+# Prepare data for model training
+X = clean_data.iloc[:, 1:]
+Y = clean_data['output']
+x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=0)
+st.write("### Training and Test Set Shapes")
+st.write(f"Train shape: {x_train.shape}, Test shape: {x_test.shape}")
+# Feature scaling
+sc = StandardScaler()
+x_train = sc.fit_transform(x_train)
+x_test = sc.transform(x_test)
+# Train the model
+dt = XGBClassifier(max_depth=6)
+dt.fit(x_train, y_train)
+# Make predictions
+y_pred = dt.predict(x_test)
+# Confusion matrix
+conf_mat = confusion_matrix(y_test, y_pred)
+st.write("### Confusion Matrix")
+st.write(conf_mat)
+# Model accuracy
+accuracy = dt.score(x_test, y_test)
+st.write(f"### Model Accuracy: {round(accuracy * 100, 2)}%")
+# Save the model
 joblib.dump(dt, 'heart_disease_dt_model.pkl')
+# Prepare FHE compatible model
+fhe_compatible = ConcreteXGBClassifier.from_sklearn_model(dt, x_train, n_bits=10)
 fhe_compatible.compile(x_train)
 # Setup the development environment
 dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
 dev.save()
 # Setup the server
 server = FHEModelServer(path_dir=fhe_directory)
 server.load()
+# Setup the client
+client = FHEModelClient(path_dir=fhe_directory, key_dir="/tmp/keys_client")
+serialized_evaluation_keys = client.get_serialized_evaluation_keys()
+# Load the dataset and perform correlation analysis
+data = pd.read_csv('data/heart.xls')
+data_corr = data.corr()
+# Select features based on correlation with 'output'
+feature_value = np.abs(data_corr['output'])
+features_corr = pd.DataFrame(feature_value, index=data_corr['output'].index, columns=['correlation'])
+feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
+feature_selected = feature_sorted.index.tolist()
+# Clean the data by selecting the most correlated features
+clean_data = data[feature_selected]
+# Extract the first row of feature data for prediction
+sample_data = clean_data.iloc[0, 1:].values.reshape(1, -1)  # Reshape to 2D array for model input
+# Encrypt the sample data
+encrypted_data = client.quantize_encrypt_serialize(sample_data)
+# Run the server and get results
+encrypted_result = server.run(encrypted_data, serialized_evaluation_keys)
+result = client.deserialize_decrypt_dequantize(encrypted_result)
+st.write("### Prediction Result")
+st.write(result)

server2.py DELETED Viewed

@@ -1,150 +0,0 @@
-import numpy as np
-import pandas as pd
-import seaborn as sns
-import matplotlib.pyplot as plt
-import joblib
-import os
-import shutil
-# Define the directory for FHE client/server files
-fhe_directory = '/tmp/fhe_client_server_files/'
-# Create the directory if it does not exist
-if not os.path.exists(fhe_directory):
-    os.makedirs(fhe_directory)
-else:
-    # If it exists, delete its contents
-    shutil.rmtree(fhe_directory)
-    os.makedirs(fhe_directory)
-data=pd.read_csv('data/heart.xls')
-data.info()   #checking the info
-data_corr=data.corr()
-plt.figure(figsize=(20,20))
-sns.heatmap(data=data_corr,annot=True)
-#Heatmap for data
-feature_value=np.array(data_corr['output'])
-for i in range(len(feature_value)):
-    if feature_value[i]<0:
-        feature_value[i]=-feature_value[i]
-print(feature_value)
-features_corr=pd.DataFrame(feature_value,index=data_corr['output'].index,columns=['correalation'])
-feature_sorted=features_corr.sort_values(by=['correalation'],ascending=False)
-feature_selected=feature_sorted.index
-feature_selected     #selected features which are very much correalated
-clean_data=data[feature_selected]
-from xgboost import XGBClassifier
-from sklearn.tree import DecisionTreeClassifier                  #using sklearn decisiontreeclassifier
-from sklearn.model_selection import train_test_split
-#making input and output dataset
-X=clean_data.iloc[:,1:]
-Y=clean_data['output']
-x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.25,random_state=0)
-print(x_train.shape,y_train.shape,x_test.shape,y_test.shape)     #data is splited in traing and testing dataset
-# feature scaling
-from sklearn.preprocessing import StandardScaler
-sc=StandardScaler()
-x_train=sc.fit_transform(x_train)
-x_test=sc.transform(x_test)
-#training our model
-dt=XGBClassifier(max_depth=6)
-dt.fit(x_train,y_train)
-#dt.compile(x_trqin)
-#predicting the value on testing data
-y_pred=dt.predict(x_test)
-#ploting the data
-from sklearn.metrics import confusion_matrix
-conf_mat=confusion_matrix(y_test,y_pred)
-print(conf_mat)
-accuracy=dt.score(x_test,y_test)
-print("\nThe accuracy of decisiontreelassifier on Heart disease prediction dataset is "+str(round(accuracy*100,2))+"%")
-joblib.dump(dt, 'heart_disease_dt_model.pkl')
-from concrete.ml.sklearn.tree import XGBClassifier as ConcreteXGBClassifier
-fhe_compatible = ConcreteXGBClassifier.from_sklearn_model(dt, x_train, n_bits = 10)
-fhe_compatible.compile(x_train)
-#### server
-from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
-# Setup the development environment
-dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
-dev.save()
-# Setup the server
-server = FHEModelServer(path_dir=fhe_directory)
-server.load()
-####### client
-from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer
-# Setup the client
-client = FHEModelClient(path_dir=fhe_directory, key_dir="/tmp/keys_client")
-serialized_evaluation_keys = client.get_serialized_evaluation_keys()
-# Load the dataset and select the relevant features
-data = pd.read_csv('data/heart.xls')
-# Perform the correlation analysis
-data_corr = data.corr()
-# Select features based on correlation with 'output'
-feature_value = np.array(data_corr['output'])
-for i in range(len(feature_value)):
-    if feature_value[i] < 0:
-        feature_value[i] = -feature_value[i]
-features_corr = pd.DataFrame(feature_value, index=data_corr['output'].index, columns=['correlation'])
-feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
-feature_selected = feature_sorted.index
-# Clean the data by selecting the most correlated features
-clean_data = data[feature_selected]
-# Extract the first row of feature data for prediction (excluding 'output' column)
-sample_data = clean_data.iloc[0, 1:].values.reshape(1, -1)  # Reshape to 2D array for model input
-encrypted_data = client.quantize_encrypt_serialize(sample_data)
-##### end client
-encrypted_result = server.run(encrypted_data, serialized_evaluation_keys)
-result = client.deserialize_decrypt_dequantize(encrypted_result)
-print(result)