Tenefix committed on
Commit
34c497e
·
verified ·
1 Parent(s): ed8ee84

Upload 5 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ dataset/card_transdata.csv filter=lfs diff=lfs merge=lfs -text
dataset/card_transdata.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7013c329bae9ef0ef32d65dbeb095694f0c7cd6c00ff74b2d0087fa1c67b8717
3
+ size 76277977
models/fhe_files/client.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:322b3976b22d5ba9716880a6f414d5646709d0cf1604b708aaf2ce12194ab98f
3
+ size 15633
models/fhe_files/server.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5fef601c70314eed11b39416aaa3cca9f3e0a8e63c8f53b97d3268797e3e56e
3
+ size 9682
models/fhe_model.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Module for training and deploying an FHE-enabled
Random Forest model using Concrete ML.

Run from the repository root. The script:

1. loads the first 100,000 rows of ``dataset/card_transdata.csv``;
2. drops rows with missing values and balances the two ``fraud`` classes
   by downsampling;
3. fits a ``StandardScaler`` on the training split and saves it to
   ``models/scaler.pkl``;
4. trains a Concrete ML ``RandomForestClassifier``, reports its accuracy
   on the held-out validation split, and compiles it;
5. writes the client/server deployment files to ``models/fhe_files``.

All paths are resolved relative to the current working directory.
"""

import os
import shutil

import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from concrete.ml.sklearn.rf import RandomForestClassifier
from concrete.ml.deployment import FHEModelDev

BASE_DIR = os.path.abspath(os.getcwd())

# Load the data (100,000 rows only)
DATA_PATH = os.path.join(BASE_DIR, "dataset", "card_transdata.csv")
df = pd.read_csv(DATA_PATH, nrows=100000)  # Limit to 100,000 rows

# Drop rows with missing values (a no-op when there are none)
df = df.dropna()

# Handle class imbalance by downsampling to the size of the smaller class
fraud = df[df["fraud"] == 1]
non_fraud_pool = df[df["fraud"] == 0]
if fraud.empty or non_fraud_pool.empty:
    # Fail early with a clear message instead of an obscure error from
    # train_test_split(stratify=...) further down.
    raise ValueError(
        "Both classes must be present in the 'fraud' column "
        f"(fraud rows: {len(fraud)}, non-fraud rows: {len(non_fraud_pool)})."
    )

samples_per_class = min(len(fraud), len(non_fraud_pool))
if len(fraud) > samples_per_class:
    # Only reached when fraud is the majority class; otherwise the fraud
    # rows are kept untouched, in their original order.
    fraud = fraud.sample(n=samples_per_class, random_state=42)
non_fraud = non_fraud_pool.sample(n=samples_per_class, random_state=42)
balanced_df = pd.concat([fraud, non_fraud])

# Separate features and target
X = balanced_df.drop(columns=["fraud"])
y = balanced_df["fraud"].astype(int)

# Split into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Preprocessing: scale the data (statistics come from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Save the scaler for later use
SCALER_PATH = os.path.join(BASE_DIR, "models", "scaler.pkl")
# joblib.dump does not create missing parent directories.
os.makedirs(os.path.dirname(SCALER_PATH), exist_ok=True)
joblib.dump(scaler, SCALER_PATH)

# Train the Random Forest model with Concrete ML
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Report accuracy on the held-out split so the validation data is actually used
val_predictions = model.predict(X_val_scaled)
val_accuracy = (val_predictions == y_val.to_numpy()).mean()
print(f"Validation accuracy: {val_accuracy:.4f}")

# Compile the model for homomorphic encryption
model.compile(X_train_scaled)

# Save the model and necessary files for client and server
FHE_DIRECTORY = os.path.join(BASE_DIR, "models", "fhe_files")
# NOTE(review): FHEModelDev.save() is expected to refuse a non-empty target
# directory, so artefacts from a previous run are removed first to keep the
# script re-runnable. Confirm against the installed Concrete ML version.
if os.path.isdir(FHE_DIRECTORY):
    shutil.rmtree(FHE_DIRECTORY)
dev = FHEModelDev(path_dir=FHE_DIRECTORY, model=model)
dev.save()

print("Model trained, compiled, and saved.")
models/scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d05215c687e429d53fd9e2ef3b461461e13b6f65ff532ca8d1ce55b89067de7a
3
+ size 1231