saifhmb commited on
Commit
01cc70d
·
unverified ·
1 Parent(s): be24054

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -0
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # importing libraries
2
+ from datasets import load_dataset, load_dataset_builder
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import pandas as pd
6
+ import sklearn
7
+ from sklearn.compose import ColumnTransformer
8
+ from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
9
+ from sklearn.pipeline import Pipeline
10
+ from sklearn.model_selection import train_test_split
11
+ from sklearn.naive_bayes import GaussianNB
12
+ from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, classification_report
13
+ import imblearn
14
+ from imblearn.under_sampling import RandomUnderSampler
15
+ from skops import hub_utils
16
+ import pickle
17
+ from skops.card import Card, metadata_from_config
18
+ from pathlib import Path
19
+ from tempfile import mkdtemp, mkstemp
20
+
21
# ---------------------------------------------------------------------------
# Module-level training script: downloads the dataset, fits a Gaussian Naive
# Bayes pipeline on an under-sampled training split, evaluates it on the test
# split, and round-trips the fitted pipeline through ``model.pkl``. The
# resulting ``model`` global is what ``prediction`` uses.
# ---------------------------------------------------------------------------

# Loading the dataset
dataset_name = "saifhmb/FraudPaymentData"
dataset = load_dataset(dataset_name, split='train')
dataset = pd.DataFrame(dataset)

dataset = dataset.dropna()
# Deleting high-cardinality features.
dataset = dataset.drop(
    ['Time_step', 'Transaction_Id', 'Sender_Id', 'Sender_Account', 'Bene_Id', 'Bene_Account'],
    axis=1,
)
# Select the target by name rather than by position so the script does not
# silently train on the wrong column if the dataset's column order changes.
y = dataset['Label'].values
dataset = dataset.drop(['Label'], axis=1)
# 'Sender_lob' has only a single unique value and 'Sender_Sector' is a
# high-cardinality feature, so neither is used.
dataset = dataset.drop(['Sender_lob', 'Sender_Sector'], axis=1)

# Encoding the independent variables
categoricalColumns = ['Sender_Country', 'Bene_Country', 'Transaction_Type']
# handle_unknown='ignore': categories not seen during fit encode as all zeros
# instead of raising at prediction time.
onehot_categorical = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
categorical_transformer = Pipeline(steps=[('onehot', onehot_categorical)])

numericalColumns = dataset.select_dtypes(include=np.number).columns
sc = StandardScaler()
numerical_transformer = Pipeline(steps=[('scale', sc)])
preprocessorForCategoricalColumns = ColumnTransformer(
    transformers=[('cat', categorical_transformer, categoricalColumns)],
    remainder='passthrough',
)
preprocessorForAllColumns = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categoricalColumns),
        ('num', numerical_transformer, numericalColumns),
    ],
    remainder="passthrough",
)

# Splitting the dataset into training and test sets
X = dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

# Train the Naive Bayes model using the training set.
# Handling the imbalanced dataset: under-sample the training split only, so the
# test split keeps its original class distribution. The sampler is seeded like
# the split above so that every start of the app fits the same model.
under_sampler = RandomUnderSampler(random_state=42)
X_under, y_under = under_sampler.fit_resample(X_train, y_train)

classifier = GaussianNB()  # select the appropriate algorithm for the problem statement
model = Pipeline(steps=[('preprocessorAll', preprocessorForAllColumns), ('classifier', classifier)])
model.fit(X_under, y_under)

# Predicting the test result
y_pred = model.predict(X_test)

# Making the confusion matrix and evaluating performance.
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the
# true labels and columns the predicted ones. Passing them the other way round
# transposes the matrix shown by ConfusionMatrixDisplay.
cm = confusion_matrix(y_test, y_pred, labels=model.classes_)
# NOTE(review): the display labels assume model.classes_ is ordered [0, 1] - confirm.
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=np.array(['0 - Normal', '1 - Fraudulent']))
disp.plot()
plt.show()
acc = accuracy_score(y_test, y_pred)

# Pickling the model; the context manager closes the file even if dump() raises.
with open("model.pkl", "wb") as pickle_out:
    pickle.dump(model, pickle_out)

# Loading the model to predict on the data. The file read here is the one
# written just above; pickle must not be used on files from untrusted sources.
with open('model.pkl', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
75
+
76
def welcome():
    """Return the fixed greeting string ``'welcome all'``."""
    greeting = 'welcome all'
    return greeting
78
+
79
+ # defining the function which will make the prediction using the data which the user inputs
80
def prediction(Sender_Country, Bene_Country, USD_amount, Transaction_Type):
    """Run the module-level ``model`` on a single user-supplied transaction.

    The four arguments are packed into a one-row DataFrame with the columns
    ``Sender_Country``, ``Bene_Country``, ``USD_amount`` and
    ``Transaction_Type`` and handed to ``model.predict``.

    The prediction is printed to stdout and then returned unchanged, i.e. as
    whatever ``model.predict`` produces for that one row.
    """
    feature_names = ['Sender_Country', 'Bene_Country', 'USD_amount', 'Transaction_Type']
    row = [Sender_Country, Bene_Country, USD_amount, Transaction_Type]
    features = pd.DataFrame([row], columns=feature_names)

    predicted = model.predict(features)
    print(predicted)
    return predicted
85
+
86
+ # this is the main function in which we define our webpage
87
def main():
    """Render the web page and report the model's verdict for one transaction.

    The page shows a title and a short model description, collects the sender
    country, beneficiary country, USD amount and transaction type from the
    user, and, once the "Predict" button is pressed, passes them to
    ``prediction`` and displays whether the result is 0 (normal) or
    1 (fraudulent).
    """
    # The page is built through ``st``, but the file never binds that name, so
    # every ``st.*`` call raised NameError. The calls used here (title, header,
    # columns, text_input, number_input, button, success) are Streamlit's API;
    # import it at function scope so this function works on its own.
    import streamlit as st

    # giving the webpage a title
    st.title("Fraud Detection ML App")
    st.header("Model Description", divider="gray")
    multi = '''This is a Gaussian Naive Bayes model trained on a synthetic dataset, containing a large variety of transaction types representing normal activities
    as well as abnormal/fraudulent activities. The model predicts whether a transaction is normal or fraudulent.
    For more details on the model please refer to the model card at https://huggingface.co/saifhmb/fraud-detection-model
    '''
    st.markdown(multi)
    st.markdown("To determine whether a transaction is normal or fraudulent, please **ENTER** the Sender Country, Beneficiary Country, Amount in USD and Transaction Type :")

    # First row of inputs: the two countries side by side.
    col1, col2 = st.columns(2)
    with col1:
        Sender_Country = st.text_input("Sender Country")
    with col2:
        Bene_Country = st.text_input("Beneficiary Country")

    # Second row of inputs: amount and transaction type.
    col3, col4 = st.columns(2)
    with col3:
        USD_amount = st.number_input("Amount in USD")
    with col4:
        Transaction_Type = st.text_input("Transaction Type (Please enter one of the following: make-payment, quick-payment, move-funds, pay-check)")

    # Only run the model when the user asks for a prediction; the two outcomes
    # are mutually exclusive, so they are checked with if/elif.
    if st.button("Predict"):
        result = prediction(Sender_Country, Bene_Country, USD_amount, Transaction_Type)
        if result == 0:
            st.success("The output is {}".format(result) + " This is a NORMAL transaction")
        elif result == 1:
            st.success("The output is {}".format(result) + " This is a FRAUDULENT TRANSACTION")
115
+
116
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
118
+