File size: 5,490 Bytes
01cc70d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fbbe14
01cc70d
ee90022
01cc70d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471b39d
01cc70d
 
 
 
 
 
 
77ddf3f
01cc70d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# importing libraries
from datasets import load_dataset, load_dataset_builder
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, classification_report
import imblearn
from imblearn.under_sampling import RandomUnderSampler
from skops import hub_utils
import pickle
#from skops.card import Card, metadata_from_config
from pathlib import Path
import streamlit as st
from tempfile import mkdtemp, mkstemp

# Fetch the 'train' split of the fraud payment dataset and hold it as a DataFrame.
dataset_name = "saifhmb/FraudPaymentData"
dataset = pd.DataFrame(load_dataset(dataset_name, split='train'))

# Discard rows with missing values, then the columns the original author
# flagged as high-cardinality features.
high_cardinality_columns = [
    'Time_step',
    'Transaction_Id',
    'Sender_Id',
    'Sender_Account',
    'Bene_Id',
    'Bene_Account',
]
dataset = dataset.dropna().drop(columns=high_cardinality_columns)

# The target vector is taken by position (sixth remaining column) before the
# target column is removed from the feature frame.
# NOTE(review): presumably position 5 is the 'Label' column dropped below -- confirm.
y = dataset.iloc[:, 5].values

# Remove the target, plus 'Sender_lob' (a single unique value, per the original
# author) and 'Sender_Sector' (high cardinality, per the original author).
dataset = dataset.drop(columns=['Label', 'Sender_lob', 'Sender_Sector'])

# Encoding the independent variables.
# Categorical inputs are one-hot encoded into a dense array; with
# handle_unknown='ignore' a category not seen during fit does not raise.
categoricalColumns = ['Sender_Country', 'Bene_Country', 'Transaction_Type']
onehot_categorical = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
categorical_transformer = Pipeline([('onehot', onehot_categorical)])

# Every numeric-dtype column still present in `dataset` is standardised.
numericalColumns = dataset.select_dtypes(include=np.number).columns
sc = StandardScaler()
numerical_transformer = Pipeline([('scale', sc)])

# Two column transformers are built: one that only encodes the categorical
# columns, and one that also scales the numeric columns. Columns named by
# neither step are passed through unchanged.
categorical_step = ('cat', categorical_transformer, categoricalColumns)
numerical_step = ('num', numerical_transformer, numericalColumns)
preprocessorForCategoricalColumns = ColumnTransformer(
    transformers=[categorical_step],
    remainder='passthrough',
)
preprocessorForAllColumns = ColumnTransformer(
    transformers=[categorical_step, numerical_step],
    remainder='passthrough',
)

# Splitting the dataset into Training and Test set
X = dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)  # random state is 0 or 42

# Train Naive Bayes Model using the Training set
# Handling the imbalanced dataset: only the training split is randomly
# under-sampled; the test split keeps its original class distribution.
# random_state is pinned (same seed as the split above) so that re-running the
# script draws the same sample and therefore fits the same model.
under_sampler = RandomUnderSampler(random_state=42)
X_under, y_under = under_sampler.fit_resample(X_train, y_train)

# Preprocessing and the classifier are chained in one pipeline so that
# model.predict can be given raw (un-encoded) feature rows.
classifier = GaussianNB()  # select the appropriate algorithm for the problem statement
model = Pipeline(steps=[('preprocessorAll', preprocessorForAllColumns), ('classifier', classifier)])
model.fit(X_under, y_under)

# Predicting the Test result
y_pred = model.predict(X_test)

# Making the Confusion Matrix and evaluating performance.
# confusion_matrix takes (y_true, y_pred) in that order: rows are the actual
# labels and columns the predicted ones, which is what the "True label" /
# "Predicted label" axes drawn by ConfusionMatrixDisplay assume. Passing the
# arguments the other way round plots a transposed matrix.
cm = confusion_matrix(y_test, y_pred, labels=model.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=np.array(['0 - Normal', '1 - Fraudulent']))
disp.plot()
plt.show()
acc = accuracy_score(y_test, y_pred)

# Pickling the model. The context manager closes the file even if dump raises.
with open("model.pkl", "wb") as pickle_out:
    pickle.dump(model, pickle_out)

# Loading the model to predict on the data. The file read here is the one
# written just above by this script, so it is trusted input for pickle.load.
# The handle is closed as soon as the model has been read.
with open('model.pkl', 'rb') as pickle_in:
    model = pickle.load(pickle_in)

def welcome():
    """Return the fixed greeting string."""
    greeting = 'welcome all'
    return greeting

# defining the function which will make the prediction using the data which the user inputs 
def prediction(Sender_Country, Bene_Country, USD_amount, Transaction_Type):
    """Classify a single transaction with the module-level ``model``.

    The four arguments are placed, in order, into a one-row DataFrame with
    the columns 'Sender_Country', 'Bene_Country', 'USD_amount' and
    'Transaction_Type', which is then handed to ``model.predict``.

    Returns:
        Whatever ``model.predict`` returns for that one row. The same value
        is also printed to stdout.
    """
    feature_names = ['Sender_Country', 'Bene_Country', 'USD_amount', 'Transaction_Type']
    feature_values = [Sender_Country, Bene_Country, USD_amount, Transaction_Type]
    single_row = pd.DataFrame([feature_values], columns=feature_names)

    predicted = model.predict(single_row)
    print(predicted)
    return predicted

# this is the main function in which we define our webpage 
def main():
    """Render the Streamlit page and show a verdict for the entered transaction.

    The page shows a title, a model description, four input widgets laid out
    in two rows of two columns, and a "Predict" button. When the button is
    pressed the entered values are passed to ``prediction`` and a success
    banner is displayed if the result equals 0 or equals 1.
    """
    # Page heading and model summary.
    st.title("Fraud Detection ML App")
    st.header("Model Description", divider="gray")
    description = '''This is a Gaussian Naive Bayes model trained on a synthetic dataset, containing a large variety of transaction types representing normal activities 
    as well as abnormal/fraudulent activities. The model predicts whether a transaction is normal or fraudulent.
    For more details on the model please refer to the model card at https://huggingface.co/saifhmb/fraud-detection-model
    '''
    st.markdown(description)
    st.markdown("To determine whether a transaction is normal or fraudulent, please **ENTER** the Sender Country, Beneficiary Country, Amount in USD and Transaction Type :")

    # First input row: sender and beneficiary countries.
    sender_column, beneficiary_column = st.columns(2)
    with sender_column:
        sender_country = st.text_input("Sender Country")
    with beneficiary_column:
        beneficiary_country = st.text_input("Beneficiary Country")

    # Second input row: amount and transaction type.
    amount_column, type_column = st.columns(2)
    with amount_column:
        usd_amount = st.number_input("Amount in USD")
    with type_column:
        transaction_type = st.text_input("Transaction Type (Please enter one of the following: make-payment, quick-payment, move-funds, pay-check)")

    # Nothing else is rendered until the button has been clicked.
    if not st.button("Predict"):
        return

    result = prediction(sender_country, beneficiary_country, usd_amount, transaction_type)

    # Each known outcome is paired with the sentence appended to the banner.
    verdicts = (
        (0, " This is a NORMAL transaction"),
        (1, " This is a FRAUDULENT TRANSACTION"),
    )
    for outcome, verdict in verdicts:
        if result == outcome:
            st.success(f"The output is {result}" + verdict)
    
# Build the page only when this file is executed as the main module.
if __name__ == "__main__":
    main()