Spaces:
Sleeping
Sleeping
File size: 5,490 Bytes
01cc70d 7fbbe14 01cc70d ee90022 01cc70d 471b39d 01cc70d 77ddf3f 01cc70d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
# importing libraries
from datasets import load_dataset, load_dataset_builder
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, classification_report
import imblearn
from imblearn.under_sampling import RandomUnderSampler
from skops import hub_utils
import pickle
#from skops.card import Card, metadata_from_config
from pathlib import Path
import streamlit as st
from tempfile import mkdtemp, mkstemp
# Loading the dataset
# The 'train' split is pulled from the Hugging Face Hub and converted to a
# pandas DataFrame; rows containing missing values are discarded.
dataset_name = "saifhmb/FraudPaymentData"
dataset = load_dataset(dataset_name, split = 'train')
dataset = pd.DataFrame(dataset)
dataset = dataset.dropna()

# Target vector. The label is selected by column name instead of by position
# so that it stays correct even if the column order of the dataset changes.
y = dataset['Label'].values

# Columns removed from the feature matrix:
#   - identifiers / high cardinality features
#   - 'Sender_lob' (only a single unique value) and 'Sender_Sector' (high cardinality)
#   - 'Label' (the target extracted above)
dataset = dataset.drop(
    [
        'Time_step', 'Transaction_Id', 'Sender_Id', 'Sender_Account', 'Bene_Id', 'Bene_Account',
        'Sender_lob', 'Sender_Sector',
        'Label',
    ],
    axis = 1,
)
# Encoding the Independent Variables
# Feature groups: the categorical columns are listed explicitly, the numerical
# ones are whatever numeric dtypes remain in the feature frame.
categoricalColumns = ['Sender_Country', 'Bene_Country', 'Transaction_Type']
numericalColumns = dataset.select_dtypes(include = np.number).columns

# Categorical branch: dense one-hot encoding, configured with
# handle_unknown='ignore' so unseen categories do not raise at transform time.
onehot_categorical = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
categorical_transformer = Pipeline(steps=[('onehot', onehot_categorical)])

# Numerical branch: standard scaling.
sc = StandardScaler()
numerical_transformer = Pipeline(steps=[('scale', sc)])

# Preprocessor that only encodes the categorical columns and passes the rest through.
preprocessorForCategoricalColumns = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categoricalColumns),
    ],
    remainder='passthrough',
)

# Preprocessor that handles both feature groups; this is the one used by the model pipeline.
preprocessorForAllColumns = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categoricalColumns),
        ('num', numerical_transformer, numericalColumns),
    ],
    remainder='passthrough',
)
# Splitting the dataset into Training and Test set
X = dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15, random_state = 42) # random state is 0 or 42

# Train Naive Bayes Model using the Training set
# Handling imbalanced dataset: only the training portion is under-sampled, the
# test set keeps its original class distribution. The sampler is seeded so the
# trained model is reproducible, in line with the seeded split above.
under_sampler = RandomUnderSampler(random_state = 42)
X_under, y_under = under_sampler.fit_resample(X_train, y_train)
classifier = GaussianNB() # select the appropriate algorithm for the problem statement
model = Pipeline(steps = [('preprocessorAll', preprocessorForAllColumns), ('classifier', classifier)])
model.fit(X_under, y_under)

# Predicting the Test result
y_pred = model.predict(X_test)

# Making the Confusion Matrix and evaluating performance
# confusion_matrix expects (y_true, y_pred); passing them in that order keeps
# rows = true labels and columns = predicted labels, matching the axis labels
# drawn by ConfusionMatrixDisplay.
cm = confusion_matrix(y_test, y_pred, labels=model.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=np.array(['0 - Normal', '1 - Fraudulent']))
disp.plot()
plt.show()
acc = accuracy_score(y_test, y_pred)
# Pickling the model
# Context managers guarantee the file handles are closed, even if
# dumping/loading raises.
with open("model.pkl", "wb") as pickle_out:
    pickle.dump(model, pickle_out)

# Loading the model to predict on the data
# NOTE: pickle.load executes arbitrary code from the file; this is acceptable
# here only because model.pkl was written by this script just above.
with open('model.pkl', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
def welcome():
    """Return the static greeting string."""
    greeting = 'welcome all'
    return greeting
# Prediction helper: turns the values entered by the user into a one-row
# frame and runs it through the module-level model.
def prediction(Sender_Country, Bene_Country, USD_amount, Transaction_Type):
    """Predict the class of a single transaction.

    The four arguments are placed in a one-row DataFrame whose column names
    match the feature names used when building the request frame below, and
    the frame is passed to ``model.predict``. The raw prediction is printed
    and returned unchanged.
    """
    feature_names = ['Sender_Country', 'Bene_Country', 'USD_amount', 'Transaction_Type']
    row = [Sender_Country, Bene_Country, USD_amount, Transaction_Type]
    X = pd.DataFrame([row], columns = feature_names)
    predicted = model.predict(X)
    print(predicted)
    return predicted
# Streamlit page: describes the model, collects the transaction details and
# displays the prediction.
def main():
    """Render the web page and show the model's verdict for the entered transaction.

    The page shows a title and a description of the model, then four input
    widgets laid out in two rows of two columns. When the "Predict" button is
    pressed, the inputs are passed to ``prediction`` and a message is shown
    for a result equal to 0 (normal) or 1 (fraudulent).
    """
    # Title and model description
    st.title("Fraud Detection ML App")
    st.header("Model Description", divider = "gray")
    description = '''This is a Gaussian Naive Bayes model trained on a synthetic dataset, containing a large variety of transaction types representing normal activities
as well as abnormal/fraudulent activities. The model predicts whether a transaction is normal or fraudulent.
For more details on the model please refer to the model card at https://huggingface.co/saifhmb/fraud-detection-model
'''
    st.markdown(description)
    st.markdown("To determine whether a transaction is normal or fraudulent, please **ENTER** the Sender Country, Beneficiary Country, Amount in USD and Transaction Type :")

    # First row of inputs: the two countries
    sender_col, bene_col = st.columns(2)
    with sender_col:
        Sender_Country = st.text_input("Sender Country")
    with bene_col:
        Bene_Country = st.text_input("Beneficiary Country")

    # Second row of inputs: amount and transaction type
    amount_col, type_col = st.columns(2)
    with amount_col:
        USD_amount = st.number_input("Amount in USD")
    with type_col:
        Transaction_Type = st.text_input("Transaction Type (Please enter one of the following: make-payment, quick-payment, move-funds, pay-check)")

    # Nothing to report until the user asks for a prediction
    if not st.button("Predict"):
        return

    result = prediction(Sender_Country, Bene_Country, USD_amount, Transaction_Type)

    # Each label is checked independently against the prediction
    verdicts = (
        (0, " This is a NORMAL transaction"),
        (1, " This is a FRAUDULENT TRANSACTION"),
    )
    for label, verdict in verdicts:
        if result == label:
            st.success("The output is {}".format(result) + verdict)
# Script entry point: build the page only when this file is run directly.
if __name__ == '__main__':
    main()
|