7jimmy committed on
Commit
47696d4
·
verified ·
1 Parent(s): 29f2d46

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.linear_model import LogisticRegression
7
+ from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_auc_score
8
+ from sklearn.preprocessing import StandardScaler
9
+ import gradio as gr
10
+
11
def credit_card_fraud_detection(csv_path="creditcard.csv", random_state=2):
    """Explore a credit-card dataset and evaluate a logistic-regression model.

    The dataset is read from ``csv_path`` and must contain a binary ``Class``
    column (0 = normal transaction, 1 = fraudulent transaction) and an
    ``Amount`` column. Because the classes are highly unbalanced, the
    legitimate transactions are under-sampled to the number of fraudulent
    ones before a logistic-regression model is trained and scored.

    Args:
        csv_path: Path of the CSV file to load.
        random_state: Seed used for both the under-sampling and the
            train/test split, so repeated calls report the same accuracies.

    Returns:
        A dict with the keys ``missing_values``, ``class_distribution``,
        ``legit_amount_stats``, ``fraud_amount_stats``, ``class_means``,
        ``class_value_counts``, ``class_means_new_dataset`` (pandas objects)
        and ``training_data_accuracy``, ``test_data_accuracy`` (floats).
    """
    # Load the dataset
    credit_card_data = pd.read_csv(csv_path)

    # Check for missing values
    missing_values = credit_card_data.isnull().sum()

    # Check class distribution
    class_distribution = credit_card_data['Class'].value_counts()

    # Visualize class distribution on a dedicated figure and always close it,
    # so repeated calls do not keep drawing onto the same global axes.
    fig, ax = plt.subplots()
    try:
        sns.countplot(x='Class', data=credit_card_data, ax=ax)
        ax.set_title('Class Distribution')
        plt.show()
    finally:
        plt.close(fig)

    # Separate the data for analysis:
    #   0 --> normal transaction
    #   1 --> fraudulent transaction
    legit = credit_card_data[credit_card_data['Class'] == 0]
    fraud = credit_card_data[credit_card_data['Class'] == 1]

    # Statistical measures of the transaction amounts
    legit_amount_stats = legit['Amount'].describe()
    fraud_amount_stats = fraud['Amount'].describe()

    # Compare the per-class feature means
    class_means = credit_card_data.groupby('Class').mean()

    # Under-sampling: build a balanced dataset with as many normal
    # transactions as fraudulent ones. The size is derived from the data
    # (492 for the standard dataset) rather than hard-coded, and capped so
    # sampling cannot request more rows than exist.
    n_samples = min(len(fraud), len(legit))
    legit_sample = legit.sample(n=n_samples, random_state=random_state)

    # Concatenate the two DataFrames
    new_dataset = pd.concat([legit_sample, fraud], axis=0)

    class_value_counts = new_dataset['Class'].value_counts()
    class_means_new_dataset = new_dataset.groupby('Class').mean()

    # Split the data into features and target
    X = new_dataset.drop(columns='Class')
    Y = new_dataset['Class']

    # Split the data into training and testing data
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, stratify=Y, random_state=random_state
    )

    # Standardize the features (statistics learned from the training split
    # only) so the solver is not dominated by large-magnitude columns and
    # can converge within its default iteration budget.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Model training - logistic regression
    model = LogisticRegression()
    model.fit(X_train_scaled, Y_train)

    # Model evaluation - accuracy score, called as (y_true, y_pred)
    X_train_prediction = model.predict(X_train_scaled)
    training_data_accuracy = float(accuracy_score(Y_train, X_train_prediction))

    # Accuracy on test data
    X_test_prediction = model.predict(X_test_scaled)
    test_data_accuracy = float(accuracy_score(Y_test, X_test_prediction))

    return {
        'missing_values': missing_values,
        'class_distribution': class_distribution,
        'legit_amount_stats': legit_amount_stats,
        'fraud_amount_stats': fraud_amount_stats,
        'class_means': class_means,
        'class_value_counts': class_value_counts,
        'class_means_new_dataset': class_means_new_dataset,
        'training_data_accuracy': training_data_accuracy,
        'test_data_accuracy': test_data_accuracy,
    }
88
+
89
# Launching the Gradio Interface
def _format_fraud_report():
    """Run the fraud-detection analysis and render its results as plain text.

    The analysis returns a dict whose values are pandas objects and floats;
    each entry is rendered as ``name:`` followed by the value's string form
    so it can be shown in a text output component.
    """
    results = credit_card_fraud_detection()
    return "\n\n".join(f"{name}:\n{value}" for name, value in results.items())


# gr.Interface needs explicit `inputs` and `outputs`; the analysis takes no
# user input, so the input list is empty and the report is shown as text.
iface = gr.Interface(
    fn=_format_fraud_report,
    inputs=[],
    outputs="text",
    title="Credit Card Fraud Detection",
)
iface.launch()