Spaces:
Build error
Build error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
import seaborn as sns
|
5 |
+
from sklearn.model_selection import train_test_split
|
6 |
+
from sklearn.linear_model import LogisticRegression
|
7 |
+
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_auc_score
|
8 |
+
from sklearn.preprocessing import StandardScaler
|
9 |
+
import gradio as gr
|
10 |
+
|
11 |
+
def credit_card_fraud_detection() -> dict:
    """Train and evaluate a logistic-regression fraud classifier.

    Reads ``creditcard.csv`` from the current working directory, collects a
    few descriptive statistics, balances the two classes by under-sampling
    the legitimate transactions, fits a ``LogisticRegression`` model on an
    80/20 stratified split and measures its accuracy.

    Returns:
        A dict with the following keys:

        * ``missing_values`` -- per-column count of null cells.
        * ``class_distribution`` -- row count per ``Class`` value (full data).
        * ``legit_amount_stats`` / ``fraud_amount_stats`` -- ``describe()``
          of ``Amount`` for each class.
        * ``class_means`` -- per-class column means on the full data.
        * ``class_value_counts`` -- row count per class after under-sampling.
        * ``class_means_new_dataset`` -- per-class column means after
          under-sampling.
        * ``training_data_accuracy`` / ``test_data_accuracy`` -- accuracy of
          the fitted model on the train and test partitions.

    Raises:
        FileNotFoundError: if ``creditcard.csv`` is not present.
    """
    # One seed for every random step so repeated calls report the same
    # numbers; the value matches the seed the split already used.
    seed = 2

    # Load the dataset
    credit_card_data = pd.read_csv("creditcard.csv")

    # Check for missing values
    missing_values = credit_card_data.isnull().sum()

    # Check class distribution
    class_distribution = credit_card_data['Class'].value_counts()

    # Visualize class distribution on a dedicated figure and release it
    # afterwards; otherwise every call keeps drawing onto the same lingering
    # axes when the backend does not destroy the figure on show().
    fig, ax = plt.subplots()
    sns.countplot(x='Class', data=credit_card_data, ax=ax)
    ax.set_title('Class Distribution')
    plt.show()
    plt.close(fig)

    # This dataset is highly unbalanced:
    #   0 --> normal transaction
    #   1 --> fraudulent transaction

    # Separate the data for analysis
    legit = credit_card_data[credit_card_data.Class == 0]
    fraud = credit_card_data[credit_card_data.Class == 1]

    # Statistical measures of the data
    legit_amount_stats = legit.Amount.describe()
    fraud_amount_stats = fraud.Amount.describe()

    # Compare the values for both kinds of transaction
    class_means = credit_card_data.groupby('Class').mean()

    # Under-sampling: draw as many legitimate rows as there are fraudulent
    # ones (the original code hard-coded 492 for this) so both classes are
    # equally represented in the training data.
    legit_sample = legit.sample(n=len(fraud), random_state=seed)

    # Concatenate the two balanced halves
    new_dataset = pd.concat([legit_sample, fraud], axis=0)

    class_value_counts = new_dataset['Class'].value_counts()

    class_means_new_dataset = new_dataset.groupby('Class').mean()

    # Split the data into features and target
    X = new_dataset.drop(columns='Class')
    Y = new_dataset['Class']

    # Split the data into training and testing partitions
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, stratify=Y, random_state=seed
    )

    # Model training - logistic regression
    # NOTE(review): the features are fed in unscaled; if the solver reports
    # convergence warnings, consider scaling or a larger max_iter.
    model = LogisticRegression()
    model.fit(X_train, Y_train)

    # Model evaluation - accuracy score (arguments are y_true, y_pred)
    X_train_prediction = model.predict(X_train)
    training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

    # Accuracy on test data
    X_test_prediction = model.predict(X_test)
    test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

    return {
        'missing_values': missing_values,
        'class_distribution': class_distribution,
        'legit_amount_stats': legit_amount_stats,
        'fraud_amount_stats': fraud_amount_stats,
        'class_means': class_means,
        'class_value_counts': class_value_counts,
        'class_means_new_dataset': class_means_new_dataset,
        'training_data_accuracy': training_data_accuracy,
        'test_data_accuracy': test_data_accuracy,
    }
|
88 |
+
|
89 |
+
# Launching the Gradio Interface
|
90 |
+
def _format_fraud_report() -> str:
    """Run the fraud-detection pipeline and render its results as text.

    The pipeline returns a dict of pandas objects and floats, which has no
    direct Gradio output component; each entry is rendered with ``str()``
    under its key so it can be shown in a plain text box.
    """
    results = credit_card_fraud_detection()
    return "\n\n".join(f"{name}:\n{value}" for name, value in results.items())


# gr.Interface requires both `inputs` and `outputs`; omitting them raises a
# TypeError at import time. The pipeline takes no arguments, so `inputs` is
# None (only the output component is displayed).
iface = gr.Interface(
    fn=_format_fraud_report,
    inputs=None,
    outputs="text",
    title="Credit Card Fraud Detection",
)
iface.launch()
|