File size: 6,241 Bytes
e80acd3
 
 
 
 
 
e3f7f8d
4034e13
 
e80acd3
 
 
 
 
85d617b
e80acd3
 
 
 
 
 
 
 
 
 
 
 
b5fee77
bbc09a5
e80acd3
72f76c6
 
4034e13
e3f7f8d
 
 
e80acd3
e3f7f8d
 
 
 
 
 
 
 
e80acd3
 
 
 
 
 
 
 
 
e3f7f8d
 
e80acd3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3f7f8d
 
e80acd3
e3f7f8d
92d448c
e80acd3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16e7cd6
e80acd3
16e7cd6
e80acd3
16e7cd6
e80acd3
 
 
16e7cd6
e80acd3
16e7cd6
e80acd3
16e7cd6
e80acd3
 
 
16e7cd6
e80acd3
16e7cd6
e80acd3
16e7cd6
e80acd3
 
 
 
 
 
 
 
 
 
ff2779b
e80acd3
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# importing libraries
from datasets import load_dataset, load_dataset_builder
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn.pipeline import Pipeline
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, classification_report
#from transformers import Trainer, TrainingArguments
from skops import hub_utils
import pickle
from skops.card import Card, metadata_from_config
from pathlib import Path
from tempfile import mkdtemp, mkstemp
import streamlit as st
from PIL import Image

# Loading the dataset from the Hugging Face Hub and converting it to a DataFrame
dataset_name = "saifhmb/CreditCardRisk"
dataset = load_dataset(dataset_name, split = 'train')
dataset = pd.DataFrame(dataset)

# 'ID' is removed so it is not fed to the model as a feature.
dataset = dataset.drop(['ID'], axis = 1)
# Take the target by name rather than by position: the very next line drops
# the same column by name, so selecting it positionally (last column) would
# silently pick the wrong target if the column order ever changed.
y = dataset['RISK'].values
dataset = dataset.drop(['RISK'], axis = 1)

# Encoding the Independent Variables
# Categorical predictors are one-hot encoded; handle_unknown='ignore' means a
# category not seen during fit is encoded as all zeros instead of raising.
categoricalColumns = ['GENDER', 'MARITAL', 'HOWPAID', 'MORTGAGE']
onehot_categorical = OneHotEncoder(handle_unknown='ignore')
categorical_transformer = Pipeline([('onehot', onehot_categorical)])

# Numeric predictors are whatever columns remain with a numeric dtype.
numericalColumns = dataset.select_dtypes(include=np.number).columns
sc = StandardScaler()
numerical_transformer = Pipeline([('scale', sc)])

# Variant 1: encode the categorical columns, pass everything else through untouched.
preprocessorForCategoricalColumns = ColumnTransformer(
    transformers=[('cat', categorical_transformer, categoricalColumns)],
    remainder='passthrough',
)
# Variant 2: encode the categorical columns and standardise the numeric ones.
preprocessorForAllColumns = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categoricalColumns),
        ('num', numerical_transformer, numericalColumns),
    ],
    remainder='passthrough',
)
X = dataset

# Encoding the Dependent Variable (class labels -> integer codes)
le = LabelEncoder()
y = le.fit_transform(y)

# Splitting the dataset into Training and Test set (15% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=0,
)

# Training Logit Reg Model using the Training set
# NOTE(review): the pipeline uses the categorical-only preprocessor, so the
# numeric columns reach the classifier unscaled - confirm this is intended.
classifier = LogisticRegression()
model = Pipeline(
    [
        ('preprocessor', preprocessorForCategoricalColumns),
        ('classifier', classifier),
    ]
).fit(X_train, y_train)

# Predicting the Test result
y_pred = model.predict(X_test)

# Making the Confusion Matrix and evaluating performance
# confusion_matrix takes (y_true, y_pred): rows are the true classes and
# columns the predicted ones, which is also how ConfusionMatrixDisplay labels
# its axes. Passing the arguments the other way round transposes the matrix.
cm = confusion_matrix(y_test, y_pred, labels=model.classes_)
display_labels = np.array(['bad loss', 'bad profit', 'good risk'])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)
disp.plot()
plt.show()
# Summary metrics on the held-out set; precision and recall are micro-averaged.
acc = accuracy_score(y_test, y_pred)
ps = precision_score(y_test, y_pred, average ='micro')
rs = recall_score(y_test, y_pred, average ='micro')

# Pickling the model
# The context manager closes the file even if pickle.dump raises.
with open("model.pkl", "wb") as pickle_out:
    pickle.dump(model, pickle_out)

# Loading the model to predict on the data
# The file read here is the one written just above by this same script;
# pickle.load must not be pointed at files from an untrusted source.
with open('model.pkl', 'rb') as pickle_in:
    model = pickle.load(pickle_in)

def welcome():
    """Return the fixed greeting string."""
    greeting = 'welcome all'
    return greeting

# defining the function which will make the prediction using the data which the user inputs 
def prediction(AGE, INCOME, GENDER, MARITAL, NUMKIDS, NUMCARDS, HOWPAID, MORTGAGE, STORECAR, LOANS):
    """Predict the risk class for one customer with the module-level ``model``.

    The ten arguments are placed, in order, into a one-row DataFrame whose
    column names equal the argument names, and that row is handed to
    ``model.predict``.

    Returns:
        The value returned by ``model.predict`` for that single row. It is
        also printed to stdout.
    """
    # Local names are chosen so they do not shadow this function or the
    # module-level ``X``.
    features = pd.DataFrame(
        [[AGE, INCOME, GENDER, MARITAL, NUMKIDS, NUMCARDS, HOWPAID, MORTGAGE, STORECAR, LOANS]],
        columns = ['AGE', 'INCOME', 'GENDER', 'MARITAL', 'NUMKIDS', 'NUMCARDS', 'HOWPAID', 'MORTGAGE', 'STORECAR', 'LOANS'],
    )
    predicted = model.predict(features)
    print(predicted)
    return predicted
  
# this is the main function in which we define our webpage  
def main():
    """Render the Streamlit page and, on request, show the predicted risk class.

    Draws the title and model description, collects the ten customer
    attributes through input widgets, and when the "Predict" button is
    pressed passes them to ``prediction`` and reports the outcome with
    ``st.success``.
    """
    # giving the webpage a title
    st.title("Credit Card Risk Assessment ML App")
    st.header("Model Description", divider = "gray")
    multi = '''This is a logistic regression model trained on customers' credit card risk dataset in a bank using sklearn library. 
    The model predicts whether a customer is worth issuing a credit card or not.
    For more details on the model please refer to the model card at https://huggingface.co/saifhmb/Credit-Card-Risk-Model
    '''
    st.markdown(multi)
    st.markdown("To determine whether a customer is worth issuing a credit card or not, please **ENTER** the AGE INCOME, GENDER, MARITAL, NUMKIDS, NUMCARDS, HOWPAID, MORTGAGE, STORECAR, and LOANS:")

    # Inputs are laid out three per row.
    col1, col2, col3 = st.columns(3)
    with col1:
        AGE = st.number_input("AGE")
    with col2:
        INCOME = st.number_input("INCOME")
    with col3:
        GENDER = st.text_input("GENDER (Please enter 'm' for male and 'f' for female)")

    col4, col5, col6 = st.columns(3)
    with col4:
        MARITAL = st.text_input("MARITAL STATUS (Please enter one of the following options: 'single', 'married', or 'divsepwid')")
    with col5:
        NUMKIDS = st.number_input("Number of dependent children")
    with col6:
        NUMCARDS = st.number_input("Number of credit cards excluding store credit cards")

    col7, col8, col9 = st.columns(3)
    with col7:
        HOWPAID = st.text_input("How often is customer paid by employer (weekly or monthly)")
    with col8:
        MORTGAGE = st.text_input("Does customer have a mortgage? please enter 'y' for yes or 'n' for no")
    with col9:
        STORECAR = st.number_input("Number of store credit cards")

    LOANS = st.number_input("Number of outstanding loans")

    if not st.button("Predict"):
        return

    # The categorical answers are typed freely, and the prompts above advertise
    # lower-case codes. Trim and lower-case them so that " M" or "Weekly" is
    # not treated as an unknown category (the file's one-hot encoder is built
    # with handle_unknown='ignore', which would hide the mismatch).
    GENDER, MARITAL, HOWPAID, MORTGAGE = (
        answer.strip().lower() for answer in (GENDER, MARITAL, HOWPAID, MORTGAGE)
    )

    result = prediction(AGE, INCOME, GENDER, MARITAL, NUMKIDS, NUMCARDS, HOWPAID, MORTGAGE, STORECAR, LOANS)
    # prediction() returns the model output for a single row; take that one
    # element so the message shows "0" rather than the array repr "[0]".
    risk_class = int(result[0])

    # One message suffix per class code.
    verdicts = {
        0: " which falls under 'bad loss' and thus the customer is NOT worth issuing a credit card",
        1: " which falls under 'bad profit' and thus the customer MAYBE worth issuing a credit card",
        2: " which falls under 'good risk' and thus the customer is worth issuing a credit card",
    }
    verdict = verdicts.get(risk_class)
    if verdict is not None:
        st.success("The output is {}".format(risk_class) + verdict)

# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()