# Streamlit app: trains a logistic-regression model on the saifhmb/CreditCardRisk
# dataset and serves credit-card risk predictions through a web form.
# importing libraries
from datasets import load_dataset, load_dataset_builder
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn.pipeline import Pipeline
from sklearn.compose import make_column_transformer
from sklearn.compose import make_column_selector
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, classification_report
#from transformers import Trainer, TrainingArguments
from skops import hub_utils
import pickle
from skops.card import Card, metadata_from_config
from pathlib import Path
from tempfile import mkdtemp, mkstemp
import streamlit as st
from PIL import Image
# Load the training split from the Hugging Face Hub and hold it as a DataFrame.
dataset_name = "saifhmb/CreditCardRisk"
dataset = pd.DataFrame(load_dataset(dataset_name, split="train"))

# The 'ID' column is dropped before any modelling.
dataset = dataset.drop(columns=["ID"])

# The target is read positionally from the last column; 'RISK' is then removed
# so that the remaining columns form the feature table (assigned to X below).
y = dataset.iloc[:, -1].values
dataset = dataset.drop(columns=["RISK"])
# Build the feature preprocessors.

# Categorical features are one-hot encoded; with handle_unknown="ignore" a
# category that was not seen during fitting does not raise at transform time.
categoricalColumns = ["GENDER", "MARITAL", "HOWPAID", "MORTGAGE"]
onehot_categorical = OneHotEncoder(handle_unknown="ignore")
categorical_transformer = Pipeline([("onehot", onehot_categorical)])

# Numerical features are every column with a NumPy numeric dtype; they get a
# standard scaler.
numericalColumns = dataset.select_dtypes(include=np.number).columns
sc = StandardScaler()
numerical_transformer = Pipeline([("scale", sc)])

# Variant 1: encode the categorical columns, pass everything else through.
preprocessorForCategoricalColumns = ColumnTransformer(
    [("cat", categorical_transformer, categoricalColumns)],
    remainder="passthrough",
)

# Variant 2: encode the categorical columns and scale the numerical ones.
preprocessorForAllColumns = ColumnTransformer(
    [
        ("cat", categorical_transformer, categoricalColumns),
        ("num", numerical_transformer, numericalColumns),
    ],
    remainder="passthrough",
)

# Feature table handed to the train/test split.
X = dataset
# Turn the target labels into integer class codes.
le = LabelEncoder()
y = le.fit_transform(y)

# Hold out 15% of the rows as a test set; the seed is fixed so the split is
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=0
)

# Fit a logistic-regression pipeline on the training rows. It uses the
# categorical-only preprocessor, so numerical columns reach the classifier
# unscaled.
classifier = LogisticRegression()
model = Pipeline(
    [
        ("preprocessor", preprocessorForCategoricalColumns),
        ("classifier", classifier),
    ]
)
model.fit(X_train, y_train)

# Predictions for the held-out rows.
y_pred = model.predict(X_test)
# Evaluate the fitted model on the held-out rows.

# confusion_matrix takes (y_true, y_pred): rows are the true classes and
# columns the predicted ones, which is how ConfusionMatrixDisplay labels its
# axes. Passing the arguments the other way round transposes the plot.
cm = confusion_matrix(y_test, y_pred, labels=model.classes_)

# NOTE(review): assumes these names line up with the encoded classes 0, 1, 2
# in that order -- confirm against le.classes_.
display_labels = np.array(['bad loss', 'bad profit', 'good risk'])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)
disp.plot()
plt.show()

# Summary metrics; precision and recall are micro-averaged over all classes.
acc = accuracy_score(y_test, y_pred)
ps = precision_score(y_test, y_pred, average='micro')
rs = recall_score(y_test, y_pred, average='micro')
# Persist the fitted pipeline to disk. The context manager guarantees the file
# is flushed and closed even if pickling raises.
with open("model.pkl", "wb") as pickle_out:
    pickle.dump(model, pickle_out)

# Reload the pipeline that was just written; this is the object used for
# predictions below. The file is produced by this script a few lines above,
# so unpickling it is not a case of loading untrusted data.
with open("model.pkl", "rb") as pickle_in:
    model = pickle.load(pickle_in)
def welcome() -> str:
    """Return the fixed greeting string 'welcome all'."""
    return 'welcome all'
def prediction(AGE, INCOME, GENDER, MARITAL, NUMKIDS, NUMCARDS, HOWPAID, MORTGAGE, STORECAR, LOANS):
    """Predict the risk class for one customer from the user-supplied values.

    The ten arguments are placed into a single-row DataFrame whose column
    names match the argument names, and that frame is passed to the
    module-level ``model``.

    Returns:
        Whatever ``model.predict`` returns for the single row (presumably a
        length-1 array holding the encoded class label).
    """
    feature_names = [
        'AGE', 'INCOME', 'GENDER', 'MARITAL', 'NUMKIDS',
        'NUMCARDS', 'HOWPAID', 'MORTGAGE', 'STORECAR', 'LOANS',
    ]
    feature_values = [
        AGE, INCOME, GENDER, MARITAL, NUMKIDS,
        NUMCARDS, HOWPAID, MORTGAGE, STORECAR, LOANS,
    ]
    features = pd.DataFrame([feature_values], columns=feature_names)

    # A distinct local name avoids shadowing this function inside its own body.
    predicted = model.predict(features)
    print(predicted)
    return predicted
def main():
    """Render the Streamlit page and show a prediction on request.

    The page has a title, a short model description, ten input widgets (one
    per model feature) and a "Predict" button. When the button is pressed the
    inputs are passed to ``prediction`` and the resulting class is shown in a
    success box; nothing is shown before the button is pressed.
    """
    # Page title and model description.
    st.title("Credit Card Risk Assessment ML App")
    st.header("Model Description", divider = "gray")
    # The continuation lines stay at column 0 so the markdown text carries no
    # leading spaces.
    multi = '''This is a logistic regression model trained on customers' credit card risk dataset in a bank using sklearn library.
The model predicts whether a customer is worth issuing a credit card or not.
For more details on the model please refer to the model card at https://huggingface.co/saifhmb/Credit-Card-Risk-Model
'''
    st.markdown(multi)
    st.markdown("To determine whether a customer is worth issuing a credit card or not, please **ENTER** the AGE INCOME, GENDER, MARITAL, NUMKIDS, NUMCARDS, HOWPAID, MORTGAGE, STORECAR, and LOANS:")

    # Free-text categorical answers are trimmed and lower-cased before use.
    # The prompts ask for lower-case codes, and the encoder is configured to
    # ignore unknown categories, so an answer such as "M " would otherwise be
    # dropped silently instead of being treated as 'm'.
    col1, col2, col3 = st.columns(3)
    with col1:
        AGE = st.number_input("AGE")
    with col2:
        INCOME = st.number_input("INCOME")
    with col3:
        GENDER = st.text_input("GENDER (Please enter 'm' for male and 'f' for female)").strip().lower()

    col4, col5, col6 = st.columns(3)
    with col4:
        MARITAL = st.text_input("MARITAL STATUS (Please enter one of the following options: 'single', 'married', or 'divsepwid')").strip().lower()
    with col5:
        NUMKIDS = st.number_input("Number of dependent children")
    with col6:
        NUMCARDS = st.number_input("Number of credit cards excluding store credit cards")

    col7, col8, col9 = st.columns(3)
    with col7:
        HOWPAID = st.text_input("How often is customer paid by employer (weekly or monthly)").strip().lower()
    with col8:
        MORTGAGE = st.text_input("Does customer have a mortgage? please enter 'y' for yes or 'n' for no").strip().lower()
    with col9:
        STORECAR = st.number_input("Number of store credit cards")
    # NOTE(review): the original indentation was lost; this tenth input is
    # rendered full-width below the 3x3 grid -- confirm intended placement.
    LOANS = st.number_input("Number of outstanding loans")

    # Text appended to the "The output is ..." prefix for each encoded class.
    verdicts = {
        0: " which falls under 'bad loss' and thus the customer is NOT worth issuing a credit card",
        1: " which falls under 'bad profit' and thus the customer MAYBE worth issuing a credit card",
        2: " which falls under 'good risk' and thus the customer is worth issuing a credit card",
    }

    if st.button("Predict"):
        result = prediction(AGE, INCOME, GENDER, MARITAL, NUMKIDS, NUMCARDS, HOWPAID, MORTGAGE, STORECAR, LOANS)
        # One input row yields one predicted label; take it out as a plain int
        # rather than relying on the truth value of an array comparison.
        label = int(result[0])
        verdict = verdicts.get(label)
        if verdict is not None:
            st.success("The output is {}".format(result) + verdict)
# Run the app only when this file is executed as a script.
if __name__ == '__main__':
    main()