|
|
|
from datasets import load_dataset, load_dataset_builder |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
import pandas as pd |
|
import sklearn |
|
from sklearn.pipeline import Pipeline |
|
from sklearn.compose import make_column_transformer |
|
from sklearn.compose import make_column_selector |
|
from sklearn.compose import ColumnTransformer |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler |
|
from sklearn.linear_model import LogisticRegression |
|
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, classification_report |
|
|
|
from skops import hub_utils |
|
import pickle |
|
from skops.card import Card, metadata_from_config |
|
from pathlib import Path |
|
from tempfile import mkdtemp, mkstemp |
|
import streamlit as st |
|
from PIL import Image |
|
|
|
|
|
# Fetch the training split of the credit-card-risk dataset from the Hugging
# Face Hub and hold it as a pandas DataFrame for the rest of the script.
dataset_name = "saifhmb/CreditCardRisk"
dataset = pd.DataFrame(load_dataset(dataset_name, split="train"))

# 'ID' is discarded before the target is read, so it is never used as a feature.
dataset = dataset.drop(columns=["ID"])

# The target is taken positionally from the last column and 'RISK' is then
# dropped by name -- presumably the same column; verify against the dataset
# schema if the column order ever changes.
y = dataset.iloc[:, -1].values
dataset = dataset.drop(columns=["RISK"])
|
|
|
|
|
# Nominal features are one-hot encoded. handle_unknown='ignore' makes a
# category that was not seen during fitting encode as all zeros instead of
# raising at prediction time.
categoricalColumns = ['GENDER', 'MARITAL', 'HOWPAID', 'MORTGAGE']
onehot_categorical = OneHotEncoder(handle_unknown='ignore')
categorical_transformer = Pipeline([('onehot', onehot_categorical)])

# Numeric features are whichever remaining columns pandas typed as numbers.
numericalColumns = dataset.select_dtypes(include=np.number).columns
sc = StandardScaler()
numerical_transformer = Pipeline([('scale', sc)])

# Variant 1: encode the categorical columns only; every other column is
# passed through untouched.
preprocessorForCategoricalColumns = ColumnTransformer(
    [('cat', categorical_transformer, categoricalColumns)],
    remainder='passthrough',
)

# Variant 2: encode the categorical columns and standardise the numeric ones.
preprocessorForAllColumns = ColumnTransformer(
    [
        ('cat', categorical_transformer, categoricalColumns),
        ('num', numerical_transformer, numericalColumns),
    ],
    remainder="passthrough",
)

# Feature matrix: the frame that is left after the earlier column drops.
X = dataset
|
|
|
|
|
# Map the string risk labels onto integer class ids.
le = LabelEncoder()
y = le.fit_transform(y)

# Hold out 15% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=0,
)

# Logistic regression on top of the categorical-only preprocessor (numeric
# columns reach the classifier unscaled via remainder='passthrough').
classifier = LogisticRegression()
model = Pipeline(
    [
        ('preprocessor', preprocessorForCategoricalColumns),
        ('classifier', classifier),
    ]
)
model.fit(X_train, y_train)

# Predictions on the held-out rows, used by the evaluation that follows.
y_pred = model.predict(X_test)
|
|
|
|
|
# Evaluate the fitted model on the held-out split.
#
# confusion_matrix expects (y_true, y_pred): rows are the true classes and
# columns the predicted ones, which is also how ConfusionMatrixDisplay labels
# its axes. Passing the arguments the other way round transposes the matrix
# and makes the plotted "True label" / "Predicted label" axes wrong.
cm = confusion_matrix(y_test, y_pred, labels=model.classes_)

# Human-readable names for the encoded classes 0, 1 and 2, in that order.
display_labels = np.array(['bad loss', 'bad profit', 'good risk'])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)
disp.plot()
plt.show()

# Summary metrics; precision and recall are micro-averaged across classes.
acc = accuracy_score(y_test, y_pred)
ps = precision_score(y_test, y_pred, average='micro')
rs = recall_score(y_test, y_pred, average='micro')
|
|
|
|
|
# Persist the fitted pipeline to disk. The context manager guarantees the
# handle is flushed and closed even if pickling raises.
with open("model.pkl", "wb") as pickle_out:
    pickle.dump(model, pickle_out)

# Reload the pipeline from the file just written so that the app serves the
# pickled artefact. The file is produced by this script itself; never point
# this at an untrusted pickle, since unpickling can execute arbitrary code.
with open('model.pkl', 'rb') as pickle_in:
    model = pickle.load(pickle_in)
|
|
|
def welcome():
    """Return the fixed greeting string."""
    greeting = 'welcome all'
    return greeting
|
|
|
|
|
def prediction(AGE, INCOME, GENDER, MARITAL, NUMKIDS, NUMCARDS, HOWPAID, MORTGAGE, STORECAR, LOANS):
    """Run the module-level ``model`` on a single customer record.

    The ten arguments are placed into a one-row DataFrame whose column names
    match the parameter names, and that frame is handed to ``model.predict``.

    Returns:
        Whatever ``model.predict`` returns for the one-row frame (the
        predicted class for that customer). The value is also printed to
        stdout.
    """
    feature_names = [
        'AGE', 'INCOME', 'GENDER', 'MARITAL', 'NUMKIDS',
        'NUMCARDS', 'HOWPAID', 'MORTGAGE', 'STORECAR', 'LOANS',
    ]
    record = [AGE, INCOME, GENDER, MARITAL, NUMKIDS, NUMCARDS, HOWPAID, MORTGAGE, STORECAR, LOANS]
    X = pd.DataFrame([record], columns=feature_names)

    # Named `predicted` rather than `prediction` so the local does not shadow
    # this function's own name.
    predicted = model.predict(X)
    print(predicted)
    return predicted
|
|
|
|
|
def main():
    """Render the Streamlit page and show a prediction on request.

    The page shows a title and a short model description, collects the ten
    customer attributes through input widgets and, when the "Predict" button
    is pressed, passes them to ``prediction`` and reports the outcome with
    ``st.success``.
    """
    st.title("Credit Card Risk Assessment ML App")
    st.header("Model Description", divider="gray")
    multi = (
        "This is a logistic regression model trained on customers' credit card risk dataset in a bank using sklearn library.\n"
        "The model predicts whether a customer is worth issuing a credit card or not.\n"
        "For more details on the model please refer to the model card at https://huggingface.co/saifhmb/Credit-Card-Risk-Model\n"
    )
    st.markdown(multi)
    st.markdown("To determine whether a customer is worth issuing a credit card or not, please **ENTER** the AGE INCOME, GENDER, MARITAL, NUMKIDS, NUMCARDS, HOWPAID, MORTGAGE, STORECAR, and LOANS:")

    # Free-text categorical answers are stripped and lower-cased before use.
    # The prompts ask for lower-case codes ('m', 'single', 'weekly', 'y', ...),
    # and the model's one-hot encoder silently encodes any unknown value as
    # all zeros, so an answer such as ' M' would otherwise be ignored without
    # any error. Assumes the training categories are the lower-case codes the
    # prompts list -- confirm against the dataset.
    col1, col2, col3 = st.columns(3)
    with col1:
        AGE = st.number_input("AGE")
    with col2:
        INCOME = st.number_input("INCOME")
    with col3:
        GENDER = st.text_input("GENDER (Please enter 'm' for male and 'f' for female)").strip().lower()

    col4, col5, col6 = st.columns(3)
    with col4:
        MARITAL = st.text_input("MARITAL STATUS (Please enter one of the following options: 'single', 'married', or 'divsepwid')").strip().lower()
    with col5:
        NUMKIDS = st.number_input("Number of dependent children")
    with col6:
        NUMCARDS = st.number_input("Number of credit cards excluding store credit cards")

    col7, col8, col9 = st.columns(3)
    with col7:
        HOWPAID = st.text_input("How often is customer paid by employer (weekly or monthly)").strip().lower()
    with col8:
        MORTGAGE = st.text_input("Does customer have a mortgage? please enter 'y' for yes or 'n' for no").strip().lower()
    with col9:
        STORECAR = st.number_input("Number of store credit cards")

    LOANS = st.number_input("Number of outstanding loans")

    if st.button("Predict"):
        result = prediction(AGE, INCOME, GENDER, MARITAL, NUMKIDS, NUMCARDS, HOWPAID, MORTGAGE, STORECAR, LOANS)

        # `prediction` returns the output of model.predict for one row, i.e. a
        # one-element array. Pull out the scalar class id explicitly instead
        # of relying on the truthiness of an array comparison.
        label = int(result[0])

        # Message suffix for each encoded class id.
        verdicts = {
            0: " which falls under 'bad loss' and thus the customer is NOT worth issuing a credit card",
            1: " which falls under 'bad profit' and thus the customer MAYBE worth issuing a credit card",
            2: " which falls under 'good risk' and thus the customer is worth issuing a credit card",
        }
        if label in verdicts:
            st.success("The output is {}".format(result) + verdicts[label])
|
|
|
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|
|
|
|