saifhmb
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,7 @@ from sklearn.model_selection import train_test_split
|
|
9 |
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
|
10 |
from sklearn.linear_model import LogisticRegression
|
11 |
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, classification_report
|
12 |
-
|
13 |
from skops import hub_utils
|
14 |
import pickle
|
15 |
from skops.card import Card, metadata_from_config
|
@@ -23,21 +23,16 @@ dataset_name = "saifhmb/CreditCardRisk"
|
|
23 |
dataset = load_dataset(dataset_name, split = 'train')
|
24 |
dataset = pd.DataFrame(dataset)
|
25 |
|
26 |
-
dataset['GENDER'] = dataset['GENDER'].replace(['n', 'y'], [0, 1], inplace = True)
|
27 |
-
|
28 |
-
dataset['MARITAL'] = dataset['MARITAL'].replace(['married', 'single', 'divsepwid'], [0, 1, 2], inplace = True)
|
29 |
-
|
30 |
-
dataset['HOWPAID'] = dataset['HOWPAID'].replace(['n', 'y'], [0, 1], inplace = True)
|
31 |
-
|
32 |
-
dataset['MORTGAGE'] = dataset['MORTGAGE'].replace(['weekly', 'monthly'], [0, 1], inplace = True)
|
33 |
dataset = dataset.drop(['ID'], axis = 1)
|
34 |
-
X = dataset.iloc[:, :-1].values
|
35 |
y = dataset.iloc[:, -1].values
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
-
# Encoding the Independent Variables
|
38 |
-
ct = ColumnTransformer(transformers = [('encoder', OneHotEncoder(sparse_output=False), [2, 6])], remainder = 'passthrough')
|
39 |
-
X = np.array(ct.fit_transform(X))
|
40 |
-
#X= X.astype('int')
|
41 |
|
42 |
# Encoding the Dependent Variable
|
43 |
le = LabelEncoder()
|
@@ -46,10 +41,7 @@ y = le.fit_transform(y)
|
|
46 |
# Splitting the dataset into Training and Test sets
|
47 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15, random_state = 0)
|
48 |
|
49 |
-
|
50 |
-
sc = StandardScaler()
|
51 |
-
X_train = sc.fit_transform(X_train)
|
52 |
-
X_test = sc.transform(X_test)
|
53 |
|
54 |
# Training Logit Reg Model using the Training set
|
55 |
model = LogisticRegression()
|
|
|
9 |
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
|
10 |
from sklearn.linear_model import LogisticRegression
|
11 |
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, classification_report
|
12 |
+
from transformers import Trainer, TrainingArguments
|
13 |
from skops import hub_utils
|
14 |
import pickle
|
15 |
from skops.card import Card, metadata_from_config
|
|
|
23 |
dataset = load_dataset(dataset_name, split = 'train')
|
24 |
dataset = pd.DataFrame(dataset)
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
dataset = dataset.drop(['ID'], axis = 1)
|
|
|
27 |
y = dataset.iloc[:, -1].values
|
28 |
+
dataset = dataset.drop(['RISK'], axis = 1)
|
29 |
+
|
30 |
+
# Encoding the Independent Variables and Applying Feature Scaling
|
31 |
+
from sklearn.compose import make_column_transformer
|
32 |
+
from sklearn.compose import make_column_selector
|
33 |
+
# Scale numeric columns and one-hot encode object (string) columns; columns
# matched by neither selector are passed through unchanged.
# Each transformer spec is a (transformer, columns) tuple, as the
# make_column_transformer API documents.
ct = make_column_transformer(
    (StandardScaler(), make_column_selector(dtype_include=np.number)),
    (OneHotEncoder(), make_column_selector(dtype_include=object)),
    remainder='passthrough',
)
|
34 |
+
X = ct.fit_transform(dataset)
|
35 |
|
|
|
|
|
|
|
|
|
36 |
|
37 |
# Encoding the Dependent Variable
|
38 |
le = LabelEncoder()
|
|
|
41 |
# Splitting the dataset into Training and Test sets
|
42 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15, random_state = 0)
|
43 |
|
44 |
+
|
|
|
|
|
|
|
45 |
|
46 |
# Training Logit Reg Model using the Training set
|
47 |
model = LogisticRegression()
|