saifhmb committed on
Commit
72f76c6
·
unverified ·
1 Parent(s): b5fee77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -17
app.py CHANGED
@@ -9,7 +9,7 @@ from sklearn.model_selection import train_test_split
9
  from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
10
  from sklearn.linear_model import LogisticRegression
11
  from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, classification_report
12
- #from transformers import Trainer, TrainingArguments
13
  from skops import hub_utils
14
  import pickle
15
  from skops.card import Card, metadata_from_config
@@ -23,21 +23,16 @@ dataset_name = "saifhmb/CreditCardRisk"
23
  dataset = load_dataset(dataset_name, split = 'train')
24
  dataset = pd.DataFrame(dataset)
25
 
26
- dataset['GENDER'] = dataset['GENDER'].replace(['n', 'y'], [0, 1], inplace = True)
27
-
28
- dataset['MARITAL'] = dataset['MARITAL'].replace(['married', 'single', 'divsepwid'], [0, 1, 2], inplace = True)
29
-
30
- dataset['HOWPAID'] = dataset['HOWPAID'].replace(['n', 'y'], [0, 1], inplace = True)
31
-
32
- dataset['MORTGAGE'] = dataset['MORTGAGE'].replace(['weekly', 'monthly'], [0, 1], inplace = True)
33
  dataset = dataset.drop(['ID'], axis = 1)
34
- X = dataset.iloc[:, :-1].values
35
  y = dataset.iloc[:, -1].values
 
 
 
 
 
 
 
36
 
37
- # Encoding the Independent Variables
38
- ct = ColumnTransformer(transformers = [('encoder', OneHotEncoder(sparse_output=False), [2, 6])], remainder = 'passthrough')
39
- X = np.array(ct.fit_transform(X))
40
- #X= X.astype('int')
41
 
42
  # Encoding the Dependent Variable
43
  le = LabelEncoder()
@@ -46,10 +41,7 @@ y = le.fit_transform(y)
46
  # Splitting the dataset into Training and Test sets
47
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15, random_state = 0)
48
 
49
- # Feature Scaling
50
- sc = StandardScaler()
51
- X_train = sc.fit_transform(X_train)
52
- X_test = sc.transform(X_test)
53
 
54
  # Training Logit Reg Model using the Training set
55
  model = LogisticRegression()
 
9
  from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
10
  from sklearn.linear_model import LogisticRegression
11
  from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, classification_report
12
+ from transformers import Trainer, TrainingArguments
13
  from skops import hub_utils
14
  import pickle
15
  from skops.card import Card, metadata_from_config
 
23
  dataset = load_dataset(dataset_name, split = 'train')
24
  dataset = pd.DataFrame(dataset)
25
 
 
 
 
 
 
 
 
26
  dataset = dataset.drop(['ID'], axis = 1)
 
27
  y = dataset.iloc[:, -1].values
28
+ dataset = dataset.drop(['RISK'], axis = 1)
29
+
30
+ # Encoding the Independent Variables and Applying Feature Scaling
31
+ from sklearn.compose import make_column_transformer
32
+ from sklearn.compose import make_column_selector
33
+ ct = make_column_transformer((StandardScaler(),make_column_selector(dtype_include=np.number)),[OneHotEncoder(), make_column_selector(dtype_include=object)], remainder = 'passthrough')
34
+ X = ct.fit_transform(dataset)
35
 
 
 
 
 
36
 
37
  # Encoding the Dependent Variable
38
  le = LabelEncoder()
 
41
  # Splitting the dataset into Training and Test sets
42
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.15, random_state = 0)
43
 
44
+
 
 
 
45
 
46
  # Training Logit Reg Model using the Training set
47
  model = LogisticRegression()