nakere424 committed on
Commit
2798386
·
1 Parent(s): 24420af

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# -*- coding: utf-8 -*-
"""Copy of Lab06.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1eKsEZ2OurE_fRyVw_cxMPq5cuYpUkSJM

We will train an XGBoost model on the Adult's Income dataset and deploy it on Hugging Face spaces.
"""

from pathlib import Path
from urllib.request import urlretrieve

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

from xgboost import XGBClassifier


# Training data location. The notebook fetched this file with the shell magic
# `!wget`, which is not valid Python outside IPython/Colab, so the download is
# done with the standard library instead.
DATA_URL = 'http://www.donlapark.cmustat.com/Income.csv'
DATA_PATH = Path('Income.csv')

# Ordinal encoding of the `education` column: a larger number is assigned to
# each successive level in the list below.
EDU_DICT = {
    'Preschool': 1,
    '1st-4th': 2,
    '5th-6th': 3,
    '7th-8th': 4,
    '9th': 5,
    '10th': 6,
    '11th': 7,
    '12th': 8,
    'HS-grad': 9,
    'Some-college': 10,
    'Assoc-voc': 11,
    'Assoc-acdm': 12,
    'Bachelors': 13,
    'Masters': 14,
    'Prof-school': 15,
    'Doctorate': 16,
}

# Download the dataset only when it is not already present locally.
if not DATA_PATH.exists():
    urlretrieve(DATA_URL, DATA_PATH)

X_train = pd.read_csv(DATA_PATH)

# Split off the target column and binarize it: 1 for ">50K", 0 otherwise.
y_train = X_train.pop("income")
y_train = (y_train == ">50K").astype(int)

# Assign the result back instead of calling `replace(..., inplace=True)` on the
# column selection: the in-place form on a selected column is deprecated in
# pandas and does not update the DataFrame under Copy-on-Write.
# `infer_objects()` turns the replaced column into an integer dtype when every
# value was mapped, so it is picked up as a numerical feature below.
X_train['education'] = X_train['education'].replace(EDU_DICT).infer_objects()

# Names of numerical features
num_col = X_train.select_dtypes(include=['int64', 'float64']).columns
# Names of categorical features
cat_col = X_train.select_dtypes(include=['object', 'bool']).columns

# print num_col and cat_col
print(num_col)
print(cat_col)

# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to `sparse_output`
# and 1.4 removed the old name; fall back to the old keyword on older releases.
try:
    onehot = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot = OneHotEncoder(sparse=False)

# Scale the numerical columns and one-hot encode the categorical ones.
preprocessor = ColumnTransformer([("scaler", StandardScaler(), num_col),
                                  ("onehot", onehot, cat_col)])

model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('classifier', XGBClassifier())])

model.fit(X_train, y_train)

# ### Saving the model

joblib.dump(model, 'model.joblib')

# Distinct values of every categorical column, saved next to the model.
unique_values = {col: X_train[col].unique() for col in cat_col}

joblib.dump(unique_values, 'unique_values.joblib')