Spaces:
Runtime error
Runtime error
Delete app.py
Browse files
app.py
DELETED
@@ -1,75 +0,0 @@
|
|
1 |
-
# -*- coding: utf-8 -*-
"""Copy of Lab06.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1eKsEZ2OurE_fRyVw_cxMPq5cuYpUkSJM

We will train an XGBoost model on the Adult's Income dataset and deploy it on Hugging Face spaces.

Running this script downloads ``Income.csv`` (if it is not already present),
fits a preprocessing + XGBoost pipeline on it, and writes two artifacts to the
working directory: ``model.joblib`` (the fitted pipeline) and
``unique_values.joblib`` (the distinct values of every categorical column).
"""

from pathlib import Path
from urllib.request import urlretrieve

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from xgboost import XGBClassifier

DATA_URL = 'http://www.donlapark.cmustat.com/Income.csv'
DATA_PATH = Path('Income.csv')

# Ordinal encoding of the education levels, lowest to highest.
EDU_DICT = {
    'Preschool': 1,
    '1st-4th': 2,
    '5th-6th': 3,
    '7th-8th': 4,
    '9th': 5,
    '10th': 6,
    '11th': 7,
    '12th': 8,
    'HS-grad': 9,
    'Some-college': 10,
    'Assoc-voc': 11,
    'Assoc-acdm': 12,
    'Bachelors': 13,
    'Masters': 14,
    'Prof-school': 15,
    'Doctorate': 16,
}

# The notebook used the IPython shell escape `!wget ...`, which is a syntax
# error in a plain .py file. Fetch the CSV with the standard library instead,
# and only when no local copy exists yet.
if not DATA_PATH.exists():
    urlretrieve(DATA_URL, DATA_PATH)

X_train = pd.read_csv(DATA_PATH)

# Split off the target and turn it into a 0/1 label (1 means income ">50K").
y_train = X_train.pop("income")
y_train = (y_train == ">50K").astype(int)

# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: the chained in-place form does not update X_train under
# pandas copy-on-write. infer_objects() keeps the column integer-typed on
# pandas versions where replace() no longer downcasts an object column.
X_train['education'] = X_train['education'].replace(EDU_DICT).infer_objects()

# Names of numerical features
num_col = X_train.select_dtypes(include=['int64', 'float64']).columns
# Names of categorical features
cat_col = X_train.select_dtypes(include=['object', 'bool']).columns

# print num_col and cat_col
print(num_col)
print(cat_col)

# Scale numerical columns and one-hot encode categorical ones.
# `sparse_output` replaces the `sparse` keyword, which was deprecated in
# scikit-learn 1.2 and removed in 1.4 (so this needs scikit-learn >= 1.2).
preprocessor = ColumnTransformer([
    ("scaler", StandardScaler(), num_col),
    ("onehot", OneHotEncoder(sparse_output=False), cat_col),
])

model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', XGBClassifier()),
])

model.fit(X_train, y_train)

# ### Saving the model

joblib.dump(model, 'model.joblib')

# Distinct values of each categorical column, saved alongside the model.
unique_values = {col: X_train[col].unique() for col in cat_col}

joblib.dump(unique_values, 'unique_values.joblib')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|