Spaces:
Runtime error
Runtime error
Delete app.py
Browse files
app.py
DELETED
|
@@ -1,75 +0,0 @@
|
|
| 1 |
-
# -*- coding: utf-8 -*-
"""Copy of Lab06.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1eKsEZ2OurE_fRyVw_cxMPq5cuYpUkSJM

We will train an XGBoost model on the Adult's Income dataset and deploy it on Hugging Face spaces.

Running this script:
  1. downloads ``Income.csv`` (unless a local copy already exists),
  2. fits a preprocessing + XGBoost pipeline on it,
  3. writes ``model.joblib`` (the fitted pipeline) and
     ``unique_values.joblib`` (the distinct values of each categorical column).
"""

import os
import urllib.request

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

from xgboost import XGBClassifier


DATA_URL = 'http://www.donlapark.cmustat.com/Income.csv'
DATA_PATH = 'Income.csv'

# Ordinal encoding of the 'education' column: a higher number means a
# higher education level.
EDU_DICT = {'Preschool': 1,
            '1st-4th': 2,
            '5th-6th': 3,
            '7th-8th': 4,
            '9th': 5,
            '10th': 6,
            '11th': 7,
            '12th': 8,
            'HS-grad': 9,
            'Some-college': 10,
            'Assoc-voc': 11,
            'Assoc-acdm': 12,
            'Bachelors': 13,
            'Masters': 14,
            'Prof-school': 15,
            'Doctorate': 16
            }


# The notebook used the shell escape `!wget <url>`, which is not valid Python
# outside IPython/Colab.  Fetch the file with the standard library instead;
# an existing local copy is reused rather than downloaded again.
if not os.path.exists(DATA_PATH):
    urllib.request.urlretrieve(DATA_URL, DATA_PATH)

X_train = pd.read_csv(DATA_PATH)

# Split off the target and turn it into a 0/1 label (1 means income ">50K").
y_train = X_train.pop("income")
y_train = (y_train == ">50K").astype(int)

# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: the chained in-place form does not update the frame when
# pandas Copy-on-Write is active.  infer_objects() turns the column into a
# numeric dtype when every value was mapped, so it is picked up as a
# numerical feature below.
X_train['education'] = X_train['education'].replace(EDU_DICT).infer_objects()

# Names of numerical features
num_col = X_train.select_dtypes(include=['int64', 'float64']).columns
# Names of categorical features
cat_col = X_train.select_dtypes(include=['object', 'bool']).columns

print(num_col)
print(cat_col)

# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to
# `sparse_output`, and 1.4 removed the old name.  Try the new name first and
# fall back for older installations.
try:
    onehot = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot = OneHotEncoder(sparse=False)

# Standardize numerical columns and one-hot encode categorical columns.
preprocessor = ColumnTransformer([("scaler", StandardScaler(), num_col),
                                  ("onehot", onehot, cat_col)])

model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('classifier', XGBClassifier())])

model.fit(X_train, y_train)

# ### Saving the model

joblib.dump(model, 'model.joblib')

# Distinct values of every categorical column, stored next to the model.
unique_values = {col: X_train[col].unique() for col in cat_col}

joblib.dump(unique_values, 'unique_values.joblib')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|