# Spaces: Runtime error
# -*- coding: utf-8 -*-
"""Copy of Lab06.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1eKsEZ2OurE_fRyVw_cxMPq5cuYpUkSJM

We will train an XGBoost model on the Adult Income dataset and deploy it on Hugging Face Spaces.
"""
# Fetch the training data. The notebook used the IPython shell escape
# `!wget`, which is a SyntaxError in a plain Python script, so the download
# is done with the standard library instead.
import urllib.request
from pathlib import Path

INCOME_CSV_URL = 'http://www.donlapark.cmustat.com/Income.csv'
INCOME_CSV_PATH = Path('Income.csv')

# Skip the download when the file is already present so that re-running the
# script does not hit the network again.
if not INCOME_CSV_PATH.exists():
    urllib.request.urlretrieve(INCOME_CSV_URL, INCOME_CSV_PATH)
import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from xgboost import XGBClassifier
# Ordinal code for each education level, from lowest (1) to highest (16).
EDU_DICT = {
    'Preschool': 1,
    '1st-4th': 2,
    '5th-6th': 3,
    '7th-8th': 4,
    '9th': 5,
    '10th': 6,
    '11th': 7,
    '12th': 8,
    'HS-grad': 9,
    'Some-college': 10,
    'Assoc-voc': 11,
    'Assoc-acdm': 12,
    'Bachelors': 13,
    'Masters': 14,
    'Prof-school': 15,
    'Doctorate': 16,
}
# Load the raw table; the "income" column is the label, everything else is a
# feature.
X_train = pd.read_csv('Income.csv')
y_train = X_train.pop("income")
# Binary target: 1 where income is ">50K", 0 otherwise.
y_train = (y_train == ">50K").astype(int)

# Convert the education strings to their ordinal codes. The result is
# assigned back to the column explicitly: the previous chained
# `X_train['education'].replace(..., inplace=True)` acts on a temporary
# object under pandas copy-on-write and leaves the frame unchanged.
education_codes = X_train['education'].map(EDU_DICT)
unmapped = X_train.loc[education_codes.isna(), 'education'].unique()
if len(unmapped) > 0:
    # Fail early with a clear message rather than passing a mixed or
    # partially encoded column on to the preprocessing step.
    raise ValueError(
        f"education values missing from EDU_DICT: {list(unmapped)}"
    )
# Force an integer dtype so the column is selected as a numerical feature.
X_train['education'] = education_codes.astype('int64')
# Split the feature columns by dtype: int64/float64 columns are standardized,
# object/bool columns are one-hot encoded.
num_col = X_train.select_dtypes(include=['int64', 'float64']).columns
cat_col = X_train.select_dtypes(include=['object', 'bool']).columns
print(num_col)
print(cat_col)

# OneHotEncoder's `sparse` argument was renamed to `sparse_output` in
# scikit-learn 1.2 and removed in 1.4. Try the new name first and fall back
# to the old one so the script runs on either side of the rename.
try:
    onehot = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot = OneHotEncoder(sparse=False)

preprocessor = ColumnTransformer([("scaler", StandardScaler(), num_col),
                                  ("onehot", onehot, cat_col)])

# Preprocessing and classifier are bundled in one pipeline so that the same
# transformations are applied whenever the fitted model is used.
model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('classifier', XGBClassifier())])
model.fit(X_train, y_train)
# ### Saving the model
# Persist the fitted pipeline (preprocessing + classifier) to disk.
joblib.dump(model, 'model.joblib')

# Also store the distinct values observed in each categorical column.
unique_values = {col: X_train[col].unique() for col in cat_col}
joblib.dump(unique_values, 'unique_values.joblib')