Spaces:

nakere424
/

nakere424_Class_homework

Runtime error

App Files Files Community

nakere424_Class_homework / app.py

nakere424

Upload app.py

2798386 over 2 years ago

raw

history blame

1.94 kB

	# -*- coding: utf-8 -*-
	"""Copy of Lab06.ipynb

	Automatically generated by Colaboratory.

	Original file is located at
	https://colab.research.google.com/drive/1eKsEZ2OurE_fRyVw_cxMPq5cuYpUkSJM

	We will train an XGBoost model on the Adult's Income dataset and deploy it on Hugging Face spaces.
	"""

	# Fetch the training data with the standard library. The notebook export
	# used the IPython shell escape `!wget ...`, which is a SyntaxError when
	# this file is executed as a plain Python script.
	import os
	import urllib.request

	_INCOME_URL = 'http://www.donlapark.cmustat.com/Income.csv'
	_INCOME_CSV = 'Income.csv'

	# Skip the download when the file is already present in the working
	# directory, so repeated runs do not hit the network again.
	if not os.path.exists(_INCOME_CSV):
	    urllib.request.urlretrieve(_INCOME_URL, _INCOME_CSV)

	import pandas as pd
	from sklearn.compose import ColumnTransformer
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import OneHotEncoder, StandardScaler

	from xgboost import XGBClassifier


	# Maps each education label to an integer rank, starting at 1 for
	# 'Preschool' and ending at 16 for 'Doctorate'. The tuple order below
	# defines the rank of every label.
	EDU_DICT = {
	    level: rank
	    for rank, level in enumerate(
	        (
	            'Preschool',
	            '1st-4th',
	            '5th-6th',
	            '7th-8th',
	            '9th',
	            '10th',
	            '11th',
	            '12th',
	            'HS-grad',
	            'Some-college',
	            'Assoc-voc',
	            'Assoc-acdm',
	            'Bachelors',
	            'Masters',
	            'Prof-school',
	            'Doctorate',
	        ),
	        start=1,
	    )
	}


	# Load the data set from the working directory; the "income" column is
	# the prediction target and every other column is a feature.
	X_train = pd.read_csv('Income.csv')

	# Split off the target and binarise it: 1 for ">50K", 0 for anything else.
	y_train = X_train.pop("income")
	y_train = (y_train == ">50K").astype(int)

	# Replace education labels with their integer rank from EDU_DICT.
	# The result is assigned back to the column: calling
	# `.replace(..., inplace=True)` on `X_train['education']` is a chained
	# in-place update, which does not modify X_train under pandas
	# Copy-on-Write. `infer_objects()` turns an all-integer object column
	# into int64 so it is picked up as a numerical feature below.
	X_train['education'] = X_train['education'].replace(EDU_DICT).infer_objects()

	# Names of numerical features
	num_col = X_train.select_dtypes(include=['int64', 'float64']).columns
	# Names of categorical features
	cat_col = X_train.select_dtypes(include=['object', 'bool']).columns

	# print num_col and cat_col
	print(num_col)
	print(cat_col)

	# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to
	# `sparse_output` and later removed the old name; try the new spelling
	# first and fall back for older releases.
	try:
	    onehot = OneHotEncoder(sparse_output=False)
	except TypeError:
	    onehot = OneHotEncoder(sparse=False)

	# Standardise numerical columns and one-hot encode categorical ones.
	preprocessor = ColumnTransformer([("scaler", StandardScaler(), num_col),
	                                  ("onehot", onehot, cat_col)])

	# Preprocessing and the XGBoost classifier are chained in one pipeline,
	# so a single fit call trains both.
	model = Pipeline(steps=[('preprocessor', preprocessor),
	                        ('classifier', XGBClassifier())])

	model.fit(X_train, y_train)

	"""### Saving the model"""

	import joblib

	# Write the fitted pipeline to disk.
	joblib.dump(model, 'model.joblib')

	# For every categorical column, record the distinct values seen in the
	# training data, keyed by column name.
	unique_values = dict(
	    (name, X_train[name].unique()) for name in cat_col
	)

	# Write the per-column value lists to a second file.
	joblib.dump(unique_values, 'unique_values.joblib')