nakere424 committed on
Commit
c17075c
·
1 Parent(s): 3684b96

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -75
app.py DELETED
@@ -1,75 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """Copy of Lab06.ipynb
3
-
4
- Automatically generated by Colaboratory.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/1eKsEZ2OurE_fRyVw_cxMPq5cuYpUkSJM
8
-
9
- We will train an XGBoost model on the Adult Income dataset and deploy it on Hugging Face Spaces.
10
- """
11
-
12
- !wget http://www.donlapark.cmustat.com/Income.csv
13
-
14
- import pandas as pd
15
- from sklearn.compose import ColumnTransformer
16
- from sklearn.pipeline import Pipeline
17
- from sklearn.preprocessing import OneHotEncoder, StandardScaler
18
-
19
- from xgboost import XGBClassifier
20
-
21
-
22
- EDU_DICT = {'Preschool': 1,
23
- '1st-4th': 2,
24
- '5th-6th': 3,
25
- '7th-8th': 4,
26
- '9th': 5,
27
- '10th': 6,
28
- '11th': 7,
29
- '12th': 8,
30
- 'HS-grad': 9,
31
- 'Some-college': 10,
32
- 'Assoc-voc': 11,
33
- 'Assoc-acdm': 12,
34
- 'Bachelors': 13,
35
- 'Masters': 14,
36
- 'Prof-school': 15,
37
- 'Doctorate': 16
38
- }
39
-
40
-
41
- X_train = pd.read_csv('Income.csv')
42
-
43
- X_train
44
-
45
- y_train = X_train.pop("income")
46
- y_train = (y_train == ">50K").astype(int)
47
- X_train['education'].replace(EDU_DICT, inplace=True)
48
-
49
- # Names of numerical features
50
- num_col = X_train.select_dtypes(include=['int64', 'float64']).columns
51
- # Names of categorical features
52
- cat_col = X_train.select_dtypes(include=['object', 'bool']).columns
53
-
54
- print(num_col)
55
- print(cat_col)
56
-
57
- # print num_col and cat_col
58
-
59
- preprocessor = ColumnTransformer([("scaler", StandardScaler(), num_col),
60
- ("onehot", OneHotEncoder(sparse=False), cat_col)])
61
-
62
- model = Pipeline(steps=[('preprocessor', preprocessor),
63
- ('classifier', XGBClassifier())])
64
-
65
- model.fit(X_train, y_train)
66
-
67
- """### Saving the model"""
68
-
69
- import joblib
70
-
71
- joblib.dump(model, 'model.joblib')
72
-
73
- unique_values = {col:X_train[col].unique() for col in cat_col}
74
-
75
- joblib.dump(unique_values, 'unique_values.joblib')