Spaces:
Paused
Paused
Create utils.py
Browse files
utils.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# create new features
|
| 2 |
+
def create_new_features(df):
    """Derive age and basement features from a housing DataFrame.

    The input frame is not modified; all new columns are written to a copy.

    Args:
        df: DataFrame with a datetime-like ``date`` column (it must support
            the ``.dt`` accessor) and the columns ``yr_built``,
            ``yr_renovated`` and ``sqft_basement``.

    Returns:
        A new DataFrame without the ``date`` column and with three columns
        appended: ``house_age``, ``years_since_renovation`` and
        ``has_basement`` (1 when ``sqft_basement`` is greater than 0, else 0).

    Raises:
        KeyError: If ``date`` or one of the other required columns is missing.
    """
    # Keep the sale year in a local Series instead of writing a temporary
    # column onto the caller's frame.
    year_sold = df['date'].dt.year

    # drop() returns a new frame, so every assignment below stays local.
    # 'year_sold' is included because the result never carried such a column;
    # errors='ignore' only matters for it, since a missing 'date' has already
    # raised above.
    features = df.drop(columns=['date', 'year_sold'], errors='ignore')

    features['house_age'] = year_sold - features['yr_built']
    # NOTE(review): if yr_renovated uses 0 to mean "never renovated", this
    # yields the sale year itself for those rows -- confirm that is intended.
    features['years_since_renovation'] = year_sold - features['yr_renovated']
    features['has_basement'] = (features['sqft_basement'] > 0).astype('int64')
    return features
|
| 10 |
+
|
| 11 |
+
def normalize(df, col, min_dict, max_dict):
    """Min-max scale one column of ``df`` in place and return it.

    Each value ``x`` becomes ``(x - min_dict[col]) / (max_dict[col] - min_dict[col])``.

    Args:
        df: DataFrame holding the column; ``df[col]`` is overwritten with the
            scaled values.
        col: Name of the column to scale; also the key looked up in both
            dictionaries.
        min_dict: Mapping from column name to the minimum used for scaling.
        max_dict: Mapping from column name to the maximum used for scaling.

    Returns:
        The scaled column ``df[col]``.

    Raises:
        KeyError: If ``col`` is missing from ``df``, ``min_dict`` or
            ``max_dict``.
    """
    lower = min_dict[col]
    span = max_dict[col] - lower
    if span == 0:
        # Degenerate range (min == max): dividing would raise or produce
        # NaN/inf. Fall back to a unit range so values equal to the minimum
        # map to 0.
        span = 1
    # Vectorised arithmetic replaces the per-element apply().
    df[col] = (df[col] - lower) / span
    return df[col]
|