Spaces:
Paused
Paused
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -6,38 +6,13 @@ def create_new_features(df):
|
|
| 6 |
df['years_since_renovation'] = df['year_sold'] - df['yr_renovated']
|
| 7 |
df.drop(columns=['year_sold'], inplace=True)
|
| 8 |
df['has_basement'] = df['sqft_basement'].apply(lambda x: 1 if x > 0 else 0)
|
|
|
|
|
|
|
| 9 |
return df
|
| 10 |
|
| 11 |
def normalize(df):
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
# 'sqft_living': 370,
|
| 15 |
-
# 'sqft_lot': 638,
|
| 16 |
-
# 'floors': 1,
|
| 17 |
-
# 'waterfront': 0,
|
| 18 |
-
# 'view': 0,
|
| 19 |
-
# 'condition': 1,
|
| 20 |
-
# 'sqft_above': 370,
|
| 21 |
-
# 'sqft_basement': 0,
|
| 22 |
-
# 'yr_built': 1900,
|
| 23 |
-
# 'yr_renovated': 0,
|
| 24 |
-
# 'house_age': 0,
|
| 25 |
-
# 'years_since_renovation': 0}
|
| 26 |
-
# max_dict = {'bedrooms': 9,
|
| 27 |
-
# 'bathrooms': 8,
|
| 28 |
-
# 'sqft_living': 13540,
|
| 29 |
-
# 'sqft_lot': 1074218,
|
| 30 |
-
# 'floors': 3,
|
| 31 |
-
# 'waterfront': 1,
|
| 32 |
-
# 'view': 4,
|
| 33 |
-
# 'condition': 5,
|
| 34 |
-
# 'sqft_above': 9410,
|
| 35 |
-
# 'sqft_basement': 4820,
|
| 36 |
-
# 'yr_built': 2014,
|
| 37 |
-
# 'yr_renovated': 2014,
|
| 38 |
-
# 'house_age': 114,
|
| 39 |
-
# 'years_since_renovation': 2014}
|
| 40 |
-
|
| 41 |
with open("./min_dict.json", "r") as f:
|
| 42 |
min_dict = json.load(f)
|
| 43 |
|
|
@@ -52,8 +27,18 @@ def normalize(df):
|
|
| 52 |
df[col] = df[col].apply(lambda x: (x-min_dict[col])/(max_dict[col]-min_dict[col]))
|
| 53 |
return df
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
def init_new_pred():
|
| 56 |
import pandas as pd
|
|
|
|
| 57 |
columns = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
|
| 58 |
'waterfront', 'view', 'condition', 'sqft_above', 'sqft_basement',
|
| 59 |
'yr_built', 'yr_renovated', 'house_age', 'years_since_renovation',
|
|
|
|
| 6 |
df['years_since_renovation'] = df['year_sold'] - df['yr_renovated']
|
| 7 |
df.drop(columns=['year_sold'], inplace=True)
|
| 8 |
df['has_basement'] = df['sqft_basement'].apply(lambda x: 1 if x > 0 else 0)
|
| 9 |
+
mask = df['yr_renovated'] == 0
|
| 10 |
+
df.loc[mask, 'yr_renovated'] = df.loc[mask, 'yr_built']
|
| 11 |
return df
|
| 12 |
|
| 13 |
def normalize(df):
|
| 14 |
+
import json
|
| 15 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
with open("./min_dict.json", "r") as f:
|
| 17 |
min_dict = json.load(f)
|
| 18 |
|
|
|
|
| 27 |
df[col] = df[col].apply(lambda x: (x-min_dict[col])/(max_dict[col]-min_dict[col]))
|
| 28 |
return df
|
| 29 |
|
| 30 |
+
def bucketize(df):
    """Round the square-footage columns down to a multiple of their bucket size.

    Each of ``sqft_living``, ``sqft_lot``, ``sqft_above`` and
    ``sqft_basement`` is replaced by ``(value // 25) * 25``, i.e. the lower
    edge of the 25-wide bucket the value falls in.

    Args:
        df: pandas DataFrame containing the four columns listed above.
            It is modified in place.

    Returns:
        The same ``df`` object, with the four columns bucketized.

    Raises:
        KeyError: if one of the expected columns is missing from ``df``.
    """
    bucket_sizes = {
        'sqft_living': 25,
        'sqft_lot': 25,
        'sqft_above': 25,
        'sqft_basement': 25,
    }
    for col, size in bucket_sizes.items():
        # Whole-column floor division: same result as mapping
        # ``(x // size) * size`` over every element, without a Python-level
        # call per row.
        df[col] = (df[col] // size) * size
    return df
|
| 38 |
+
|
| 39 |
def init_new_pred():
|
| 40 |
import pandas as pd
|
| 41 |
+
|
| 42 |
columns = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
|
| 43 |
'waterfront', 'view', 'condition', 'sqft_above', 'sqft_basement',
|
| 44 |
'yr_built', 'yr_renovated', 'house_age', 'years_since_renovation',
|