import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
import pickle
import shap
class UhiPredictor:
"""
    Urban Heat Island (UHI) predictor that estimates the UHI index for new samples and explains each prediction using SHAP.
INPUTS
---
model_path: str - Path to the trained model file.
scaler_path: str - Path to the standard scaler file.
explainer_type: SHAP explainer class (e.g., shap.TreeExplainer, shap.KernelExplainer).
    ref_data: pd.DataFrame or np.ndarray - Background dataset for the SHAP explainer.
feature_names: list - Feature names for SHAP analysis.
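    EXAMPLE
    ---
    A minimal usage sketch; paths and variables are placeholders. ref_data and
    feature_names must describe the scaled, engineered feature space the model
    was trained on, since SHAP values are computed on scaled inputs.
    >>> predictor = UhiPredictor(
    ...     model_path="uhi_model.h5",
    ...     scaler_path="uhi_scaler.pkl",
    ...     explainer_type=shap.DeepExplainer,
    ...     ref_data=scaled_background,
    ...     feature_names=engineered_names,
    ... )
    >>> predictor.predict(raw_sample_df, location=(-73.97, 40.78))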
"""
def __init__(self, model_path, scaler_path, explainer_type, ref_data, feature_names):
"""
Initializes the UHI predictor with a trained model, scaler, and SHAP explainer.
INPUTS
---
model_path: str - Path to the model file.
scaler_path: str - Path to the standard scaler file.
explainer_type: SHAP explainer class (e.g., shap.TreeExplainer, shap.KernelExplainer).
        ref_data: pd.DataFrame or np.ndarray - Background dataset for the SHAP explainer.
feature_names: list - Feature names for SHAP explanation.
"""
# Load the model and scaler
self.model = load_model(model_path)
with open(scaler_path, 'rb') as f:
self.scaler = pickle.load(f)
# Ensure reference data is in NumPy format
ref_data = np.array(ref_data) if isinstance(ref_data, pd.DataFrame) else ref_data
# Initialize SHAP explainer
self.explainer_type = explainer_type
self.explainer = self.explainer_type(self.model, ref_data)
self.feature_names = feature_names
def preprocess(self, df: pd.DataFrame) -> pd.DataFrame:
"""
Preprocess the input DataFrame to create new features for the model.
INPUT
-----
df: pd.DataFrame
The input DataFrame containing the features.
OUTPUT
------
pd.DataFrame
The preprocessed DataFrame with additional features.
"""
Wind_Direction_radians = np.radians(df["Wind_Direction_deg"])
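        # Encode wind direction as unit-vector components; the sin/cos pair
        # avoids the 359° -> 0° discontinuity of raw compass degrees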
Wind_X = np.sin(Wind_Direction_radians)
Wind_Y = np.cos(Wind_Direction_radians)
m100_Elevation_Wind_X = df["100m_Ground_Elevation"] * df["Avg_Wind_Speed"] * Wind_X
m150_Elevation_Wind_Y = df["150m_Ground_Elevation"] * df["Avg_Wind_Speed"] * Wind_Y
m150_Humidity_NDVI = df["Relative_Humidity"] * df["150m_NDVI"]
m150_Traffic_NDBI = df["Traffic_Volume"] * df["150m_NDBI"]
m300_Building_Wind_X = df["300m_Building_Height"] * df["Avg_Wind_Speed"] * Wind_X
m300_Building_Wind_Y = df["300m_Building_Height"] * df["Avg_Wind_Speed"] * Wind_Y
m300_Elevation_Wind_Y = df["300m_Ground_Elevation"] * df["Avg_Wind_Speed"] * Wind_Y
m300_BldgHeight_Count = df["300m_Building_Height"] * df["300m_Building_Count"]
m300_TotalBuildingArea_NDVI = df["300m_Total_Building_Area_m2"] * df["300m_NDVI"]
m300_Traffic_NDVI = df["Traffic_Volume"] * df["300m_NDVI"]
m300_Traffic_NDBI = df["Traffic_Volume"] * df["300m_NDBI"]
m300_Building_Aspect_Ratio = df["300m_Building_Height"] / np.sqrt(df["300m_Total_Building_Area_m2"] + 1e-6)
m300_Sky_View_Factor = 1 - df["300m_Building_Density"]
m300_Canopy_Cover_Ratio = df["300m_NDVI"] / (df["300m_Building_Density"] + 1e-6)
m300_GHG_Proxy = df["300m_Building_Count"] * df["Traffic_Volume"] * df["Solar_Flux"]
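        # Assemble the engineered features in the column order expected by the scaler and model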
        output = {
            "150m_NPCRI": df["150m_NPCRI"],
"100m_Elevation_Wind_X": m100_Elevation_Wind_X,
"150m_Traffic_Volume": df["Traffic_Volume"],
"150m_Elevation_Wind_Y": m150_Elevation_Wind_Y,
"150m_Humidity_NDVI": m150_Humidity_NDVI,
"150m_Traffic_NDBI": m150_Traffic_NDBI,
"300m_SI": df["300m_SI"],
"300m_NPCRI": df["300m_NPCRI"],
"300m_Coastal_Aerosol": df["300m_Coastal_Aerosol"],
"300m_Total_Building_Area_m2": df["300m_Total_Building_Area_m2"],
"300m_Building_Construction_Year": df["300m_Building_Construction_Year"],
"300m_Ground_Elevation": df["300m_Ground_Elevation"],
"300m_Building_Wind_X": m300_Building_Wind_X,
"300m_Building_Wind_Y": m300_Building_Wind_Y,
"300m_Elevation_Wind_Y": m300_Elevation_Wind_Y,
"300m_BldgHeight_Count": m300_BldgHeight_Count,
"300m_TotalBuildingArea_NDVI": m300_TotalBuildingArea_NDVI,
"300m_Traffic_NDVI": m300_Traffic_NDVI,
"300m_Traffic_NDBI": m300_Traffic_NDBI,
"300m_Building_Aspect_Ratio": m300_Building_Aspect_Ratio,
"300m_Sky_View_Factor": m300_Sky_View_Factor,
"300m_Canopy_Cover_Ratio": m300_Canopy_Cover_Ratio,
"300m_GHG_Proxy": m300_GHG_Proxy
}
output = pd.DataFrame(output, index=[0])
return output
def scale(self, X: pd.DataFrame) -> np.ndarray:
"""
        Apply the scaler fitted during training to the new data.
INPUT
-----
X: pd.DataFrame - The data to be scaled.
OUTPUT
------
np.ndarray - The scaled data.
"""
return self.scaler.transform(X)
    def compute_shap_values(self, X):
        """
        Compute SHAP values for a single scaled record.

        DeepExplainer is called with check_additivity=False because the
        additivity check can raise spurious errors on Keras models due to
        floating-point differences.
        """
        # Compute SHAP values; skip the additivity check for DeepExplainer
        if self.explainer_type == shap.DeepExplainer:
            shap_values = self.explainer.shap_values(X, check_additivity=False)
        else:
            shap_values = self.explainer.shap_values(X)
        # Some SHAP versions return a list with one array per model output
        if isinstance(shap_values, list):
            shap_values = shap_values[0]
        # Collapse singleton batch/output axes so callers receive a 1-D vector
        # of per-feature contributions for the single sample
        shap_values = np.squeeze(np.asarray(shap_values))
        return shap_values
def predict(self, X: pd.DataFrame, location=(None, None)) -> dict:
"""
Make a prediction on one sample and explain the prediction using SHAP.
INPUT
-----
X: pd.DataFrame - The data to predict a UHI index for (must be one sample).
location: tuple (longitude, latitude) - Optional location data.
OUTPUT
------
dict - A dictionary containing the predicted UHI index and SHAP reasoning.
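        EXAMPLE
        -------
        Illustrative only; `raw_df` is a hypothetical DataFrame of raw features.
        >>> row = raw_df.iloc[[0]]   # exactly one sample
        >>> result = predictor.predict(row, location=(-73.97, 40.78))
        >>> result["predicted_uhi_index"]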
"""
if X.shape[0] != 1:
raise ValueError(f"Input array must contain only one sample, but {X.shape[0]} samples were found.")
# Preprocess and scale input data
X_processed = self.preprocess(X)
X_scaled = self.scale(X_processed).reshape(1, -1)
# Predict UHI index
y_pred = self.model.predict(X_scaled)
        # Cast to a plain Python float so the output dictionary is easy to serialize
        uhi = float(y_pred[0][0]) if y_pred.ndim == 2 else float(y_pred[0])
# Compute SHAP values
shap_values = self.compute_shap_values(X_scaled)
        # Reduce the explainer's expected (base) value to a single Python float
        if self.explainer_type == shap.DeepExplainer:
            expected_value = np.array(self.explainer.expected_value)
        else:
            expected_value = self.explainer.expected_value
        if isinstance(expected_value, np.ndarray):
            # np.ravel also handles 0-d arrays, which plain indexing would not
            expected_value = float(np.ravel(expected_value)[0])
        # SHAP additivity: the base value plus the sum of all feature
        # contributions reconstructs the model's prediction
        shap_final_prediction = float(expected_value + np.sum(shap_values))
        # Structure per-feature contributions, casting NumPy scalars to floats
        feature_contributions = [
            {
                "feature": feature,
                "shap_value": float(value),
                "impact": "increase" if value > 0 else "decrease"
            }
            for feature, value in zip(self.feature_names, shap_values)
        ]
# Create the final output
prediction_output = {
"longitude": location[0],
"latitude": location[1],
"predicted_uhi_index": uhi,
"base_value": expected_value,
"shap_final_prediction": shap_final_prediction,
"uhi_status": "Urban Heat Island" if shap_final_prediction > 1 else "Cooler Region",
"feature_contributions": feature_contributions,
}
        return prediction_output
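

# ---------------------------------------------------------------------------
# Minimal usage sketch (assumption-heavy): every path below is a placeholder,
# and the background array must already be in the scaled, engineered feature
# space produced by preprocess() + scale(), since both the explainer and the
# model operate on scaled inputs.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    background = np.load("data/uhi_background_scaled.npy")              # hypothetical path
    feature_names = pd.read_csv("data/engineered_features.csv")["name"].tolist()  # hypothetical path

    predictor = UhiPredictor(
        model_path="models/uhi_model.h5",                               # hypothetical path
        scaler_path="models/uhi_scaler.pkl",                            # hypothetical path
        explainer_type=shap.DeepExplainer,
        ref_data=background,
        feature_names=feature_names,
    )

    # One raw sample containing the columns preprocess() expects
    sample = pd.read_csv("data/uhi_raw_samples.csv").iloc[[0]]          # hypothetical path
    result = predictor.predict(sample, location=(-73.97, 40.78))
    print(result["predicted_uhi_index"], result["uhi_status"])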