import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
import pickle
import shap

class UhiPredictor:
    """
    Urban Heat Island (UHI) predictor that scores new samples and explains each prediction using SHAP.
    
    INPUTS
    ---
    model_path: str - Path to the trained model file.
    scaler_path: str - Path to the standard scaler file.
    explainer_type: SHAP explainer class (e.g., shap.DeepExplainer, shap.KernelExplainer).
    ref_data: pd.DataFrame or np.ndarray - Background dataset for the SHAP explainer.
    feature_names: list - Feature names for SHAP analysis.
    """

    def __init__(self, model_path, scaler_path, explainer_type, ref_data, feature_names):
        """
        Initializes the UHI predictor with a trained model, scaler, and SHAP explainer.

        INPUTS
        ---
        model_path: str - Path to the model file.
        scaler_path: str - Path to the standard scaler file.
        explainer_type: SHAP explainer class (e.g., shap.DeepExplainer, shap.KernelExplainer).
        ref_data: pd.DataFrame or np.ndarray - Background dataset for the SHAP explainer.
        feature_names: list - Feature names for SHAP explanation.
        """
        # Load the model and scaler
        self.model = load_model(model_path)
        with open(scaler_path, 'rb') as f:
            self.scaler = pickle.load(f)

        # Ensure reference data is in NumPy format
        ref_data = ref_data.to_numpy() if isinstance(ref_data, pd.DataFrame) else ref_data
        
        # Initialize SHAP explainer
        self.explainer_type = explainer_type
        self.explainer = self.explainer_type(self.model, ref_data)
        self.feature_names = feature_names

    def preprocess(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Preprocess the input DataFrame to create new features for the model.

        INPUT
        -----
        df: pd.DataFrame
            The input DataFrame containing the features.

        OUTPUT
        ------
        pd.DataFrame
            The preprocessed DataFrame with additional features.
        """
        Wind_Direction_radians = np.radians(df["Wind_Direction_deg"])
        Wind_X = np.sin(Wind_Direction_radians)
        Wind_Y = np.cos(Wind_Direction_radians)

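        # Interaction features pairing elevation, buildings, humidity, and
        # traffic with wind components and spectral indices, mirroring the
        # feature engineering used at training time; the small epsilons guard
        # the ratio features against division by zero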
        m100_Elevation_Wind_X = df["100m_Ground_Elevation"] * df["Avg_Wind_Speed"] * Wind_X
        m150_Elevation_Wind_Y = df["150m_Ground_Elevation"] * df["Avg_Wind_Speed"] * Wind_Y
        m150_Humidity_NDVI = df["Relative_Humidity"] * df["150m_NDVI"]
        m150_Traffic_NDBI = df["Traffic_Volume"] * df["150m_NDBI"]
        m300_Building_Wind_X = df["300m_Building_Height"] * df["Avg_Wind_Speed"] * Wind_X
        m300_Building_Wind_Y = df["300m_Building_Height"] * df["Avg_Wind_Speed"] * Wind_Y
        m300_Elevation_Wind_Y = df["300m_Ground_Elevation"] * df["Avg_Wind_Speed"] * Wind_Y
        m300_BldgHeight_Count = df["300m_Building_Height"] * df["300m_Building_Count"]
        m300_TotalBuildingArea_NDVI = df["300m_Total_Building_Area_m2"] * df["300m_NDVI"]
        m300_Traffic_NDVI = df["Traffic_Volume"] * df["300m_NDVI"]
        m300_Traffic_NDBI = df["Traffic_Volume"] * df["300m_NDBI"]
        m300_Building_Aspect_Ratio = df["300m_Building_Height"] / np.sqrt(df["300m_Total_Building_Area_m2"] + 1e-6)
        m300_Sky_View_Factor = 1 - df["300m_Building_Density"]
        m300_Canopy_Cover_Ratio = df["300m_NDVI"] / (df["300m_Building_Density"] + 1e-6)
        m300_GHG_Proxy = df["300m_Building_Count"] * df["Traffic_Volume"] * df["Solar_Flux"] 

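        # Assemble the engineered features in the column order expected by
        # the scaler and the model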
        output = {
            "50m_1NPCRI": df["150m_NPCRI"],
            "100m_Elevation_Wind_X": m100_Elevation_Wind_X,
            "150m_Traffic_Volume": df["Traffic_Volume"],
            "150m_Elevation_Wind_Y": m150_Elevation_Wind_Y,
            "150m_Humidity_NDVI": m150_Humidity_NDVI,
            "150m_Traffic_NDBI": m150_Traffic_NDBI,
            "300m_SI": df["300m_SI"],
            "300m_NPCRI": df["300m_NPCRI"],
            "300m_Coastal_Aerosol": df["300m_Coastal_Aerosol"],
            "300m_Total_Building_Area_m2": df["300m_Total_Building_Area_m2"],
            "300m_Building_Construction_Year": df["300m_Building_Construction_Year"],
            "300m_Ground_Elevation": df["300m_Ground_Elevation"],
            "300m_Building_Wind_X": m300_Building_Wind_X,
            "300m_Building_Wind_Y": m300_Building_Wind_Y,
            "300m_Elevation_Wind_Y": m300_Elevation_Wind_Y,
            "300m_BldgHeight_Count": m300_BldgHeight_Count,
            "300m_TotalBuildingArea_NDVI": m300_TotalBuildingArea_NDVI,
            "300m_Traffic_NDVI": m300_Traffic_NDVI,
            "300m_Traffic_NDBI": m300_Traffic_NDBI,
            "300m_Building_Aspect_Ratio": m300_Building_Aspect_Ratio,
            "300m_Sky_View_Factor": m300_Sky_View_Factor,
            "300m_Canopy_Cover_Ratio": m300_Canopy_Cover_Ratio,
            "300m_GHG_Proxy": m300_GHG_Proxy
        }

        # The dict values are Series aligned on df's index; building the frame
        # directly (instead of forcing index=[0]) avoids NaN rows when the
        # input sample is not labelled 0, e.g. a slice of a larger DataFrame
        output = pd.DataFrame(output).reset_index(drop=True)

        return output

    def scale(self, X: pd.DataFrame) -> np.ndarray:
        """
        Apply the scaler used to train the model to the new data.

        INPUT
        -----
        X: pd.DataFrame - The data to be scaled.

        OUTPUT
        ------
        np.ndarray - The scaled data.
        """
        return self.scaler.transform(X)

    def compute_shap_values(self, X):
        """
        Computes SHAP values for a single scaled record.
        """
        # DeepExplainer's additivity check can fail on TensorFlow models, so
        # it is disabled there; other explainers use their defaults
        if self.explainer_type == shap.DeepExplainer:
            shap_values = self.explainer.shap_values(X, check_additivity=False)
        else:
            shap_values = self.explainer.shap_values(X)

        # Some explainers return a list with one array per model output;
        # keep the first (and, for this regressor, only) output
        if isinstance(shap_values, list):
            shap_values = shap_values[0]

        # Flatten to a 1D vector of per-feature contributions, since this
        # class explains exactly one sample at a time
        return np.asarray(shap_values).reshape(-1)

    def predict(self, X: pd.DataFrame, location=(None, None)) -> dict:
        """
        Make a prediction on one sample and explain the prediction using SHAP.

        INPUT
        -----
        X: pd.DataFrame - The data to predict a UHI index for (must be one sample).
        location: tuple (longitude, latitude) - Optional location data.

        OUTPUT
        ------
        dict - A dictionary containing the predicted UHI index and SHAP reasoning.
        """
        if X.shape[0] != 1:
            raise ValueError(f"Input array must contain only one sample, but {X.shape[0]} samples were found.")

        # Preprocess and scale input data
        X_processed = self.preprocess(X)
        X_scaled = self.scale(X_processed).reshape(1, -1)

        # Predict UHI index
        y_pred = self.model.predict(X_scaled)
        # Cast to a plain Python float so the output dict stays JSON-serializable
        uhi = float(y_pred[0][0]) if y_pred.ndim == 2 else float(y_pred[0])

        # Compute SHAP values
        shap_values = self.compute_shap_values(X_scaled)

        # Extract the expected base value and ensure it is a single scalar (not a tensor)
        if self.explainer_type == shap.DeepExplainer:
            expected_value = np.array(self.explainer.expected_value)
        else:
            expected_value = self.explainer.expected_value

        # Extract a single scalar if expected_value is an array (reshape also
        # handles the 0-d case produced by np.array on a scalar)
        if isinstance(expected_value, np.ndarray):
            expected_value = expected_value.reshape(-1)[0]
        expected_value = float(expected_value)

        # Reconstruct the prediction from the SHAP decomposition: base value
        # plus the sum of all per-feature contributions
        shap_final_prediction = float(expected_value + np.sum(shap_values))

        # Structure feature contributions
        feature_contributions = [
            {
                "feature": feature,
                "shap_value": value,
                "impact": "increase" if value > 0 else "decrease"
            }
            for feature, value in zip(self.feature_names, shap_values)
        ]

        # Create the final output
        prediction_output = {
            "longitude": location[0],
            "latitude": location[1],
            "predicted_uhi_index": uhi,
            "base_value": expected_value,
            "shap_final_prediction": shap_final_prediction,
            "uhi_status": "Urban Heat Island" if shap_final_prediction > 1 else "Cooler Region",
            "feature_contributions": feature_contributions,
        }

        return prediction_output
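

# ---------------------------------------------------------------------------
# Example usage: a minimal sketch, not part of the class above. The file
# paths, background array, coordinates, and raw feature values below are
# hypothetical placeholders; substitute the artifacts from your own training
# run. shap.DeepExplainer is assumed because the model is loaded with Keras.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # One raw sample with every column preprocess() reads (placeholder values)
    raw_sample = pd.DataFrame({
        "Wind_Direction_deg": [225.0], "Avg_Wind_Speed": [3.2],
        "Relative_Humidity": [0.55], "Traffic_Volume": [1200.0],
        "Solar_Flux": [650.0],
        "100m_Ground_Elevation": [12.0],
        "150m_Ground_Elevation": [12.5], "150m_NDVI": [0.31],
        "150m_NDBI": [0.12], "150m_NPCRI": [0.04],
        "300m_Ground_Elevation": [13.0], "300m_NDVI": [0.28],
        "300m_NDBI": [0.15], "300m_SI": [0.02], "300m_NPCRI": [0.05],
        "300m_Coastal_Aerosol": [0.09], "300m_Building_Height": [24.0],
        "300m_Building_Count": [85.0], "300m_Building_Density": [0.42],
        "300m_Total_Building_Area_m2": [52000.0],
        "300m_Building_Construction_Year": [1978.0],
    })

    # preprocess() never touches self, so it can be borrowed here to recover
    # the engineered column order for the SHAP feature names
    feature_names = list(UhiPredictor.preprocess(None, raw_sample).columns)

    predictor = UhiPredictor(
        model_path="uhi_model.h5",        # hypothetical trained Keras model
        scaler_path="uhi_scaler.pkl",     # hypothetical fitted scaler
        explainer_type=shap.DeepExplainer,
        ref_data=np.load("shap_background_scaled.npy"),  # hypothetical scaled background sample
        feature_names=feature_names,
    )

    result = predictor.predict(raw_sample, location=(-73.971, 40.776))
    print(f"UHI index: {result['predicted_uhi_index']:.4f} ({result['uhi_status']})")
    for contribution in result["feature_contributions"][:5]:
        print(contribution)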