FranciscoLozDataScience committed on
Commit 094f22f · 1 Parent(s): bc58ccd

added updates

Files changed (5)
  1. UHI_explainer_ref_data.parquet +3 -0
  2. app.py +11 -21
  3. examples.csv +0 -0
  4. explainer.py +0 -110
  5. model.py +102 -37
UHI_explainer_ref_data.parquet ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d5b020cfc8a638dfc6c2ed9f0b5ad6ad9ed4472f4d6a5d4a75960e89da07388
+size 223375
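
Note: the three added lines are a Git LFS pointer, not the parquet contents; the actual 223,375-byte file lives in LFS storage and is materialized on checkout (e.g. via git lfs pull). A minimal sketch of how the file is consumed once present locally, matching the wiring in app.py below:

    import pandas as pd

    # SHAP background (reference) dataset added in this commit; assumes the
    # LFS-tracked parquet has been pulled into the working tree.
    ref_data = pd.read_parquet("UHI_explainer_ref_data.parquet")
    print(ref_data.shape)  # one row per background sample, one column per model feature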
app.py CHANGED
@@ -1,12 +1,13 @@
 import gradio as gr
 import shap
-from model import UhiModel
-from explainer import UhiExplainer
+from model import UhiPredictor
 import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
 
-MODEL = UhiModel("mixed_buffers_ResNet_model.keras","mixed_buffers_standard_scaler.pkl")
+ref_data = pd.read_parquet("UHI_explainer_ref_data.parquet")
+cols = pd.read_parquet("UHI_explainer_ref_data.parquet").columns
+MODEL = UhiPredictor("mixed_buffers_ResNet_model.keras", "mixed_buffers_standard_scaler.pkl", shap.DeepExplainer, ref_data, cols)
 
 def filter_map(uhi, longitude, latitude):
     '''
@@ -45,8 +46,8 @@ def filter_map(uhi, longitude, latitude):
     return fig
 
 def predict(
-    longitude, latitude, m50_NPCRI, m100_Ground_Elevation, avg_wind_speed,
-    wind_direction, traffic_volume, m150_Ground_Elevation,
+    longitude, latitude, m150_NPCRI, m100_Ground_Elevation, avg_wind_speed,
+    wind_direction_deg, traffic_volume, m150_Ground_Elevation,
     relative_humidity, m150_NDVI, m150_NDBI,
     m300_SI, m300_NPCRI, m300_Coastal_Aerosol,
     m300_Total_Building_Area_m2, m300_Building_Construction_Year, m300_Ground_Elevation,
@@ -60,10 +61,10 @@ def predict(
 
     # Create a dictionary with input data and dataset var names
    input_data = {
-        "50m_1NPCRI": m50_NPCRI,
+        "150m_NPCRI": m150_NPCRI,
         "100m_Ground_Elevation": m100_Ground_Elevation,
         "Avg_Wind_Speed": avg_wind_speed,
-        "Wind_Direction": wind_direction,
+        "Wind_Direction_deg": wind_direction_deg,
         "Traffic_Volume": traffic_volume,
         "150m_Ground_Elevation": m150_Ground_Elevation,
         "Relative_Humidity": relative_humidity,
@@ -87,23 +88,12 @@ def predict(
     input_df = pd.DataFrame(input_data, index=[0])
 
     # predict
-    uhi_index = MODEL.predict(input_df)
-
-    # explain the prediction
-    explainer = UhiExplainer(
-        model=MODEL.model,
-        explainer_type=shap.DeepExplainer,
-        X=input_df,
-        feature_names=input_df.columns,
-        ref_data=input_df,
-        shap_values=None  # Compute SHAP values on the fly
-    )
-    reason = explainer.reasoning(index=0, location=(longitude, latitude))
+    output = MODEL.predict(input_df)
 
     # generate map
-    plot = filter_map(uhi_index, longitude, latitude)
+    plot = filter_map(output["predicted_uhi_index"], longitude, latitude)
 
-    return uhi_index, reason["uhi_status"], reason["feature_contributions"], plot
+    return output["predicted_uhi_index"], output["uhi_status"], output["feature_contributions"], plot
 
 def load_examples(csv_file):
     '''
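
Editor's note on the change: predict() no longer builds a throwaway UhiExplainer per request (one that also used the single input row as its own SHAP background); it makes one MODEL.predict() call and reads the explanation out of the returned dict. A self-contained sketch of the dict shape, with key names taken from model.py below and illustrative placeholder values:

    # Illustrative shape of UhiPredictor.predict()'s return value; the numbers
    # are placeholders, not real model output.
    output = {
        "longitude": -73.97,            # echoed from the location argument
        "latitude": 40.78,
        "predicted_uhi_index": 1.02,    # raw Keras model prediction
        "base_value": 0.99,             # SHAP expected value over the background data
        "shap_final_prediction": 1.01,  # base_value + sum of SHAP values
        "uhi_status": "Urban Heat Island",
        "feature_contributions": [
            {"feature": "150m_Traffic_Volume", "shap_value": 0.01, "impact": "increase"},
        ],
    }

    # app.py forwards three of these fields, plus the map figure, to the Gradio outputs:
    result = (output["predicted_uhi_index"], output["uhi_status"], output["feature_contributions"])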
examples.csv CHANGED
The diff for this file is too large to render. See raw diff
 
explainer.py DELETED
@@ -1,110 +0,0 @@
-"""This module provides an explainer for the model."""
-
-import shap
-import pandas as pd
-import numpy as np
-
-class UhiExplainer:
-    """
-    A class for SHAP-based model explanation.
-
-    Attributes:
-    - model: Trained model (e.g., RandomForestRegressor, XGBRegressor).
-    - explainer_type: SHAP explainer class (e.g., shap.TreeExplainer, shap.KernelExplainer).
-    - X: Data (Pandas DataFrame) used to compute SHAP values.
-    - feature_names: List of feature names.
-    - explainer: SHAP explainer instance.
-    - shap_values: Computed SHAP values.
-
-    Methods:
-    - apply_shap(): Computes SHAP values.
-    - summary_plot(): Generates a SHAP summary plot.
-    - bar_plot(): Generates a bar chart of feature importance.
-    - dependence_plot(): Generates a dependence plot for a feature.
-    - force_plot(): Generates a force plot for an individual prediction.
-    - init_js(): Initializes SHAP for Jupyter Notebook.
-    - reasoning(): Provides insights on why a record received a high or low UHI index.
-    """
-
-    def __init__(self, model, explainer_type, X, feature_names, ref_data=None, shap_values=None):
-        """
-        Initializes the Explainer with a trained model, explainer type, and dataset.
-
-        Parameters:
-        - model: Trained model (e.g., RandomForestRegressor, XGBRegressor).
-        - explainer_type: SHAP explainer class (e.g., shap.TreeExplainer, shap.KernelExplainer).
-        - X: Data (Pandas DataFrame) used to compute SHAP values.
-        - feature_names: List of feature names.
-        - ref_data (optional): The reference (background) dataset used by SHAP to estimate the expected output of the model.
-        - shap_values (optional): Precomputed SHAP values.
-        """
-        self.model = model
-        self.explainer_type = explainer_type
-        self.X = np.array(X) if isinstance(X, pd.DataFrame) else X  # Ensure NumPy format
-        if ref_data is not None:
-            ref_data = np.array(ref_data) if isinstance(ref_data, pd.DataFrame) else ref_data  # Ensure NumPy format
-        self.feature_names = feature_names
-        self.explainer = explainer_type(model, ref_data)  # Initialize explainer
-        # Compute SHAP values
-        if shap_values is not None:
-            self.shap_values = shap_values
-        else:
-            self.shap_values = self.explainer.shap_values(self.X, check_additivity=False) if self.explainer_type == shap.DeepExplainer else self.explainer.shap_values(self.X)
-        # Apply squeeze only if the array has three dimensions and the last dimension is 1
-        if self.shap_values.ndim == 3 and self.shap_values.shape[-1] == 1:
-            self.shap_values = np.squeeze(self.shap_values)
-
-    def reasoning(self, index=0, location=(None, None)):
-        """
-        Provides insights on why the record received a high or low UHI index.
-
-        Parameters:
-        index (int): The index of the observation of interest.
-        location (tuple) (optional): The location of the record (long, lat).
-
-        Returns:
-        dict: The insights for the selected record.
-        """
-
-        # Ensure expected_value is a single value (not tensor)
-        if self.explainer_type == shap.DeepExplainer:
-            expected_value = np.array(self.explainer.expected_value)
-        else:
-            expected_value = self.explainer.expected_value
-
-        # Extract single value if expected_value is an array
-        if isinstance(expected_value, np.ndarray):
-            expected_value = expected_value[0]
-
-        # Validate record index
-        if index >= len(self.shap_values) or index < 0:
-            return {"error": "Invalid record index"}
-
-        # Extract SHAP values for the specified record
-        record_shap_values = self.shap_values[index]
-
-        # Compute SHAP-based final prediction
-        shap_final_prediction = expected_value + sum(record_shap_values)
-
-        # Structure feature contributions
-        feature_contributions = [
-            {
-                "feature": feature,
-                "shap_value": value,
-                "impact": "increase" if value > 0 else "decrease"
-            }
-            for feature, value in zip(self.feature_names, record_shap_values)
-        ]
-
-        # Create JSON structure
-        shap_json = {
-            "record_index": index,
-            "longitude": location[0],
-            "latitude": location[1],
-            "base_value": expected_value,
-            "shap_final_prediction": shap_final_prediction,  # SHAP-based predicted value
-            "uhi_status": "Urban Heat Island" if shap_final_prediction > 1 else "Cooler Region",
-            "feature_contributions": feature_contributions,
-        }
-
-        return shap_json
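
The deleted reasoning() logic is rebuilt nearly line-for-line inside UhiPredictor.predict() in model.py below; what is dropped is the record_index bookkeeping, the index bounds check, and the plotting helpers named in the class docstring. The identity the reasoning rests on is SHAP additivity: the base (expected) value plus the sum of a record's SHAP values recovers the explained prediction. A toy, self-contained sketch (synthetic numbers; note that with check_additivity=False on DeepExplainer the identity holds only approximately for the real model):

    import numpy as np

    # Synthetic base value and per-feature SHAP values for one record.
    expected_value = 0.98
    record_shap_values = np.array([0.03, -0.01, 0.02])

    # Additivity: reconstructed prediction = expected value + feature contributions.
    shap_final_prediction = expected_value + record_shap_values.sum()
    assert np.isclose(shap_final_prediction, 1.02)

    # Same threshold the deleted code used to label a record.
    status = "Urban Heat Island" if shap_final_prediction > 1 else "Cooler Region"
    print(status)  # -> Urban Heat Island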
model.py CHANGED
@@ -2,21 +2,46 @@ import numpy as np
 import pandas as pd
 from tensorflow.keras.models import load_model
 import pickle
+import shap
 
-class UhiModel:
+class UhiPredictor:
     """
-    Urban Heat Island Model Class that can predict new instances
-
+    Urban Heat Island Predictor Class that predicts new instances and explains the prediction using SHAP.
+
     INPUTS
     ---
-    model_path: the path to the model file
-    scaler_path: the path to the standard scaler file
+    model_path: str - Path to the trained model file.
+    scaler_path: str - Path to the standard scaler file.
+    explainer_type: SHAP explainer class (e.g., shap.TreeExplainer, shap.KernelExplainer).
+    ref_data: pd.DataFrame or np.array - Background dataset for SHAP explainer.
+    feature_names: list - Feature names for SHAP analysis.
     """
-    def __init__(self, model_path, scaler_path):
+
+    def __init__(self, model_path, scaler_path, explainer_type, ref_data, feature_names):
+        """
+        Initializes the UHI predictor with a trained model, scaler, and SHAP explainer.
+
+        INPUTS
+        ---
+        model_path: str - Path to the model file.
+        scaler_path: str - Path to the standard scaler file.
+        explainer_type: SHAP explainer class (e.g., shap.TreeExplainer, shap.KernelExplainer).
+        ref_data: pd.DataFrame or np.array - Background dataset for SHAP explainer.
+        feature_names: list - Feature names for SHAP explanation.
+        """
+        # Load the model and scaler
         self.model = load_model(model_path)
         with open(scaler_path, 'rb') as f:
             self.scaler = pickle.load(f)
-
+
+        # Ensure reference data is in NumPy format
+        ref_data = np.array(ref_data) if isinstance(ref_data, pd.DataFrame) else ref_data
+
+        # Initialize SHAP explainer
+        self.explainer_type = explainer_type
+        self.explainer = self.explainer_type(self.model, ref_data)
+        self.feature_names = feature_names
+
     def preprocess(self, df: pd.DataFrame) -> pd.DataFrame:
         """
         Preprocess the input DataFrame to create new features for the model.
@@ -31,8 +56,9 @@ class UhiModel:
         pd.DataFrame
             The preprocessed DataFrame with additional features.
         """
-        Wind_X = np.sin(df["Wind_Direction"])
-        Wind_Y = np.cos(df["Wind_Direction"])
+        Wind_Direction_radians = np.radians(df["Wind_Direction_deg"])
+        Wind_X = np.sin(Wind_Direction_radians)
+        Wind_Y = np.cos(Wind_Direction_radians)
 
         m100_Elevation_Wind_X = df["100m_Ground_Elevation"] * df["Avg_Wind_Speed"] * Wind_X
         m150_Elevation_Wind_Y = df["150m_Ground_Elevation"] * df["Avg_Wind_Speed"] * Wind_Y
@@ -51,7 +77,7 @@ class UhiModel:
         m300_GHG_Proxy = df["300m_Building_Count"] * df["Traffic_Volume"] * df["Solar_Flux"]
 
         output = {
-            "50m_1NPCRI": df["50m_1NPCRI"],
+            "50m_1NPCRI": df["150m_NPCRI"],
             "100m_Elevation_Wind_X": m100_Elevation_Wind_X,
             "150m_Traffic_Volume": df["Traffic_Volume"],
             "150m_Elevation_Wind_Y": m150_Elevation_Wind_Y,
@@ -79,54 +105,93 @@ class UhiModel:
         output = pd.DataFrame(output, index=[0])
 
         return output
-
-    def scale(self, X):
+
+    def scale(self, X: pd.DataFrame) -> np.ndarray:
         """
-        Apply the scaler used to train the model to the new data
+        Apply the scaler used to train the model to the new data.
 
         INPUT
         -----
-        X: the data to be scaled
-
+        X: pd.DataFrame - The data to be scaled.
+
         OUTPUT
         ------
-        returns the scaled data
+        np.ndarray - The scaled data.
         """
+        return self.scaler.transform(X)
 
-        new_data_scaled = self.scaler.transform(X)
+    def compute_shap_values(self, X):
+        """
+        Computes SHAP values for the record.
+        """
+        # Compute SHAP values
+        shap_values = self.explainer.shap_values(X, check_additivity=False) if self.explainer_type == shap.DeepExplainer else self.explainer.shap_values(X)
+
+        # Apply squeeze only if the array has three dimensions and the last dimension is 1
+        if shap_values.ndim == 3 and shap_values.shape[-1] == 1:
+            shap_values = np.squeeze(shap_values)
 
-        return new_data_scaled
+        return shap_values
 
-    def predict(self, X: pd.DataFrame) -> float:
+    def predict(self, X: pd.DataFrame, location=(None, None)) -> dict:
         """
-        Make a prediction on one sample using the loaded model.
+        Make a prediction on one sample and explain the prediction using SHAP.
 
         INPUT
        -----
-        X: pd.DataFrame
-            The data to predict a UHI index for. Must contain only one sample.
+        X: pd.DataFrame - The data to predict a UHI index for (must be one sample).
+        location: tuple (longitude, latitude) - Optional location data.
 
         OUTPUT
         ------
-        str:
-            Predicted UHI index.
+        dict - A dictionary containing the predicted UHI index and SHAP reasoning.
         """
-
-        # Check that input contains only one sample
         if X.shape[0] != 1:
-            raise ValueError(f"Input array must contain only one sample, but {X.shape[0]} samples were found")
-
-        # Preprocess the input data to create new features
-        X_processed = self.preprocess(X)
+            raise ValueError(f"Input array must contain only one sample, but {X.shape[0]} samples were found.")
 
-        # Scale the input data
-        X_scaled = self.scale(X_processed)
+        # Preprocess and scale input data
+        X_processed = self.preprocess(X)
+        X_scaled = self.scale(X_processed).reshape(1, -1)
 
-        # Make prediction
+        # Predict UHI index
         y_pred = self.model.predict(X_scaled)
-
-        # Extract the predicted UHI index (assuming it's a single value)
         uhi = y_pred[0][0] if y_pred.ndim == 2 else y_pred[0]
 
-        # Return UHI
-        return uhi
+        # Compute SHAP values
+        shap_values = self.compute_shap_values(X_scaled)
+
+        # Extract the expected base value; ensure it is a single value (not a tensor)
+        if self.explainer_type == shap.DeepExplainer:
+            expected_value = np.array(self.explainer.expected_value)
+        else:
+            expected_value = self.explainer.expected_value
+
+        # Extract single value if expected_value is an array
+        if isinstance(expected_value, np.ndarray):
+            expected_value = expected_value[0]
+
+        # Compute SHAP-based final prediction
+        shap_final_prediction = expected_value + sum(shap_values)
+
+        # Structure feature contributions
+        feature_contributions = [
+            {
+                "feature": feature,
+                "shap_value": value,
+                "impact": "increase" if value > 0 else "decrease"
+            }
+            for feature, value in zip(self.feature_names, shap_values)
+        ]
+
+        # Create the final output
+        prediction_output = {
+            "longitude": location[0],
+            "latitude": location[1],
+            "predicted_uhi_index": uhi,
+            "base_value": expected_value,
+            "shap_final_prediction": shap_final_prediction,
+            "uhi_status": "Urban Heat Island" if shap_final_prediction > 1 else "Cooler Region",
+            "feature_contributions": feature_contributions,
+        }
+
+        return prediction_output
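
Taken together, the commit leaves a single entry point for both inference and explanation. A hedged end-to-end sketch of constructing and calling UhiPredictor the way app.py now does (the input frame below is truncated to two of the raw columns for illustration; a real call must supply every field that preprocess() reads):

    import shap
    import pandas as pd
    from model import UhiPredictor

    # Mirrors the module-level wiring in app.py: the parquet supplies both the
    # SHAP background data and the feature names echoed in feature_contributions.
    ref_data = pd.read_parquet("UHI_explainer_ref_data.parquet")
    predictor = UhiPredictor(
        "mixed_buffers_ResNet_model.keras",
        "mixed_buffers_standard_scaler.pkl",
        shap.DeepExplainer,
        ref_data,
        ref_data.columns,
    )

    # Truncated single-row input; the full raw schema ("150m_NPCRI",
    # "Wind_Direction_deg", "Avg_Wind_Speed", ...) is assembled by app.py
    # from the Gradio form fields.
    input_df = pd.DataFrame({"150m_NPCRI": [0.1], "Wind_Direction_deg": [225.0]})
    output = predictor.predict(input_df, location=(-73.97, 40.78))
    print(output["predicted_uhi_index"], output["uhi_status"])

One design consequence worth noting: because SHAP is computed on X_scaled and the feature names come from the reference parquet, the feature_contributions entries are labeled with the engineered, scaled model inputs rather than the raw form fields.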