FranciscoLozDataScience committed on
Commit 094f22f · 1 Parent(s): bc58ccd

added updates

Files changed (5)
  1. UHI_explainer_ref_data.parquet +3 -0
  2. app.py +11 -21
  3. examples.csv +0 -0
  4. explainer.py +0 -110
  5. model.py +102 -37
UHI_explainer_ref_data.parquet ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d5b020cfc8a638dfc6c2ed9f0b5ad6ad9ed4472f4d6a5d4a75960e89da07388
+size 223375
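
Note: the three added lines are a Git LFS pointer, not the parquet contents; the actual 223,375-byte file lives in LFS storage and is materialized on checkout (e.g. via git lfs pull). A minimal sketch of how the file is consumed once present locally, matching the wiring in app.py below:

    import pandas as pd

    # SHAP background (reference) dataset added in this commit; assumes the
    # LFS-tracked parquet has been pulled into the working tree.
    ref_data = pd.read_parquet("UHI_explainer_ref_data.parquet")
    print(ref_data.shape)  # one row per background sample, one column per model feature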
app.py CHANGED
@@ -1,12 +1,13 @@
 import gradio as gr
 import shap
-from model import UhiModel
-from explainer import UhiExplainer
+from model import UhiPredictor
 import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
 
-MODEL = UhiModel("mixed_buffers_ResNet_model.keras","mixed_buffers_standard_scaler.pkl")
+ref_data = pd.read_parquet("UHI_explainer_ref_data.parquet")
+cols = pd.read_parquet("UHI_explainer_ref_data.parquet").columns
+MODEL = UhiPredictor("mixed_buffers_ResNet_model.keras", "mixed_buffers_standard_scaler.pkl", shap.DeepExplainer, ref_data, cols)
 
 def filter_map(uhi, longitude, latitude):
     '''
@@ -45,8 +46,8 @@ def filter_map(uhi, longitude, latitude):
     return fig
 
 def predict(
-    longitude, latitude, m50_NPCRI, m100_Ground_Elevation, avg_wind_speed,
-    wind_direction, traffic_volume, m150_Ground_Elevation,
+    longitude, latitude, m150_NPCRI, m100_Ground_Elevation, avg_wind_speed,
+    wind_direction_deg, traffic_volume, m150_Ground_Elevation,
     relative_humidity, m150_NDVI, m150_NDBI,
     m300_SI, m300_NPCRI, m300_Coastal_Aerosol,
     m300_Total_Building_Area_m2, m300_Building_Construction_Year, m300_Ground_Elevation,
@@ -60,10 +61,10 @@ def predict(
 
     # Create a dictionary with input data and dataset var names
    input_data = {
-        "50m_1NPCRI": m50_NPCRI,
+        "150m_NPCRI": m150_NPCRI,
         "100m_Ground_Elevation": m100_Ground_Elevation,
         "Avg_Wind_Speed": avg_wind_speed,
-        "Wind_Direction": wind_direction,
+        "Wind_Direction_deg": wind_direction_deg,
         "Traffic_Volume": traffic_volume,
         "150m_Ground_Elevation": m150_Ground_Elevation,
         "Relative_Humidity": relative_humidity,
@@ -87,23 +88,12 @@ def predict(
     input_df = pd.DataFrame(input_data, index=[0])
 
     # predict
-    uhi_index = MODEL.predict(input_df)
-
-    # explain the prediction
-    explainer = UhiExplainer(
-        model=MODEL.model,
-        explainer_type=shap.DeepExplainer,
-        X=input_df,
-        feature_names=input_df.columns,
-        ref_data=input_df,
-        shap_values=None  # Compute SHAP values on the fly
-    )
-    reason = explainer.reasoning(index=0, location=(longitude, latitude))
+    output = MODEL.predict(input_df)
 
     # generate map
-    plot = filter_map(uhi_index, longitude, latitude)
+    plot = filter_map(output["predicted_uhi_index"], longitude, latitude)
 
-    return uhi_index, reason["uhi_status"], reason["feature_contributions"], plot
+    return output["predicted_uhi_index"], output["uhi_status"], output["feature_contributions"], plot
 
 def load_examples(csv_file):
     '''
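
Editor's note on the change: predict() no longer builds a throwaway UhiExplainer per request (one that also used the single input row as its own SHAP background); it makes one MODEL.predict() call and reads the explanation out of the returned dict. A self-contained sketch of the dict shape, with key names taken from model.py below and illustrative placeholder values:

    # Illustrative shape of UhiPredictor.predict()'s return value; the numbers
    # are placeholders, not real model output.
    output = {
        "longitude": -73.97,            # echoed from the location argument
        "latitude": 40.78,
        "predicted_uhi_index": 1.02,    # raw Keras model prediction
        "base_value": 0.99,             # SHAP expected value over the background data
        "shap_final_prediction": 1.01,  # base_value + sum of SHAP values
        "uhi_status": "Urban Heat Island",
        "feature_contributions": [
            {"feature": "150m_Traffic_Volume", "shap_value": 0.01, "impact": "increase"},
        ],
    }

    # app.py forwards three of these fields, plus the map figure, to the Gradio outputs:
    result = (output["predicted_uhi_index"], output["uhi_status"], output["feature_contributions"])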
examples.csv CHANGED
The diff for this file is too large to render. See raw diff
 
explainer.py DELETED
@@ -1,110 +0,0 @@
-"""This module provides an explainer for the model."""
-
-import shap
-import pandas as pd
-import numpy as np
-
-class UhiExplainer:
-    """
-    A class for SHAP-based model explanation.
-
-    Attributes:
-    - model: Trained model (e.g., RandomForestRegressor, XGBRegressor).
-    - explainer_type: SHAP explainer class (e.g., shap.TreeExplainer, shap.KernelExplainer).
-    - X: Data (Pandas DataFrame) used to compute SHAP values.
-    - feature_names: List of feature names.
-    - explainer: SHAP explainer instance.
-    - shap_values: Computed SHAP values.
-
-    Methods:
-    - apply_shap(): Computes SHAP values.
-    - summary_plot(): Generates a SHAP summary plot.
-    - bar_plot(): Generates a bar chart of feature importance.
-    - dependence_plot(): Generates a dependence plot for a feature.
-    - force_plot(): Generates a force plot for an individual prediction.
-    - init_js(): Initializes SHAP for Jupyter Notebook.
-    - reasoning(): Provides insights on why a record received a high or low UHI index.
-    """
-
-    def __init__(self, model, explainer_type, X, feature_names, ref_data=None, shap_values=None):
-        """
-        Initializes the Explainer with a trained model, explainer type, and dataset.
-
-        Parameters:
-        - model: Trained model (e.g., RandomForestRegressor, XGBRegressor).
-        - explainer_type: SHAP explainer class (e.g., shap.TreeExplainer, shap.KernelExplainer).
-        - X: Data (Pandas DataFrame) used to compute SHAP values.
-        - feature_names: List of feature names.
-        - ref_data (optional): The reference (background) dataset used by SHAP to estimate the expected output of the model.
-        - shap_values (optional): Precomputed SHAP values.
-        """
-        self.model = model
-        self.explainer_type = explainer_type
-        self.X = np.array(X) if isinstance(X, pd.DataFrame) else X  # Ensure NumPy format
-        if ref_data is not None:
-            ref_data = np.array(ref_data) if isinstance(ref_data, pd.DataFrame) else ref_data  # Ensure NumPy format
-        self.feature_names = feature_names
-        self.explainer = explainer_type(model, ref_data)  # Initialize explainer
-        # Compute SHAP values
-        if shap_values is not None:
-            self.shap_values = shap_values
-        else:
-            self.shap_values = self.explainer.shap_values(self.X, check_additivity=False) if self.explainer_type == shap.DeepExplainer else self.explainer.shap_values(self.X)
-        # Apply squeeze only if the array has three dimensions and the last dimension is 1
-        if self.shap_values.ndim == 3 and self.shap_values.shape[-1] == 1:
-            self.shap_values = np.squeeze(self.shap_values)
-
-    def reasoning(self, index=0, location=(None, None)):
-        """
-        Provides insights on why the record received a high or low UHI index.
-
-        Parameters:
-        index (int): The index of the observation of interest.
-        location (tuple) (optional): The location of the record (long, lat).
-
-        Returns:
-        dict: The insights for the selected record.
-        """
-
-        # Ensure expected_value is a single value (not tensor)
-        if self.explainer_type == shap.DeepExplainer:
-            expected_value = np.array(self.explainer.expected_value)
-        else:
-            expected_value = self.explainer.expected_value
-
-        # Extract single value if expected_value is an array
-        if isinstance(expected_value, np.ndarray):
-            expected_value = expected_value[0]
-
-        # Validate record index
-        if index >= len(self.shap_values) or index < 0:
-            return {"error": "Invalid record index"}
-
-        # Extract SHAP values for the specified record
-        record_shap_values = self.shap_values[index]
-
-        # Compute SHAP-based final prediction
-        shap_final_prediction = expected_value + sum(record_shap_values)
-
-        # Structure feature contributions
-        feature_contributions = [
-            {
-                "feature": feature,
-                "shap_value": value,
-                "impact": "increase" if value > 0 else "decrease"
-            }
-            for feature, value in zip(self.feature_names, record_shap_values)
-        ]
-
-        # Create JSON structure
-        shap_json = {
-            "record_index": index,
-            "longitude": location[0],
-            "latitude": location[1],
-            "base_value": expected_value,
-            "shap_final_prediction": shap_final_prediction,  # SHAP-based predicted value
-            "uhi_status": "Urban Heat Island" if shap_final_prediction > 1 else "Cooler Region",
-            "feature_contributions": feature_contributions,
-        }
-
-        return shap_json
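
The deleted reasoning() logic is rebuilt nearly line-for-line inside UhiPredictor.predict() in model.py below; what is dropped is the record_index bookkeeping, the index bounds check, and the plotting helpers named in the class docstring. The identity the reasoning rests on is SHAP additivity: the base (expected) value plus the sum of a record's SHAP values recovers the explained prediction. A toy, self-contained sketch (synthetic numbers; note that with check_additivity=False on DeepExplainer the identity holds only approximately for the real model):

    import numpy as np

    # Synthetic base value and per-feature SHAP values for one record.
    expected_value = 0.98
    record_shap_values = np.array([0.03, -0.01, 0.02])

    # Additivity: reconstructed prediction = expected value + feature contributions.
    shap_final_prediction = expected_value + record_shap_values.sum()
    assert np.isclose(shap_final_prediction, 1.02)

    # Same threshold the deleted code used to label a record.
    status = "Urban Heat Island" if shap_final_prediction > 1 else "Cooler Region"
    print(status)  # -> Urban Heat Island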
model.py CHANGED
@@ -2,21 +2,46 @@ import numpy as np
 import pandas as pd
 from tensorflow.keras.models import load_model
 import pickle
+import shap
 
-class UhiModel:
+class UhiPredictor:
     """
-    Urban Heat Island Model Class that can predict new instances
-
+    Urban Heat Island Predictor Class that predicts new instances and explains the prediction using SHAP.
+
     INPUTS
     ---
-    model_path: the path to the model file
-    scaler_path: the path to the standard scaler file
+    model_path: str - Path to the trained model file.
+    scaler_path: str - Path to the standard scaler file.
+    explainer_type: SHAP explainer class (e.g., shap.TreeExplainer, shap.KernelExplainer).
+    ref_data: pd.DataFrame or np.array - Background dataset for SHAP explainer.
+    feature_names: list - Feature names for SHAP analysis.
     """
-    def __init__(self, model_path, scaler_path):
+
+    def __init__(self, model_path, scaler_path, explainer_type, ref_data, feature_names):
+        """
+        Initializes the UHI predictor with a trained model, scaler, and SHAP explainer.
+
+        INPUTS
+        ---
+        model_path: str - Path to the model file.
+        scaler_path: str - Path to the standard scaler file.
+        explainer_type: SHAP explainer class (e.g., shap.TreeExplainer, shap.KernelExplainer).
+        ref_data: pd.DataFrame or np.array - Background dataset for SHAP explainer.
+        feature_names: list - Feature names for SHAP explanation.
+        """
+        # Load the model and scaler
         self.model = load_model(model_path)
         with open(scaler_path, 'rb') as f:
             self.scaler = pickle.load(f)
-
+
+        # Ensure reference data is in NumPy format
+        ref_data = np.array(ref_data) if isinstance(ref_data, pd.DataFrame) else ref_data
+
+        # Initialize SHAP explainer
+        self.explainer_type = explainer_type
+        self.explainer = self.explainer_type(self.model, ref_data)
+        self.feature_names = feature_names
+
     def preprocess(self, df: pd.DataFrame) -> pd.DataFrame:
         """
         Preprocess the input DataFrame to create new features for the model.
@@ -31,8 +56,9 @@ class UhiModel:
         pd.DataFrame
             The preprocessed DataFrame with additional features.
         """
-        Wind_X = np.sin(df["Wind_Direction"])
-        Wind_Y = np.cos(df["Wind_Direction"])
+        Wind_Direction_radians = np.radians(df["Wind_Direction_deg"])
+        Wind_X = np.sin(Wind_Direction_radians)
+        Wind_Y = np.cos(Wind_Direction_radians)
 
         m100_Elevation_Wind_X = df["100m_Ground_Elevation"] * df["Avg_Wind_Speed"] * Wind_X
         m150_Elevation_Wind_Y = df["150m_Ground_Elevation"] * df["Avg_Wind_Speed"] * Wind_Y
@@ -51,7 +77,7 @@ class UhiModel:
         m300_GHG_Proxy = df["300m_Building_Count"] * df["Traffic_Volume"] * df["Solar_Flux"]
 
         output = {
-            "50m_1NPCRI": df["50m_1NPCRI"],
+            "50m_1NPCRI": df["150m_NPCRI"],
             "100m_Elevation_Wind_X": m100_Elevation_Wind_X,
             "150m_Traffic_Volume": df["Traffic_Volume"],
             "150m_Elevation_Wind_Y": m150_Elevation_Wind_Y,
@@ -79,54 +105,93 @@ class UhiModel:
         output = pd.DataFrame(output, index=[0])
 
         return output
-
-    def scale(self, X):
+
+    def scale(self, X: pd.DataFrame) -> np.ndarray:
         """
-        Apply the scaler used to train the model to the new data
+        Apply the scaler used to train the model to the new data.
 
         INPUT
         -----
-        X: the data to be scaled
-
+        X: pd.DataFrame - The data to be scaled.
+
         OUTPUT
         ------
-        returns the scaled data
+        np.ndarray - The scaled data.
         """
+        return self.scaler.transform(X)
 
-        new_data_scaled = self.scaler.transform(X)
+    def compute_shap_values(self, X):
+        """
+        Computes SHAP values for the record.
+        """
+        # Compute SHAP values
+        shap_values = self.explainer.shap_values(X, check_additivity=False) if self.explainer_type == shap.DeepExplainer else self.explainer.shap_values(X)
+
+        # Apply squeeze only if the array has three dimensions and the last dimension is 1
+        if shap_values.ndim == 3 and shap_values.shape[-1] == 1:
+            shap_values = np.squeeze(shap_values)
 
-        return new_data_scaled
+        return shap_values
 
-    def predict(self, X: pd.DataFrame) -> float:
+    def predict(self, X: pd.DataFrame, location=(None, None)) -> dict:
         """
-        Make a prediction on one sample using the loaded model.
+        Make a prediction on one sample and explain the prediction using SHAP.
 
         INPUT
        -----
-        X: pd.DataFrame
-            The data to predict a UHI index for. Must contain only one sample.
+        X: pd.DataFrame - The data to predict a UHI index for (must be one sample).
+        location: tuple (longitude, latitude) - Optional location data.
 
         OUTPUT
         ------
-        str:
-            Predicted UHI index.
+        dict - A dictionary containing the predicted UHI index and SHAP reasoning.
         """
-
-        # Check that input contains only one sample
         if X.shape[0] != 1:
-            raise ValueError(f"Input array must contain only one sample, but {X.shape[0]} samples were found")
-
-        # Preprocess the input data to create new features
-        X_processed = self.preprocess(X)
+            raise ValueError(f"Input array must contain only one sample, but {X.shape[0]} samples were found.")
 
-        # Scale the input data
-        X_scaled = self.scale(X_processed)
+        # Preprocess and scale input data
+        X_processed = self.preprocess(X)
+        X_scaled = self.scale(X_processed).reshape(1, -1)
 
-        # Make prediction
+        # Predict UHI index
         y_pred = self.model.predict(X_scaled)
-
-        # Extract the predicted UHI index (assuming it's a single value)
         uhi = y_pred[0][0] if y_pred.ndim == 2 else y_pred[0]
 
-        # Return UHI
-        return uhi
+        # Compute SHAP values
+        shap_values = self.compute_shap_values(X_scaled)
+
+        # Extract the expected base value; ensure it is a single value (not a tensor)
+        if self.explainer_type == shap.DeepExplainer:
+            expected_value = np.array(self.explainer.expected_value)
+        else:
+            expected_value = self.explainer.expected_value
+
+        # Extract single value if expected_value is an array
+        if isinstance(expected_value, np.ndarray):
+            expected_value = expected_value[0]
+
+        # Compute SHAP-based final prediction
+        shap_final_prediction = expected_value + sum(shap_values)
+
+        # Structure feature contributions
+        feature_contributions = [
+            {
+                "feature": feature,
+                "shap_value": value,
+                "impact": "increase" if value > 0 else "decrease"
+            }
+            for feature, value in zip(self.feature_names, shap_values)
+        ]
+
+        # Create the final output
+        prediction_output = {
+            "longitude": location[0],
+            "latitude": location[1],
+            "predicted_uhi_index": uhi,
+            "base_value": expected_value,
+            "shap_final_prediction": shap_final_prediction,
+            "uhi_status": "Urban Heat Island" if shap_final_prediction > 1 else "Cooler Region",
+            "feature_contributions": feature_contributions,
+        }
+
+        return prediction_output
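
Taken together, the commit leaves a single entry point for both inference and explanation. A hedged end-to-end sketch of constructing and calling UhiPredictor the way app.py now does (the input frame below is truncated to two of the raw columns for illustration; a real call must supply every field that preprocess() reads):

    import shap
    import pandas as pd
    from model import UhiPredictor

    # Mirrors the module-level wiring in app.py: the parquet supplies both the
    # SHAP background data and the feature names echoed in feature_contributions.
    ref_data = pd.read_parquet("UHI_explainer_ref_data.parquet")
    predictor = UhiPredictor(
        "mixed_buffers_ResNet_model.keras",
        "mixed_buffers_standard_scaler.pkl",
        shap.DeepExplainer,
        ref_data,
        ref_data.columns,
    )

    # Truncated single-row input; the full raw schema ("150m_NPCRI",
    # "Wind_Direction_deg", "Avg_Wind_Speed", ...) is assembled by app.py
    # from the Gradio form fields.
    input_df = pd.DataFrame({"150m_NPCRI": [0.1], "Wind_Direction_deg": [225.0]})
    output = predictor.predict(input_df, location=(-73.97, 40.78))
    print(output["predicted_uhi_index"], output["uhi_status"])

One design consequence worth noting: because SHAP is computed on X_scaled and the feature names come from the reference parquet, the feature_contributions entries are labeled with the engineered, scaled model inputs rather than the raw form fields.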