Spaces:

FranciscoLozDataScience
/

uhi_resnet_model

Sleeping

App Files Files Community

FranciscoLozDataScience committed on Mar 17

Commit

e0d8ab4

1 Parent(s): 60fa23b

publish app code

Browse files

Files changed (8) hide show

README.md +4 -4
app.py +208 -0
examples.csv +0 -0
explainer.py +111 -0
mixed_buffers_ResNet_model.keras +3 -0
mixed_buffers_standard_scaler.pkl +3 -0
model.py +133 -0
requirements.txt +4 -0

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
-title: Uhi Resnet Model
-emoji: 📚
-colorFrom: red
-colorTo: green
 sdk: gradio
 sdk_version: 5.21.0
 app_file: app.py

 ---
+title: Play with an Urban Heat Island ResNet Model
+emoji: 🔥
+colorFrom: gray
+colorTo: red
 sdk: gradio
 sdk_version: 5.21.0
 app_file: app.py

app.py ADDED Viewed

	@@ -0,0 +1,208 @@

+import gradio as gr
+import shap
+from model import UhiModel
+from explainer import UhiExplainer
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+# Shared model wrapper, created once at import time: UhiModel loads the Keras model file and
+# unpickles the fitted scaler from these paths, and predict() reuses this instance on every call.
+MODEL = UhiModel("mixed_buffers_ResNet_model.keras","mixed_buffers_standard_scaler.pkl")
+def filter_map(uhi, longitude, latitude):
+    '''
+    This function generates a map based on uhi prediction
+    '''
+    #set up custom data
+    data = [uhi, longitude, latitude]
+    # Create the plot
+    fig = go.Figure(go.Scattermapbox(
+        lat=latitude,
+        lon=longitude,
+        mode='markers',
+        marker=go.scattermapbox.Marker(
+            size=6
+        ),
+        hoverinfo="text",
+        hovertemplate='<b>UHI Index</b>: %{customdata[0]}<br><b>long</b>: %{customdata[1]}<br><b>lat</b>: %{customdata[2]}<br>',
+        customdata=data
+    ))
+    fig.update_layout(
+        mapbox_style="open-street-map",
+        hovermode='closest',
+        mapbox=dict(
+            bearing=0,
+            center=go.layout.mapbox.Center(
+                lat=40.7128,
+                lon=-74.0060  # Default to New York City for initial view
+            ),
+            pitch=0,
+            zoom=10
+        ),
+    )
+    return fig
+def predict(
+        longitude, latitude, m50_NPCRI, m100_Ground_Elevation, avg_wind_speed,
+        wind_direction, traffic_volume, m150_Ground_Elevation,
+        relative_humidity, m150_NDVI, m150_NDBI,
+        m300_SI, m300_NPCRI, m300_Coastal_Aerosol,
+        m300_Total_Building_Area_m2, m300_Building_Construction_Year, m300_Ground_Elevation,
+        m300_Building_Height, m300_Building_Count, m300_NDVI,
+        m300_NDBI, m300_Building_Density, solar_flux
+    ):
+    '''
+    Predict the UHI index for the data inputed, Longitude and Latitude are used to generate a map
+    and do not affect the UHI index prediction.
+    '''
+    # Create a dictionary with input data and dataset var names
+    input_data = {
+        "50m_1NPCRI": m50_NPCRI,
+        "100m_Ground_Elevation": m100_Ground_Elevation,
+        "Avg_Wind_Speed": avg_wind_speed,
+        "Wind_Direction": wind_direction,
+        "Traffic_Volume": traffic_volume,
+        "150m_Ground_Elevation": m150_Ground_Elevation,
+        "Relative_Humidity": relative_humidity,
+        "150m_NDVI": m150_NDVI,
+        "150m_NDBI": m150_NDBI,
+        "300m_SI": m300_SI,
+        "300m_NPCRI": m300_NPCRI,
+        "300m_Coastal_Aerosol": m300_Coastal_Aerosol,
+        "300m_Total_Building_Area_m2": m300_Total_Building_Area_m2,
+        "300m_Building_Construction_Year": m300_Building_Construction_Year,
+        "300m_Ground_Elevation": m300_Ground_Elevation,
+        "300m_Building_Height": m300_Building_Height,
+        "300m_Building_Count": m300_Building_Count,
+        "300m_NDVI": m300_NDVI,
+        "300m_NDBI": m300_NDBI,
+        "300m_Building_Density": m300_Building_Density,
+        "Solar_Flux": solar_flux
+    }
+    # Convert to DataFrame
+    input_df = pd.DataFrame(input_data, index=[0])
+    #predict
+    uhi_index = MODEL.predict(input_df)
+    # explain the prediction
+    explainer = UhiExplainer(
+        model=MODEL.model,
+        explainer_type=shap.DeepExplainer,
+        X=input_df,
+        feature_names=input_df.columns,
+        ref_data=input_df,
+        shap_values=None  # Compute SHAP values on the fly
+    )
+    reason = explainer.reasoning(index=0, location=(longitude, latitude))
+    # generate map
+    plot = filter_map(uhi_index, longitude, latitude)
+    return uhi_index, reason["uhi_status"], reason["feature_contributions"], plot
+def load_examples(csv_file):
+    '''
+    Load examples from csv file
+    '''
+    # Read examples from CSV file
+    df = pd.read_csv(csv_file)
+    # Convert DataFrame to a list of lists
+    examples = df.values.tolist()
+    return examples
+def load_interface():
+    '''
+    Configure Gradio interface
+    '''
+    #set blocks
+    info_page = gr.Blocks()
+    with info_page:
+        # set title and description
+        gr.Markdown(
+        """
+        # ResNet model for Predicting Urban Heat Island (UHI) Index
+        **Contributors**: Francisco Lozano, Dalton Knapp, Adam Zizi\n
+        **University**: Depaul University\n
+        ## Overview
+        Our project focused on creating a micro-scale machine learning model that predicts the locations and severity of the UHI effect.
+        The model used various datasets, including near-surface air temperatures, building footprint data, weather data, and
+        satellite data, to identify key drivers of UHI. This model provides insights into urban areas that are most affected by UHI,
+        enabling urban planners and policymakers to take effective mitigation actions.
+        >NOTE: The longitude and latitude inputs are used to identify the location of the prediction, but they do not affect the UHI index prediction.\n
+        ## Repository
+        The code for this project is available on GitHub. It includes the model training, evaluation, and prediction scripts, as well as
+        the datasets used for training and testing. The repository also contains Jupyter notebooks that provide detailed explanations of the model's
+        architecture, training process, and evaluation metrics. The notebooks include visualizations of the model's performance and feature importance analysis.\n
+        [Project Repo](https://github.com/FranciscoLozCoding/cooling_with_code)
+        """
+        )
+    # set inputs and outputs for the model
+    longitude = gr.Number(label="Longitude", precision=5, info="The Longitude of the location")
+    latitude = gr.Number(label="Latitude", precision=5, info="The Latitude of the location")
+    m50_NPCRI = gr.Number(label="50m NPCRI", precision=5, info="The average Normalized Difference Vegetation Index in a 50m Buffer Zone")
+    m100_Ground_Elevation = gr.Number(label="100m Ground Elevation", precision=5, info="The average Ground Elevation in a 100m Buffer Zone")
+    avg_wind_speed = gr.Number(label="Avg Wind Speed [m/s]", precision=5, info="The average Wind Speed at the location")
+    wind_direction = gr.Number(label="Wind Direction [degrees]", precision=5, info="The average Wind Direction at the location")
+    traffic_volume = gr.Number(label="Traffic Volume", precision=5, info="The Traffic Volume at the location")
+    m150_Ground_Elevation = gr.Number(label="150m Ground Elevation", precision=5, info="The average Ground Elevation in a 150m Buffer Zone")
+    relative_humidity = gr.Number(label="Relative Humidity [percent]", precision=5, info="The average Relative Humidity at the location")
+    m150_NDVI = gr.Number(label="150m NDVI", precision=5, info="The average Normalized Difference Vegetation Index in a 150m Buffer Zone")
+    m150_NDBI = gr.Number(label="150m NDBI", precision=5, info="The average Normalized Difference Built-up Index in a 150m Buffer Zone")
+    m300_SI = gr.Number(label="300m SI", precision=5, info="The average Shadow Index in a 300m Buffer Zone")
+    m300_NPCRI = gr.Number(label="300m NPCRI", precision=5, info="The average Normalized Pigment Chlorophyll Ratio Index in a 300m Buffer Zone")
+    m300_Coastal_Aerosol = gr.Number(label="300m Coastal Aerosol", precision=5, info="The average Coastal Aerosol in a 300m Buffer Zone")
+    m300_Total_Building_Area_m2 = gr.Number(label="300m Total Building Area(m2)", precision=5, info="The Total Building Area in a 300m Buffer Zone")
+    m300_Building_Construction_Year = gr.Number(label="300m Building Construction Year", precision=5, info="The average Building Construction Year in a 300m Buffer Zone")
+    m300_Ground_Elevation = gr.Number(label="300m Ground Elevation", precision=5, info="The average Ground Elevation in a 300m Buffer Zone")
+    m300_Building_Height = gr.Number(label="300m Building Height", precision=5, info="The average Building Height in a 300m Buffer Zone")
+    m300_Building_Count = gr.Number(label="300m Building Count", precision=5, info="The average Building Count in a 300m Buffer Zone")
+    m300_NDVI = gr.Number(label="300m NDVI", precision=5, info="The average Normalized Difference Vegetation Index in a 300m Buffer Zone")
+    m300_NDBI = gr.Number(label="300m NDBI", precision=5, info="The average Normalized Difference Built-up Index in a 300m Buffer Zone")
+    m300_Building_Density = gr.Number(label="300m Building Density", precision=5, info="The average Building Density in a 300m Buffer Zone")
+    solar_flux = gr.Number(label="Solar Flux [W/m^2]", precision=5, info="The average Solar Flux at the location")
+    inputs = [longitude, latitude, m50_NPCRI, m100_Ground_Elevation, avg_wind_speed, wind_direction,
+              traffic_volume, m150_Ground_Elevation, relative_humidity, m150_NDVI,
+              m150_NDBI, m300_SI, m300_NPCRI, m300_Coastal_Aerosol, m300_Total_Building_Area_m2,
+              m300_Building_Construction_Year, m300_Ground_Elevation, m300_Building_Height, m300_Building_Count,
+              m300_NDVI, m300_NDBI, m300_Building_Density, solar_flux]
+    uhi = gr.number(label="Predicted UHI Index", precision=5)
+    # set model explainer outputs
+    uhi_label = gr.Label(label="Predicted Status based on UHI Index")
+    feature_contributions = gr.JSON(label="Feature Contributions", info="The contributions of each feature to the UHI index prediction")
+    # Urban Location
+    plot = gr.Plot(label="Urban Location", info="A plot showing the location of the prediction based on the longitude and latitude inputs")
+    model_page = gr.Interface(
+        predict,
+        inputs=inputs,
+        outputs=[uhi, uhi_label, feature_contributions, plot],
+        live=True,
+        examples=load_examples("examples.csv"),
+        title="Interact with The ResNet UHI Model",
+        description="This model predicts the Urban Heat Island (UHI) index based on various environmental and urban factors. Adjust the inputs to see how they affect the UHI index prediction.",
+    )
+    iface = gr.TabbedInterface(
+        [info_page, model_page],
+        ["Information", "UHI Model"]
+    )
+    iface.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=["/"])
+# Build and launch the UI only when this file is executed as a script, not when it is imported
+if __name__ == "__main__":
+    load_interface()

examples.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

explainer.py ADDED Viewed

	@@ -0,0 +1,111 @@

+"""This module provides an explainer for the model."""
+import shap
+import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+class UhiExplainer:
+    """
+    A class for SHAP-based model explanation.
+    Attributes:
+    - model: Trained model (e.g., RandomForestRegressor, XGBRegressor).
+    - explainer_type: SHAP explainer class (e.g., shap.TreeExplainer, shap.KernelExplainer).
+    - X: Data (Pandas DataFrame) used to compute SHAP values.
+    - feature_names: List of feature names.
+    - explainer: SHAP explainer instance.
+    - shap_values: Computed SHAP values.
+    Methods:
+    - apply_shap(): Computes SHAP values.
+    - summary_plot(): Generates a SHAP summary plot.
+    - bar_plot(): Generates a bar chart of feature importance.
+    - dependence_plot(): Generates a dependence plot for a feature.
+    - force_plot(): Generates a force plot for an individual prediction.
+    - init_js(): Initializes SHAP for Jupyter Notebook.
+    - reasoning(): Provides insights on why a record received a high or low UHI index.
+    """
+    def __init__(self, model, explainer_type, X, feature_names, ref_data=None, shap_values=None):
+        """
+        Initializes the Explainer with a trained model, explainer type, and dataset.
+        Parameters:
+        - model: Trained model (e.g., RandomForestRegressor, XGBRegressor).
+        - explainer_type: SHAP explainer class (e.g., shap.TreeExplainer, shap.KernelExplainer).
+        - X: Data (Pandas DataFrame) used to compute SHAP values.
+        - feature_names: List of feature names.
+        - ref_data (optional): The reference dataset (background dataset) is used by SHAP to estimate the expected output of the model
+        - shap_values (optional): Precomputed SHAP values
+        """
+        self.model = model
+        self.explainer_type = explainer_type
+        self.X = np.array(X) if isinstance(X, pd.DataFrame) else X  # Ensure NumPy format
+        if ref_data is not None:
+            ref_data = np.array(ref_data) if isinstance(ref_data, pd.DataFrame) else ref_data # Ensure NumPy format
+        self.feature_names = feature_names
+        self.explainer = explainer_type(model, ref_data)  # Initialize explainer
+        # Compute SHAP values
+        if shap_values is not None:
+            self.shap_values = shap_values
+        else:
+            self.shap_values = self.explainer.shap_values(self.X, check_additivity=False) if self.explainer_type == shap.DeepExplainer else self.explainer.shap_values(self.X)
+        # Apply squeeze only if the array has three dimensions and the last dimension is 1
+        if self.shap_values.ndim == 3 and self.shap_values.shape[-1] == 1:
+            self.shap_values = np.squeeze(self.shap_values)
+    def reasoning(self, index=0, location=(None, None)):
+        """
+        Provides insights on why the record received a high or low UHI index.
+        Parameters:
+            index (int): The index of the observation of interest.
+            location (tuple) (optional): The location of the record (long, lat).
+        Returns:
+            dict: The insights for the selected record.
+        """
+        # Ensure expected_value is a single value (not tensor)
+        if self.explainer_type == shap.DeepExplainer:
+            expected_value = np.array(self.explainer.expected_value)
+        else:
+            expected_value = self.explainer.expected_value
+        # Extract single value if expected_value is an array
+        if isinstance(expected_value, np.ndarray):
+            expected_value = expected_value[0]
+        # Validate record index
+        if index >= len(self.shap_values) or index < 0:
+            return {"error": "Invalid record index"}
+        # Extract SHAP values for the specified record
+        record_shap_values = self.shap_values[index]
+        # Compute SHAP-based final prediction
+        shap_final_prediction = expected_value + sum(record_shap_values)
+        # Structure feature contributions
+        feature_contributions = [
+            {
+                "feature": feature,
+                "shap_value": value,
+                "impact": "increase" if value > 0 else "decrease"
+            }
+            for feature, value in zip(self.feature_names, record_shap_values)
+        ]
+        # Create JSON structure
+        shap_json = {
+            "record_index": index,
+            "longitude": location[0],
+            "latitude": location[1],
+            "base_value": expected_value,
+            "shap_final_prediction": shap_final_prediction,  # SHAP-based predicted value
+            "uhi_status": "Urban Heat Island" if shap_final_prediction > 1 else "Cooler Region",
+            "feature_contributions": feature_contributions,
+        }
+        return shap_json

mixed_buffers_ResNet_model.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a5420fdce9a338369c6c88bb33fcdfdc5bfb0ed8e674fdd64afa52453fcddbf1
+size 43663843

mixed_buffers_standard_scaler.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d3961d88dc1644f33012bcab972d6e44aec2a0028502412a009bbc58905f347d
+size 1605

model.py ADDED Viewed

	@@ -0,0 +1,133 @@

+import numpy as np
+import pandas as pd
+from tensorflow.keras.models import load_model
+import pickle
+class UhiModel:
+    """
+    Urban Heat Island Model Class that can predict new instances
+    INPUTS
+    ---
+    model_path: the path to the model file
+    scaler_path: the path to the standard scaler file
+    """
+    def __init__(self, model_path, scaler_path):
+        self.model = load_model(model_path)
+        with open(scaler_path, 'rb') as f:
+            self.scaler = pickle.load(f)
+    def preprocess(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Preprocess the input DataFrame to create new features for the model.
+        INPUT
+        -----
+        df: pd.DataFrame
+            The input DataFrame containing the features.
+        OUTPUT
+        ------
+        pd.DataFrame
+            The preprocessed DataFrame with additional features.
+        """
+        Wind_X = np.sin(df["Wind_Direction"])
+        Wind_Y = np.cos(df["Wind_Direction"])
+        m100_Elevation_Wind_X = df["100m_Ground_Elevation"] * df["Avg_Wind_Speed"] * Wind_X
+        m150_Elevation_Wind_Y = df["150m_Ground_Elevation"] * df["Avg_Wind_Speed"] * Wind_Y
+        m150_Humidity_NDVI = df["Relative_Humidity"] * df["150m_NDVI"]
+        m150_Traffic_NDBI = df["Traffic_Volume"] * df["150m_NDBI"]
+        m300_Building_Wind_X = df["300m_Building_Height"] * df["Avg_Wind_Speed"] * Wind_X
+        m300_Building_Wind_Y = df["300m_Building_Height"] * df["Avg_Wind_Speed"] * Wind_Y
+        m300_Elevation_Wind_Y = df["300m_Ground_Elevation"] * df["Avg_Wind_Speed"] * Wind_Y
+        m300_BldgHeight_Count = df["300m_Building_Height"] * df["300m_Building_Count"]
+        m300_TotalBuildingArea_NDVI = df["300m_Total_Building_Area_m2"] * df["300m_NDVI"]
+        m300_Traffic_NDVI = df["Traffic_Volume"] * df["300m_NDVI"]
+        m300_Traffic_NDBI = df["Traffic_Volume"] * df["300m_NDBI"]
+        m300_Building_Aspect_Ratio = df["300m_Building_Height"] / np.sqrt(df["300m_Total_Building_Area_m2"] + 1e-6)
+        m300_Sky_View_Factor = 1 - df["300m_Building_Density"]
+        m300_Canopy_Cover_Ratio = df["300m_NDVI"] / (df["300m_Building_Density"] + 1e-6)
+        m300_GHG_Proxy = df["300m_Building_Count"] * df["Traffic_Volume"] * df["Solar_Flux"]
+        output = {
+            "50m_1NPCRI": df["50m_1NPCRI"],
+            "100m_Elevation_Wind_X": m100_Elevation_Wind_X,
+            "150m_Traffic_Volume": df["Traffic_Volume"],
+            "150m_Elevation_Wind_Y": m150_Elevation_Wind_Y,
+            "150m_Humidity_NDVI": m150_Humidity_NDVI,
+            "150m_Traffic_NDBI": m150_Traffic_NDBI,
+            "300m_SI": df["300m_SI"],
+            "300m_NPCRI": df["300m_NPCRI"],
+            "300m_Coastal_Aerosol": df["300m_Coastal_Aerosol"],
+            "300m_Total_Building_Area_m2": df["300m_Total_Building_Area_m2"],
+            "300m_Building_Construction_Year": df["300m_Building_Construction_Year"],
+            "300m_Ground_Elevation": df["300m_Ground_Elevation"],
+            "300m_Building_Wind_X": m300_Building_Wind_X,
+            "300m_Building_Wind_Y": m300_Building_Wind_Y,
+            "300m_Elevation_Wind_Y": m300_Elevation_Wind_Y,
+            "300m_BldgHeight_Count": m300_BldgHeight_Count,
+            "300m_TotalBuildingArea_NDVI": m300_TotalBuildingArea_NDVI,
+            "300m_Traffic_NDVI": m300_Traffic_NDVI,
+            "300m_Traffic_NDBI": m300_Traffic_NDBI,
+            "300m_Building_Aspect_Ratio": m300_Building_Aspect_Ratio,
+            "300m_Sky_View_Factor": m300_Sky_View_Factor,
+            "300m_Canopy_Cover_Ratio": m300_Canopy_Cover_Ratio,
+            "300m_GHG_Proxy": m300_GHG_Proxy
+        }
+        return output
+    def scale(self, X):
+        """
+        Apply the scaler used to train the model to the new data
+        INPUT
+        -----
+        X: the data to be scaled
+        OUTPUT
+        ------
+        returns the scaled data
+        """
+        new_data_scaled = self.scaler.transform(X)
+        return new_data_scaled
+    def predict(self, X: pd.DataFrame) -> float:
+        """
+        Make a prediction on one sample using the loaded model.
+        INPUT
+        -----
+        X: pd.DataFrame
+            The data to predict a UHI index for. Must contain only one sample.
+        OUTPUT
+        ------
+        str:
+            Predicted UHI index.
+        """
+        # Check that input contains only one sample
+        if X.shape[0] != 1:
+            raise ValueError(f"Input array must contain only one sample, but {X.shape[0]} samples were found")
+        # Preprocess the input data to create new features
+        X_processed = self.preprocess(X)
+        # Scale the input data
+        X_scaled = self.scale(X_processed)
+        # Ensure the scaled data is 2D
+        X_scaled = X_scaled.reshape(1, -1)
+        # Make prediction
+        y_pred = self.model.predict(X_scaled)
+        # Extract the predicted UHI index (assuming it's a single value)
+        uhi = y_pred[0][0] if y_pred.ndim == 2 else y_pred[0]
+        # Return UHI
+        return uhi

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio==5.14.0
+shap==0.46.0
+tensorflow[and-cuda]==2.18.0
+plotly==6.0.*