Spaces:

EDS-lab
/

EnFoBench-ElectricityDemand

Running

File size: 7,273 Bytes

f87d2ee
f1e08ee
f87d2ee
f1e08ee
 
f87d2ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1e08ee
f87d2ee

import pandas as pd
import streamlit as st
import plotly.express as px


def buildings_view(data):
    """Render the buildings overview page.

    Collapses ``data`` to one row per ``unique_id`` (keeping the first
    non-null value of every selected column within each group), then shows
    the number of buildings, a table of their metadata and two pie charts
    with the distribution of building classes and timezones.

    Args:
        data: DataFrame that contains at least the ``unique_id``,
            ``metadata.cluster_size``, ``metadata.building_class``,
            ``metadata.location_id``, ``metadata.timezone`` and
            ``dataset.available_history.days`` columns.
    """
    history_column = "Available history (days)"
    # Lower bound for the progress-bar scale; keeps the rendering identical
    # to the previous fixed scale whenever no building exceeds it.
    default_history_scale = 1000

    buildings = (
        data[
            [
                "unique_id",
                "metadata.cluster_size",
                "metadata.building_class",
                "metadata.location_id",
                "metadata.timezone",
                "dataset.available_history.days",
            ]
        ]
        .groupby("unique_id")
        .first()
        .rename(
            columns={
                "metadata.cluster_size": "Cluster size",
                "metadata.building_class": "Building class",
                "metadata.location_id": "Location ID",
                "metadata.timezone": "Timezone",
                "dataset.available_history.days": history_column,
            }
        )
    )

    # Scale the progress bars to the longest history actually present so
    # buildings beyond the default scale are not all drawn as "full".
    longest_history = pd.to_numeric(
        buildings[history_column], errors="coerce"
    ).max()
    if pd.isna(longest_history):
        # Empty table or no numeric values: fall back to the default scale.
        history_scale = default_history_scale
    else:
        history_scale = max(default_history_scale, float(longest_history))

    st.metric("Number of buildings", len(buildings))
    st.divider()

    st.markdown("### Buildings")
    st.dataframe(
        buildings,
        use_container_width=True,
        column_config={
            history_column: st.column_config.ProgressColumn(
                history_column,
                help="Available training data during the first prediction.",
                format="%f",
                min_value=0,
                max_value=history_scale,
            ),
        },
    )

    left, right = st.columns(2, gap="large")
    with left:
        st.markdown("#### Building classes")
        # Name the size column explicitly so the chart does not depend on
        # (and display) the implicit integer column label 0.
        class_counts = (
            buildings.groupby("Building class").size().reset_index(name="Count")
        )
        fig = px.pie(class_counts, values="Count", names="Building class")
        st.plotly_chart(fig, use_container_width=True)

    with right:
        st.markdown("#### Timezones")
        timezone_counts = (
            buildings.groupby("Timezone").size().reset_index(name="Count")
        )
        fig = px.pie(timezone_counts, values="Count", names="Timezone")
        st.plotly_chart(fig, use_container_width=True)


def models_view(data):
    """Render the models overview page.

    Collapses ``data`` to one row per ``model`` (keeping the first non-null
    value of every selected column within each group), then shows the number
    of models, a table with their cross-validation and image settings, and
    two pie charts: the distribution of variate types and of frameworks.

    The framework of a model is the part of its name before the first ``"."``.

    Args:
        data: DataFrame that contains at least the ``model``,
            ``cv_config.folds``, ``cv_config.horizon``, ``cv_config.step``,
            ``cv_config.time``, ``model_info.repository``, ``model_info.tag``
            and ``model_info.variate_type`` columns.
    """
    models = (
        data[
            [
                "model",
                "cv_config.folds",
                "cv_config.horizon",
                "cv_config.step",
                "cv_config.time",
                "model_info.repository",
                "model_info.tag",
                "model_info.variate_type",
            ]
        ]
        .groupby("model")
        .first()
        .rename(
            columns={
                "cv_config.folds": "CV Folds",
                "cv_config.horizon": "CV Horizon",
                "cv_config.step": "CV Step",
                "cv_config.time": "CV Time",
                "model_info.repository": "Image Repository",
                "model_info.tag": "Image Tag",
                "model_info.variate_type": "Variate type",
            }
        )
    )

    st.metric("Number of models", len(models))
    st.divider()

    st.markdown("### Models")
    st.dataframe(models, use_container_width=True)

    left, right = st.columns(2, gap="large")
    with left:
        st.markdown("#### Variate types")
        # Name the size column explicitly so the chart does not depend on
        # (and display) the implicit integer column label 0.
        variate_counts = (
            models.groupby("Variate type").size().reset_index(name="Count")
        )
        fig = px.pie(variate_counts, values="Count", names="Variate type")
        st.plotly_chart(fig, use_container_width=True)

    with right:
        st.markdown("#### Frameworks")
        # The table is indexed by model name; the prefix before the first
        # "." is used as the framework label.
        frameworks = models.index.str.split(".").str[0]
        framework_counts = (
            models.assign(Framework=frameworks)
            .groupby("Framework")
            .size()
            .reset_index(name="Count")
        )
        fig = px.pie(framework_counts, values="Count", names="Framework")
        st.plotly_chart(fig, use_container_width=True)


# Options offered by every metric / aggregation selector on the page.
_METRIC_OPTIONS = ("MAE", "RMSE", "MBE", "rMAE")
_AGGREGATION_OPTIONS = ("min", "mean", "median", "max", "std")

# Box plot sizing: one row per selected model, but never shorter than the
# minimum so the figure stays readable (and valid) for small selections.
_BOX_PLOT_ROW_HEIGHT = 40
_BOX_PLOT_MIN_HEIGHT = 200


def _style_metrics_table(styler, gradient_axis):
    """Apply the shared look of the metrics tables to ``styler``.

    Args:
        styler: pandas Styler of the table to format.
        gradient_axis: ``axis`` passed to ``background_gradient``; ``0``
            colors each column on its own scale, ``None`` colors the whole
            table on one scale.

    Returns:
        The same ``styler``, for use with ``Styler.pipe``.
    """
    styler.background_gradient(cmap="seismic", axis=gradient_axis)
    styler.format(precision=2)

    # Center the text and increase the font size of every cell.
    styler.map(lambda _: "text-align: center; font-size: 14px;")
    return styler


def performance_view(data: pd.DataFrame, models_to_plot: set[str]) -> None:
    """Render the model performance page.

    Shows, for the rows of ``data`` whose ``model`` is in ``models_to_plot``:

    1. a box plot of one ``<metric>.<aggregation>`` column per model,
    2. a scatter plot of two ``<metric>.<aggregation>`` columns against
       each other, colored by model,
    3. a table with the ``min``/``mean``/``median``/``max``/``std`` columns
       of one metric, aggregated per model,
    4. a model x building table of one ``<metric>.<aggregation>`` column.

    Args:
        data: DataFrame with ``model`` and ``unique_id`` columns plus one
            ``<metric>.<aggregation>`` column for every combination of the
            metrics and aggregations offered in the selectors.
        models_to_plot: Names of the models to include.
    """
    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )

    # Nothing to draw: an empty selection would otherwise request a
    # zero-height figure, which Plotly rejects.
    if data_to_plot.empty:
        st.warning("No results to display for the selected models.")
        return

    # --- Box plot: distribution of one metric per model -------------------
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", _METRIC_OPTIONS, index=0)
    with right:
        aggregation = st.selectbox("Aggregation", _AGGREGATION_OPTIONS, index=1)
    st.markdown(f"#### {aggregation.capitalize()} {metric} per building")
    fig = px.box(
        data_to_plot,
        x=f"{metric}.{aggregation}",
        y="model",
        color="model",
        points="all",
    )
    fig.update_layout(
        showlegend=False,
        height=max(
            _BOX_PLOT_MIN_HEIGHT, _BOX_PLOT_ROW_HEIGHT * len(models_to_plot)
        ),
    )
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Scatter plot: one metric against another -------------------------
    left, right = st.columns(2, gap="large")
    with left:
        x_metric = st.selectbox(
            "Metric", _METRIC_OPTIONS, index=0, key="x_metric"
        )
        x_aggregation = st.selectbox(
            "Aggregation",
            _AGGREGATION_OPTIONS,
            index=1,
            key="x_aggregation",
        )
    with right:
        y_metric = st.selectbox(
            "Metric", _METRIC_OPTIONS, index=1, key="y_metric"
        )
        y_aggregation = st.selectbox(
            "Aggregation",
            _AGGREGATION_OPTIONS,
            index=1,
            key="y_aggregation",
        )

    st.markdown(
        f"#### {x_aggregation.capitalize()} {x_metric} vs {y_aggregation.capitalize()} {y_metric}"
    )
    fig = px.scatter(
        data_to_plot,
        x=f"{x_metric}.{x_aggregation}",
        y=f"{y_metric}.{y_aggregation}",
        color="model",
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Tables ------------------------------------------------------------
    left, right = st.columns(2, gap="small")
    with left:
        table_metric = st.selectbox(
            "Metric", _METRIC_OPTIONS, index=0, key="table_metric"
        )
    with right:
        table_aggregation = st.selectbox(
            "Aggregation across folds",
            _AGGREGATION_OPTIONS,
            index=1,
            key="table_aggregation",
        )

    # Per model: aggregate every numeric column over the model's rows, then
    # keep the five statistics columns of the selected metric.
    per_model = data_to_plot.groupby(["model"]).agg(
        table_aggregation, numeric_only=True
    )[
        [
            f"{table_metric}.min",
            f"{table_metric}.mean",
            f"{table_metric}.median",
            f"{table_metric}.max",
            f"{table_metric}.std",
        ]
    ]

    st.markdown(
        f"#### {table_aggregation.capitalize()} {table_metric} stats per model"
    )
    # Columns hold different statistics, so each gets its own color scale.
    st.dataframe(
        per_model.style.pipe(_style_metrics_table, gradient_axis=0),
        use_container_width=True,
    )

    # Per building: aggregate within each (model, building) pair and lay the
    # selected metric column out as a model x building grid.
    per_building = (
        data_to_plot.groupby(["model", "unique_id"])
        .agg(table_aggregation, numeric_only=True)
        .reset_index()
        .pivot(
            index="model",
            columns="unique_id",
            values=f"{table_metric}.{table_aggregation}",
        )
    )

    st.markdown(
        f"#### {table_aggregation.capitalize()} {table_metric} stats per building"
    )
    # All cells show the same quantity, so one color scale spans the table.
    st.dataframe(
        per_building.style.pipe(_style_metrics_table, gradient_axis=None),
        use_container_width=True,
    )