attila-balint-kul's picture
added an overview with links and leaderboard
2429cdc verified
raw
history blame
12.3 kB
import pandas as pd
import streamlit as st
import plotly.express as px
from utils import get_leaderboard
def header() -> None:
    """Render the dashboard title followed by a horizontal divider."""
    page_title = "EnFoBench - Electricity Demand"
    st.title(page_title)
    st.divider()
def logos() -> None:
    """Show the KU Leuven and EnergyVille logos side by side in two columns."""
    logo_paths = (
        "./images/ku_leuven_logo.png",
        "./images/energyville_logo.png",
    )
    # One column per logo, filled left to right.
    for column, logo_path in zip(st.columns(2), logo_paths):
        with column:
            st.image(logo_path)
def model_selector(models: list[str]) -> set[str]:
    """Render one checkbox per model and return the identifiers that are ticked.

    Each identifier is split at its first "." into a group prefix and a model
    name. Checkboxes are listed under their group prefix and keyed by the full
    identifier, which is also the session-state key the "Select None" and
    "Select All" buttons write to.

    Args:
        models: Model identifiers of the form ``"<group>.<name>"``.

    Returns:
        The full identifiers of all models whose checkbox is ticked.

    Raises:
        ValueError: If an identifier contains no "." (the split cannot be
            unpacked into two parts).
    """
    # Bucket the short model names by the prefix before the first dot.
    grouped: dict[str, list[str]] = {}
    for model_id in models:
        prefix, short_name = model_id.split(".", maxsplit=1)
        grouped.setdefault(prefix, []).append(short_name)

    st.header("Models to include")

    # Bulk toggles: overwrite the session-state entry of every checkbox
    # before the checkboxes themselves are created further down.
    none_column, all_column = st.columns(2)
    with none_column:
        if st.button("Select None", use_container_width=True):
            for model_id in models:
                st.session_state[model_id] = False
    with all_column:
        if st.button("Select All", use_container_width=True):
            for model_id in models:
                st.session_state[model_id] = True

    selected: set[str] = set()
    for prefix, short_names in grouped.items():
        st.text(prefix)
        for short_name in short_names:
            # The widget key is the full identifier, matching the keys
            # written by the bulk toggles above.
            full_id = f"{prefix}.{short_name}"
            if st.checkbox(short_name, value=True, key=full_id):
                selected.add(full_id)
    return selected
def overview_view(data):
    """Render the introduction text and the leaderboard section.

    Shows a short description of EnFoBench, three bar charts (one each for
    MAE, RMSE and rMAE) built from ``get_leaderboard``, and the full
    leaderboard table underneath.

    Args:
        data: Benchmark results forwarded unchanged to ``get_leaderboard``.
    """
    intro_text = (
        "\n"
        "[EnFoBench](https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit)\n"
        "is a community driven benchmarking framework for energy forecasting models.\n"
        "This dashboard presents the results of the electricity demand forecasting usecase. All models were cross-validated\n"
        "on **365 days** of day ahead forecasting horizon *(10AM until midnight of the next day)*.\n"
    )
    st.markdown(intro_text)
    st.divider()

    st.markdown("## Leaderboard")
    leaderboard = get_leaderboard(data, ["MAE.mean", "RMSE.mean", "rMAE.mean"])

    # (metric label, y-axis title); only the left-most chart labels its y-axis.
    chart_specs = (("MAE", "Model"), ("RMSE", ""), ("rMAE", ""))
    for column, (metric_label, y_axis_title) in zip(st.columns(3), chart_specs):
        score_column = f"{metric_label}.mean"
        with column:
            # Keep the 10 largest values, then order them ascending for display.
            # NOTE(review): this is a "top 10" only if larger leaderboard
            # values are better - confirm against get_leaderboard.
            top_ten = (
                leaderboard.sort_values(score_column, ascending=False)
                .head(10)
                .sort_values(score_column)
            )
            chart = px.bar(top_ten, x=score_column, y=top_ten.index)
            chart.update_layout(
                title=f"Top 10 models by {metric_label}",
                xaxis_title="",
                yaxis_title=y_axis_title,
            )
            st.plotly_chart(chart, use_container_width=True)

    st.dataframe(leaderboard, use_container_width=True)
def buildings_view(data):
    """Render the building count, a per-building table and two pie charts.

    The first row found for each ``unique_id`` supplies that building's
    metadata. The pie charts show how many buildings fall into each building
    class and each timezone.

    Args:
        data: Results table containing ``unique_id`` plus the ``metadata.*``
            and ``dataset.available_history.days`` columns selected below.
    """
    history_label = "Available history (days)"
    source_columns = [
        "unique_id",
        "metadata.cluster_size",
        "metadata.building_class",
        "metadata.location_id",
        "metadata.timezone",
        "dataset.available_history.days",
    ]
    display_names = {
        "metadata.cluster_size": "Cluster size",
        "metadata.building_class": "Building class",
        "metadata.location_id": "Location ID",
        "metadata.timezone": "Timezone",
        "dataset.available_history.days": history_label,
    }

    # One row per building: take the first record seen for each unique_id.
    first_per_building = data[source_columns].groupby("unique_id").first()
    buildings = first_per_building.rename(columns=display_names)

    st.metric("Number of buildings", len(buildings))
    st.divider()

    st.markdown("### Buildings")
    # Draw the history column as a progress bar scaled to the longest history.
    history_progress = st.column_config.ProgressColumn(
        history_label,
        help="Available training data during the first prediction.",
        format="%f",
        min_value=0,
        max_value=float(buildings[history_label].max()),
    )
    st.dataframe(
        buildings,
        use_container_width=True,
        column_config={history_label: history_progress},
    )

    pie_specs = (
        ("#### Building classes", "Building class"),
        ("#### Timezones", "Timezone"),
    )
    for column, (heading, category) in zip(st.columns(2, gap="large"), pie_specs):
        with column:
            st.markdown(heading)
            # reset_index() on the unnamed size Series yields a count column
            # labelled 0, hence values=0.
            counts = buildings.groupby(category).size().reset_index()
            chart = px.pie(counts, values=0, names=category)
            st.plotly_chart(chart, use_container_width=True)
def models_view(data):
    """Render the model count, a per-model table and two pie charts.

    The first row found for each ``model`` supplies its cross-validation
    settings and image information. The pie charts show the number of models
    per variate type and per framework, where the framework is the part of
    the model identifier before the first ".".

    Args:
        data: Results table containing ``model`` plus the ``cv_config.*`` and
            ``model_info.*`` columns selected below.
    """
    source_columns = [
        "model",
        "cv_config.folds",
        "cv_config.horizon",
        "cv_config.step",
        "cv_config.time",
        "model_info.repository",
        "model_info.tag",
        "model_info.variate_type",
    ]
    display_names = {
        "cv_config.folds": "CV Folds",
        "cv_config.horizon": "CV Horizon",
        "cv_config.step": "CV Step",
        "cv_config.time": "CV Time",
        "model_info.repository": "Image Repository",
        "model_info.tag": "Image Tag",
        "model_info.variate_type": "Variate type",
    }

    # One row per model: take the first record seen for each model.
    first_per_model = data[source_columns].groupby("model").first()
    models = first_per_model.rename(columns=display_names)

    st.metric("Number of models", len(models))
    st.divider()

    st.markdown("### Models")
    st.dataframe(models, use_container_width=True)

    variate_column, framework_column = st.columns(2, gap="large")
    with variate_column:
        st.markdown("#### Variate types")
        # reset_index() on the unnamed size Series yields a count column
        # labelled 0, hence values=0.
        variate_counts = models.groupby("Variate type").size().reset_index()
        chart = px.pie(variate_counts, values=0, names="Variate type")
        st.plotly_chart(chart, use_container_width=True)
    with framework_column:
        st.markdown("#### Frameworks")
        # The framework is the model identifier's prefix before the first dot.
        with_framework = models.assign(
            Framework=models.index.str.split(".").str[0]
        )
        framework_counts = with_framework.groupby("Framework").size().reset_index()
        chart = px.pie(framework_counts, values=0, names="Framework")
        st.plotly_chart(chart, use_container_width=True)
def performance_view(data: pd.DataFrame, models_to_plot: set[str]):
    """Render accuracy charts and tables for the selected models.

    The view consists of four parts, each driven by its own selectboxes:

    1. a box plot of one ``<metric>.<aggregation>`` column per model,
    2. a scatter plot of one such column against another,
    3. a table with the min/mean/median/max/std columns of a metric,
       reduced per model with the chosen aggregation,
    4. a model x building table of one ``<metric>.<aggregation>`` column.

    If none of the selected models has any rows in ``data``, a warning is
    shown instead and nothing else is rendered.

    Args:
        data: Results table with a ``model`` column, a ``unique_id`` column
            and ``<metric>.<aggregation>`` columns for the metrics and
            aggregations offered below.
        models_to_plot: Identifiers of the models to include.
    """
    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )
    if data_to_plot.empty:
        # With nothing selected the box-plot height below would be 0, which
        # Plotly rejects (layout height must be at least 10).
        st.warning("No data to display for the selected models.")
        return

    metric_options = ["MAE", "RMSE", "MBE", "rMAE"]
    aggregation_options = ["min", "mean", "median", "max", "std"]

    # --- Box plot: distribution of one metric column per model -------------
    left, right = st.columns(2, gap="small")
    with left:
        # Explicit keys keep these widgets distinct from identically
        # configured selectboxes rendered elsewhere in the same script run.
        metric = st.selectbox("Metric", metric_options, index=0, key="box_metric")
    with right:
        aggregation = st.selectbox(
            "Aggregation", aggregation_options, index=1, key="box_aggregation"
        )
    st.markdown(f"#### {aggregation.capitalize()} {metric} per building")
    fig = px.box(
        data_to_plot,
        x=f"{metric}.{aggregation}",
        y="model",
        color="model",
        points="all",
    )
    # Reserve 40 px of height per selected model.
    fig.update_layout(showlegend=False, height=40 * len(models_to_plot))
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Scatter plot: one metric column against another -------------------
    left, right = st.columns(2, gap="large")
    with left:
        x_metric = st.selectbox("Metric", metric_options, index=0, key="x_metric")
        x_aggregation = st.selectbox(
            "Aggregation", aggregation_options, index=1, key="x_aggregation"
        )
    with right:
        # This box picks a metric, so it is labelled "Metric".
        y_metric = st.selectbox("Metric", metric_options, index=1, key="y_metric")
        y_aggregation = st.selectbox(
            "Aggregation", aggregation_options, index=1, key="y_aggregation"
        )
    st.markdown(
        f"#### {x_aggregation.capitalize()} {x_metric} vs {y_aggregation.capitalize()} {y_metric}"
    )
    fig = px.scatter(
        data_to_plot,
        x=f"{x_metric}.{x_aggregation}",
        y=f"{y_metric}.{y_aggregation}",
        color="model",
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Tables -------------------------------------------------------------
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", metric_options, index=0, key="table_metric")
    with right:
        aggregation = st.selectbox(
            "Aggregation across folds",
            aggregation_options,
            index=1,
            key="table_aggregation",
        )

    def style_table(styler, axis):
        """Apply the shared table look; ``axis`` sets the gradient scope."""
        styler.background_gradient(cmap="seismic", axis=axis)
        styler.format(precision=2)
        # center text and increase font size
        styler.map(lambda x: "text-align: center; font-size: 14px;")
        return styler

    # Per-model table: reduce every numeric column per model, then keep the
    # five statistic columns of the chosen metric.
    stat_columns = [f"{metric}.{stat}" for stat in aggregation_options]
    per_model_table = data_to_plot.groupby(["model"]).agg(
        aggregation, numeric_only=True
    )[stat_columns]
    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per model")
    # axis=0: colour each column on its own scale.
    st.dataframe(
        per_model_table.style.pipe(style_table, axis=0), use_container_width=True
    )

    # Per-building table: one row per model, one column per building.
    per_building_table = (
        data_to_plot.groupby(["model", "unique_id"])
        .agg(aggregation, numeric_only=True)
        .reset_index()
        .pivot(index="model", columns="unique_id", values=f"{metric}.{aggregation}")
    )
    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per building")
    # axis=None: one colour scale across the whole table.
    st.dataframe(
        per_building_table.style.pipe(style_table, axis=None),
        use_container_width=True,
    )
def computation_view(data, models_to_plot: set[str]):
    """Render resource-usage charts for the selected models.

    Shows a parallel-coordinates plot of the per-model means of CPU usage,
    memory usage and the four ``*.mean`` error columns, followed by a scatter
    plot of one error column against CPU usage on a logarithmic x-axis.

    For the scatter plot, "aggregation per building" selects which
    ``<metric>.<aggregation>`` column is read, and "aggregation per model"
    selects how the rows of each model are reduced to a single point.

    Args:
        data: Results table with a ``model`` column, ``resource_usage.CPU``,
            ``resource_usage.memory`` and ``<metric>.<aggregation>`` columns.
        models_to_plot: Identifiers of the models to include.
    """
    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )

    metric_options = ["MAE", "RMSE", "MBE", "rMAE"]
    aggregation_options = ["min", "mean", "median", "max", "std"]

    st.markdown("#### Computational Resources")
    per_model_means = (
        data_to_plot.groupby("model").mean(numeric_only=True).reset_index()
    )
    # NOTE(review): "model" is a string column while the other dimensions are
    # numeric - confirm that it is rendered as an axis.
    fig = px.parallel_coordinates(
        per_model_means,
        dimensions=[
            "model",
            "resource_usage.CPU",
            "resource_usage.memory",
            "MAE.mean",
            "RMSE.mean",
            "MBE.mean",
            "rMAE.mean",
        ],
        color="rMAE.mean",
        color_continuous_scale=px.colors.diverging.Portland,
    )
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    left, center, right = st.columns(3, gap="small")
    with left:
        # Explicit keys keep these widgets distinct from identically
        # configured selectboxes rendered elsewhere in the same script run.
        metric = st.selectbox(
            "Metric", metric_options, index=0, key="computation_metric"
        )
    with center:
        aggregation_per_building = st.selectbox(
            "Aggregation per building",
            aggregation_options,
            index=1,
            key="computation_aggregation_per_building",
        )
    with right:
        aggregation_per_model = st.selectbox(
            "Aggregation per model",
            aggregation_options,
            index=1,
            key="computation_aggregation_per_model",
        )
    st.markdown(
        f"#### {aggregation_per_model.capitalize()} {aggregation_per_building.capitalize()} {metric} vs CPU usage"
    )

    # Grouping by model reduces each model's rows to one point, so this is
    # the per-model aggregation.
    aggregated_data = (
        data_to_plot.groupby("model")
        .agg(aggregation_per_model, numeric_only=True)
        .reset_index()
    )
    fig = px.scatter(
        aggregated_data,
        x="resource_usage.CPU",
        # The column suffix selects the per-building statistic, matching the
        # order of the heading above.
        y=f"{metric}.{aggregation_per_building}",
        color="model",
        log_x=True,
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)