|
import pandas as pd |
|
import streamlit as st |
|
import plotly.express as px |
|
|
|
|
|
def buildings_view(data):
    """Render the buildings page: a count metric, a table and two pie charts.

    ``data`` is indexed with ``unique_id`` plus the metadata columns named in
    ``display_names``, collapsed to one row per ``unique_id`` with
    ``groupby(...).first()`` and relabelled for display.
    """
    # Source column -> label shown in the UI. The key order is also the
    # column order of the rendered table.
    display_names = {
        "metadata.cluster_size": "Cluster size",
        "metadata.building_class": "Building class",
        "metadata.location_id": "Location ID",
        "metadata.timezone": "Timezone",
        "dataset.available_history.days": "Available history (days)",
    }
    per_building = data[["unique_id", *display_names]].groupby("unique_id").first()
    buildings = per_building.rename(columns=display_names)

    st.metric("Number of buildings", len(buildings))
    st.divider()

    st.markdown("### Buildings")
    # The history column is drawn as a progress bar on a fixed 0-1000 scale.
    history_column = st.column_config.ProgressColumn(
        "Available history (days)",
        help="Available training data during the first prediction.",
        format="%f",
        min_value=0,
        max_value=1000,
    )
    st.dataframe(
        buildings,
        use_container_width=True,
        column_config={"Available history (days)": history_column},
    )

    # One pie chart per categorical column, laid out side by side.
    pies = (
        ("#### Building classes", "Building class"),
        ("#### Timezones", "Timezone"),
    )
    for container, (heading, column) in zip(st.columns(2, gap="large"), pies):
        with container:
            st.markdown(heading)
            # reset_index() on the unnamed size Series puts the counts in a
            # column labelled 0, which is what ``values=0`` refers to.
            counts = buildings.groupby(column).size().reset_index()
            fig = px.pie(counts, values=0, names=column)
            st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
def models_view(data):
    """Render the models page: a count metric, a table and two pie charts.

    ``data`` is indexed with ``model`` plus the configuration columns named in
    ``display_names``, collapsed to one row per ``model`` with
    ``groupby(...).first()`` and relabelled for display.
    """
    # Source column -> label shown in the UI. The key order is also the
    # column order of the rendered table.
    display_names = {
        "cv_config.folds": "CV Folds",
        "cv_config.horizon": "CV Horizon",
        "cv_config.step": "CV Step",
        "cv_config.time": "CV Time",
        "model_info.repository": "Image Repository",
        "model_info.tag": "Image Tag",
        "model_info.variate_type": "Variate type",
    }
    per_model = data[["model", *display_names]].groupby("model").first()
    models = per_model.rename(columns=display_names)

    st.metric("Number of models", len(models))
    st.divider()

    st.markdown("### Models")
    st.dataframe(models, use_container_width=True)

    variate_column, framework_column = st.columns(2, gap="large")

    with variate_column:
        st.markdown("#### Variate types")
        # reset_index() on the unnamed size Series puts the counts in a
        # column labelled 0, which is what ``values=0`` refers to.
        variate_counts = models.groupby("Variate type").size().reset_index()
        variate_fig = px.pie(variate_counts, values=0, names="Variate type")
        st.plotly_chart(variate_fig, use_container_width=True)

    with framework_column:
        st.markdown("#### Frameworks")
        # The framework is the part of the model name (the index after the
        # groupby above) that precedes the first ".".
        framework_names = models.index.str.split(".").str[0]
        with_framework = models.assign(Framework=framework_names)
        framework_counts = with_framework.groupby("Framework").size().reset_index()
        framework_fig = px.pie(framework_counts, values=0, names="Framework")
        st.plotly_chart(framework_fig, use_container_width=True)
|
|
|
|
|
def _style_metrics_table(table: pd.DataFrame, gradient_axis):
    """Return a Styler giving ``table`` the shared look of the metrics tables.

    ``gradient_axis`` is forwarded to ``Styler.background_gradient``: ``0``
    colours each column on its own scale, ``None`` colours the whole table on
    one scale.
    """
    styler = table.style
    styler.background_gradient(cmap="seismic", axis=gradient_axis)
    styler.format(precision=2)
    styler.map(lambda _: "text-align: center; font-size: 14px;")
    return styler


def performance_view(data: pd.DataFrame, models_to_plot: set[str]):
    """Render the performance page for the selected models.

    The page consists of, top to bottom:

    1. a box plot of one ``<metric>.<aggregation>`` column per model,
    2. a scatter plot of two such columns against each other,
    3. a per-model table of the chosen metric's min/mean/median/max/std
       columns, aggregated over the rows of each model,
    4. a model x building table of the chosen ``<metric>.<aggregation>``
       column, aggregated over the rows of each (model, building) pair.

    Args:
        data: Frame with a ``model`` column, a ``unique_id`` column and one
            ``<metric>.<aggregation>`` column for every metric/aggregation
            offered in the select boxes.
        models_to_plot: Model names to keep; rows of other models are dropped.
    """
    metrics = ["MAE", "RMSE", "MBE", "rMAE"]
    aggregations = ["min", "mean", "median", "max", "std"]
    # Lower bound for the box plot, so that selecting only a few models (or
    # none) does not collapse the figure to an unreadable strip.
    min_box_plot_height = 300

    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )

    # --- Box plot: distribution of one metric column per model -------------
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", metrics, index=0)
    with right:
        aggregation = st.selectbox("Aggregation", aggregations, index=1)

    st.markdown(f"#### {aggregation.capitalize()} {metric} per building")
    fig = px.box(
        data_to_plot,
        x=f"{metric}.{aggregation}",
        y="model",
        color="model",
        points="all",
    )
    fig.update_layout(
        showlegend=False,
        height=max(min_box_plot_height, 40 * len(models_to_plot)),
    )
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Scatter plot: one metric column against another -------------------
    left, right = st.columns(2, gap="large")
    with left:
        x_metric = st.selectbox("Metric", metrics, index=0, key="x_metric")
        x_aggregation = st.selectbox(
            "Aggregation", aggregations, index=1, key="x_aggregation"
        )
    with right:
        # This box selects a metric, so it is labelled "Metric" like its
        # x-axis counterpart.
        y_metric = st.selectbox("Metric", metrics, index=1, key="y_metric")
        y_aggregation = st.selectbox(
            "Aggregation", aggregations, index=1, key="y_aggregation"
        )

    st.markdown(
        f"#### {x_aggregation.capitalize()} {x_metric} vs {y_aggregation.capitalize()} {y_metric}"
    )
    fig = px.scatter(
        data_to_plot,
        x=f"{x_metric}.{x_aggregation}",
        y=f"{y_metric}.{y_aggregation}",
        color="model",
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Tables -------------------------------------------------------------
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", metrics, index=0, key="table_metric")
    with right:
        aggregation = st.selectbox(
            "Aggregation across folds",
            aggregations,
            index=1,
            key="table_aggregation",
        )

    # Per-model table: every stat column of the chosen metric, reduced with
    # the chosen aggregation over all rows of a model.
    stat_columns = [f"{metric}.{name}" for name in aggregations]
    per_model_table = data_to_plot.groupby(["model"]).agg(
        aggregation, numeric_only=True
    )[stat_columns]

    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per model")
    # axis=0: each stat column gets its own colour scale.
    st.dataframe(
        _style_metrics_table(per_model_table, gradient_axis=0),
        use_container_width=True,
    )

    # Per-building table: models as rows, buildings as columns, showing the
    # chosen metric column reduced with the same aggregation.
    per_building_table = (
        data_to_plot.groupby(["model", "unique_id"])
        .agg(aggregation, numeric_only=True)
        .reset_index()
        .pivot(index="model", columns="unique_id", values=f"{metric}.{aggregation}")
    )

    st.markdown(f"#### {aggregation.capitalize()} {metric} stats per building")
    # axis=None: one colour scale across the whole table, so cells are
    # comparable between buildings.
    st.dataframe(
        _style_metrics_table(per_building_table, gradient_axis=None),
        use_container_width=True,
    )
|
|