attilabalint
added performance page
f87d2ee
raw
history blame
7.27 kB
import pandas as pd
import streamlit as st
import plotly.express as px
def buildings_view(data):
    """Render the buildings overview: a count, a table and two pie charts.

    ``data`` is grouped by ``unique_id`` and reduced with ``first()`` over the
    building metadata columns, which are renamed to human-readable headers
    before being displayed.
    """
    # Source column -> header shown in the UI. The keys also drive the
    # column selection below, so both stay in sync.
    display_names = {
        "metadata.cluster_size": "Cluster size",
        "metadata.building_class": "Building class",
        "metadata.location_id": "Location ID",
        "metadata.timezone": "Timezone",
        "dataset.available_history.days": "Available history (days)",
    }
    per_building = data[["unique_id", *display_names]].groupby("unique_id").first()
    per_building = per_building.rename(columns=display_names)

    st.metric("Number of buildings", len(per_building))
    st.divider()
    st.markdown("### Buildings")

    # The history column is drawn as a progress bar on a fixed 0-1000 scale.
    history_column = st.column_config.ProgressColumn(
        "Available history (days)",
        help="Available training data during the first prediction.",
        format="%f",
        min_value=0,
        max_value=1000,
    )
    st.dataframe(
        per_building,
        use_container_width=True,
        column_config={"Available history (days)": history_column},
    )

    # One pie chart per layout column: (heading, column to count by).
    pie_specs = (
        ("#### Building classes", "Building class"),
        ("#### Timezones", "Timezone"),
    )
    for container, (heading, field) in zip(st.columns(2, gap="large"), pie_specs):
        with container:
            st.markdown(heading)
            # size() gives an unnamed Series, so after reset_index() the
            # counts live in the column labelled 0.
            counts = per_building.groupby(field).size().reset_index()
            st.plotly_chart(
                px.pie(counts, values=0, names=field),
                use_container_width=True,
            )
def models_view(data):
    """Render the models overview: a count, a table and two pie charts.

    ``data`` is grouped by ``model`` and reduced with ``first()`` over the
    cross-validation and image columns, which are renamed to human-readable
    headers before being displayed.
    """
    # Source column -> header shown in the UI. The keys also drive the
    # column selection below, so both stay in sync.
    display_names = {
        "cv_config.folds": "CV Folds",
        "cv_config.horizon": "CV Horizon",
        "cv_config.step": "CV Step",
        "cv_config.time": "CV Time",
        "model_info.repository": "Image Repository",
        "model_info.tag": "Image Tag",
        "model_info.variate_type": "Variate type",
    }
    per_model = data[["model", *display_names]].groupby("model").first()
    per_model = per_model.rename(columns=display_names)

    st.metric("Number of models", len(per_model))
    st.divider()
    st.markdown("### Models")
    st.dataframe(per_model, use_container_width=True)

    variate_col, framework_col = st.columns(2, gap="large")

    with variate_col:
        st.markdown("#### Variate types")
        # size() gives an unnamed Series, so after reset_index() the counts
        # live in the column labelled 0.
        variate_counts = per_model.groupby("Variate type").size().reset_index()
        variate_pie = px.pie(variate_counts, values=0, names="Variate type")
        st.plotly_chart(variate_pie, use_container_width=True)

    with framework_col:
        st.markdown("#### Frameworks")
        # The framework is the part of the model name (the index) that
        # precedes the first ".". Work on a copy so the displayed table
        # frame is left untouched.
        with_framework = per_model.copy()
        with_framework["Framework"] = with_framework.index.str.split(".").str[0]
        framework_counts = with_framework.groupby("Framework").size().reset_index()
        framework_pie = px.pie(framework_counts, values=0, names="Framework")
        st.plotly_chart(framework_pie, use_container_width=True)
def _style_metrics_table(table, gradient_axis):
    """Return a Styler for ``table`` with the shared metrics-table look.

    ``gradient_axis`` is forwarded to ``Styler.background_gradient``:
    ``0`` colours each column on its own scale, ``None`` colours the whole
    table on a single scale.
    """
    styler = table.style
    styler.background_gradient(cmap="seismic", axis=gradient_axis)
    styler.format(precision=2)
    # Center the text and increase the font size in every cell.
    styler.map(lambda _: "text-align: center; font-size: 14px;")
    return styler


def performance_view(data: pd.DataFrame, models_to_plot: set[str]) -> None:
    """Render the performance page for the selected models.

    The page has three sections, each with its own metric/aggregation
    selectors:

    1. a box plot of one ``"<metric>.<aggregation>"`` column per model,
    2. a scatter plot of one such column against another,
    3. two styled tables: per-model statistics, and a model x building
       pivot of the chosen ``"<metric>.<aggregation>"`` column.

    Args:
        data: Frame with a ``"model"`` column, a ``"unique_id"`` column and
            one column per metric/aggregation pair named
            ``"<metric>.<aggregation>"`` (e.g. ``"MAE.mean"``).
        models_to_plot: Model names to keep; rows of other models are dropped.
    """
    metrics = ["MAE", "RMSE", "MBE", "rMAE"]
    aggregations = ["min", "mean", "median", "max", "std"]
    # 40 px per model, but never shorter than this: a plot with only a few
    # models would otherwise be smaller than its own margins.
    min_box_plot_height = 200

    data_to_plot = data[data["model"].isin(models_to_plot)].sort_values(
        by="model", ascending=True
    )
    if data_to_plot.empty:
        # Nothing to draw. Without this guard an empty selection would ask
        # Plotly for a zero-height figure, which it rejects.
        st.warning("No data available for the selected models.")
        return

    # --- Section 1: distribution of one metric per model -------------------
    left, right = st.columns(2, gap="small")
    with left:
        metric = st.selectbox("Metric", metrics, index=0)
    with right:
        aggregation = st.selectbox("Aggregation", aggregations, index=1)

    st.markdown(f"#### {aggregation.capitalize()} {metric} per building")
    fig = px.box(
        data_to_plot,
        x=f"{metric}.{aggregation}",
        y="model",
        color="model",
        points="all",
    )
    fig.update_layout(
        showlegend=False,
        height=max(40 * len(models_to_plot), min_box_plot_height),
    )
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Section 2: one metric against another -----------------------------
    left, right = st.columns(2, gap="large")
    with left:
        x_metric = st.selectbox("Metric", metrics, index=0, key="x_metric")
        x_aggregation = st.selectbox(
            "Aggregation", aggregations, index=1, key="x_aggregation"
        )
    with right:
        # This selector picks a metric, so it is labelled "Metric" like its
        # x-axis counterpart.
        y_metric = st.selectbox("Metric", metrics, index=1, key="y_metric")
        y_aggregation = st.selectbox(
            "Aggregation", aggregations, index=1, key="y_aggregation"
        )

    st.markdown(
        f"#### {x_aggregation.capitalize()} {x_metric} vs {y_aggregation.capitalize()} {y_metric}"
    )
    fig = px.scatter(
        data_to_plot,
        x=f"{x_metric}.{x_aggregation}",
        y=f"{y_metric}.{y_aggregation}",
        color="model",
    )
    fig.update_layout(height=600)
    st.plotly_chart(fig, use_container_width=True)

    st.divider()

    # --- Section 3: summary tables ------------------------------------------
    left, right = st.columns(2, gap="small")
    with left:
        table_metric = st.selectbox("Metric", metrics, index=0, key="table_metric")
    with right:
        table_aggregation = st.selectbox(
            "Aggregation across folds",
            aggregations,
            index=1,
            key="table_aggregation",
        )

    # Per model: aggregate every numeric column, then keep the five
    # statistic columns of the chosen metric.
    stat_columns = [f"{table_metric}.{stat}" for stat in aggregations]
    per_model = data_to_plot.groupby(["model"]).agg(
        table_aggregation, numeric_only=True
    )[stat_columns]

    st.markdown(
        f"#### {table_aggregation.capitalize()} {table_metric} stats per model"
    )
    # Each statistic has its own range, so colour column by column.
    st.dataframe(_style_metrics_table(per_model, 0), use_container_width=True)

    # Per building: one row per model, one column per building.
    per_building = (
        data_to_plot.groupby(["model", "unique_id"])
        .agg(table_aggregation, numeric_only=True)
        .reset_index()
        .pivot(
            index="model",
            columns="unique_id",
            values=f"{table_metric}.{table_aggregation}",
        )
    )

    st.markdown(
        f"#### {table_aggregation.capitalize()} {table_metric} stats per building"
    )
    # All cells share one quantity, so colour the table on a single scale.
    st.dataframe(_style_metrics_table(per_building, None), use_container_width=True)