# fev-leaderboard / app.py
# Last commit: "Update app" (a6d6654) by shchuro; file size 4.43 kB.
import fev
import gradio as gr
import pandas as pd
# Download the seasonal-naive results CSV from the fev GitHub repository and
# parse it into a pandas DataFrame.
# NOTE(review): `df` is not referenced by any other statement in this file;
# confirm whether this download at start-up is still needed.
_SEASONAL_NAIVE_RESULTS_URL = (
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/"
    "benchmarks/chronos_zeroshot/results/seasonal_naive.csv"
)
df = pd.read_csv(_SEASONAL_NAIVE_RESULTS_URL)
# Markdown body shown on the "About" tab. The empty first and last entries
# reproduce the newline that opens and closes the text.
markdown_text = "\n".join(
    [
        "",
        "This space hosts evaluation results for time series forecasting models.",
        "Benchmark definitions, implementations of models, as well as the evaluation results for individual tasks are available under https://github.com/autogluon/fev.",
        "Currently, the results in this space are a minimal proof of concept. Stay tuned for more benchmarks, results for new models and instructions on how to contribute your results.",
        "",
    ]
)
# Every per-model results file lives in the same directory of the fev
# repository, so the URLs are assembled from one base path plus the file stem.
_RESULTS_BASE_URL = (
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/"
    "benchmarks/chronos_zeroshot/results"
)
# File stems of the result CSVs, in the order they are passed to the leaderboard.
_RESULT_FILE_STEMS = (
    "auto_arima",
    "auto_ets",
    "auto_theta",
    "chronos_base",
    "chronos_large",
    "chronos_mini",
    "chronos_small",
    "chronos_tiny",
    "chronos_bolt_base",
    "chronos_bolt_mini",
    "chronos_bolt_small",
    "chronos_bolt_tiny",
    "moirai_base",
    "moirai_large",
    "moirai_small",
    "seasonal_naive",
    "timesfm",
)
summary_urls = [f"{_RESULTS_BASE_URL}/{stem}.csv" for stem in _RESULT_FILE_STEMS]
# Maps each leaderboard column that is displayed to its human-readable header.
rename_cols = dict(
    gmean_relative_error="Average relative error",
    avg_rank="Average rank",
    median_inference_time_s="Median inference time (s)",
)
# Columns kept from the leaderboard: exactly the keys above, in the same order.
selected_cols = list(rename_cols)
def _leaderboard_table(metric_column):
    """Build the display table for one evaluation metric.

    Calls ``fev.leaderboard`` on ``summary_urls`` with the given
    ``metric_column``, keeps only ``selected_cols``, renames them via
    ``rename_cols``, rounds to 3 decimals, turns the index into a regular
    column, and converts every value to ``str``.
    """
    summary = fev.leaderboard(summary_urls, metric_column=metric_column)
    table = summary[selected_cols].rename(columns=rename_cols)
    table = table.round(3).reset_index()
    return table.astype(str)


# One table per metric: MASE and WQL.
lb_mase = _leaderboard_table("MASE")
lb_wql = _leaderboard_table("WQL")
# Markdown snippets shown on the "Leaderboard" tab. They are defined at column
# zero so that the text passed to gr.Markdown carries no leading indentation.
_LEADERBOARD_INTRO = """
## Chronos zero-shot benchmark results
This tab contains results for various forecasting models on the 28 datasets used in Benchmark II (zero-shot evaluation) in the publication [Chronos: Learning the Language of Time Series](https://arxiv.org/abs/2403.07815).
Task definitions and the detailed results are available on [GitHub](https://github.com/autogluon/fev/tree/main/benchmarks/chronos_zeroshot).
"""
_MASE_HEADING = """### Point forecast accuracy (measured by MASE)
"""
_WQL_HEADING = "### Probabilistic forecast accuracy (measured by WQL)"

# Assemble the UI: a "Leaderboard" tab with one read-only table per metric and
# an "About" tab with the general description. The nesting below restores the
# block structure of the `with` statements, whose bodies must be indented for
# the file to parse.
with gr.Blocks() as demo:
    with gr.Tab("Leaderboard"):
        gr.Markdown(_LEADERBOARD_INTRO)

        gr.Markdown(_MASE_HEADING)
        gr.Dataframe(
            value=lb_mase,
            interactive=False,
        )

        gr.Markdown(_WQL_HEADING)
        gr.Dataframe(
            value=lb_wql,
            interactive=False,
        )

    with gr.Tab("About"):
        gr.Markdown(markdown_text)

# Start the app only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()