import fev
import gradio as gr
import pandas as pd
markdown_text = """
This space hosts evaluation results for time series forecasting models.
Benchmark definitions, model implementations, and evaluation results for individual tasks are available at https://github.com/autogluon/fev.
Currently, the results in this space are a minimal proof of concept. Stay tuned for more benchmarks, results for new models, and instructions on how to contribute your own results.
"""
# Per-model summary files produced by fev for the Chronos zero-shot benchmark.
summary_urls = [
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/auto_arima.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/auto_ets.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/auto_theta.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_base.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_large.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_mini.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_small.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_tiny.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_bolt_base.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_bolt_mini.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_bolt_small.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/chronos_bolt_tiny.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/moirai_base.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/moirai_large.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/moirai_small.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/seasonal_naive.csv",
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/timesfm.csv",
]
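
# The files above all follow the same results/<model>.csv pattern, so the list
# could also be built programmatically. A minimal sketch, kept commented out so
# the explicit list stays authoritative (model names copied from the URLs above):
#
# _RESULTS_BASE = "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results"
# _MODELS = [
#     "auto_arima", "auto_ets", "auto_theta",
#     "chronos_tiny", "chronos_mini", "chronos_small", "chronos_base", "chronos_large",
#     "chronos_bolt_tiny", "chronos_bolt_mini", "chronos_bolt_small", "chronos_bolt_base",
#     "moirai_small", "moirai_base", "moirai_large",
#     "seasonal_naive", "timesfm",
# ]
# summary_urls = [f"{_RESULTS_BASE}/{model}.csv" for model in _MODELS]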
# Columns to show in the leaderboard tables, with human-readable display names.
selected_cols = ["gmean_relative_error", "avg_rank", "median_inference_time_s"]
rename_cols = {
    "gmean_relative_error": "Average relative error",
    "avg_rank": "Average rank",
    "median_inference_time_s": "Median inference time (s)",
}

def make_leaderboard(metric_column: str) -> pd.DataFrame:
    # Aggregate the per-model summary files into a single ranked table for the
    # given metric, rounded and stringified for display in a Gradio Dataframe.
    return (
        fev.leaderboard(summary_urls, metric_column=metric_column)[selected_cols]
        .rename(columns=rename_cols)
        .round(3)
        .reset_index()
        .astype(str)
    )

# Point forecast accuracy (MASE) and probabilistic forecast accuracy (WQL).
lb_mase = make_leaderboard("MASE")
lb_wql = make_leaderboard("WQL")
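
# A table for another metric would only take one more call, provided the metric
# exists as a column in the summary files. A hypothetical sketch (commented out;
# "RMSE" is an assumed column name, not one confirmed by the fev results files):
#
# lb_rmse = make_leaderboard("RMSE")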

with gr.Blocks() as demo:
    with gr.Tab("Leaderboard"):
        gr.Markdown("""
## Chronos zero-shot benchmark results

This tab contains results for various forecasting models on the 28 datasets used in Benchmark II (zero-shot evaluation) in the publication [Chronos: Learning the Language of Time Series](https://arxiv.org/abs/2403.07815).
Task definitions and the detailed results are available on [GitHub](https://github.com/autogluon/fev/tree/main/benchmarks/chronos_zeroshot).
""")
        gr.Markdown("### Point forecast accuracy (measured by MASE)")
        gr.Dataframe(
            value=lb_mase,
            interactive=False,
        )
        gr.Markdown("### Probabilistic forecast accuracy (measured by WQL)")
        gr.Dataframe(
            value=lb_wql,
            interactive=False,
        )
    with gr.Tab("About"):
        gr.Markdown(markdown_text)

if __name__ == "__main__":
    demo.launch()
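
# To try this space locally (a sketch; assumes gradio and pandas from PyPI and
# fev installed from its GitHub repository):
#
#   pip install gradio pandas git+https://github.com/autogluon/fev.git
#   python app.py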