Spaces:

autogluon
/

fev-leaderboard

Running

App Files Files Community

fev-leaderboard / app.py

shchuro

Update app

a6d6654 7 months ago

raw

history blame

4.43 kB

	import fev
	import gradio as gr
	import pandas as pd

	# Download the seasonal-naive result summary into a DataFrame at module load.
	# NOTE(review): `df` is not referenced anywhere else in the visible file --
	# confirm whether this download is still needed.
	df = pd.read_csv(
	    filepath_or_buffer="https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/seasonal_naive.csv",
	)

	# Markdown body of the "About" tab (handed to gr.Markdown at the bottom of the file).
	markdown_text = """
	This space hosts evaluation results for time series forecasting models.

	Benchmark definitions, implementations of models, as well as the evaluation results for individual tasks are available under https://github.com/autogluon/fev.

	Currently, the results in this space are a minimal proof of concept. Stay tuned for more benchmarks, results for new models and instructions on how to contribute your results.
	"""

	# All result summaries sit in one directory of the fev repository, one CSV per
	# model, so the URLs are assembled from a shared prefix plus the file stems.
	_RESULTS_ROOT = "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results"
	_MODEL_STEMS = (
	    "auto_arima",
	    "auto_ets",
	    "auto_theta",
	    "chronos_base",
	    "chronos_large",
	    "chronos_mini",
	    "chronos_small",
	    "chronos_tiny",
	    "chronos_bolt_base",
	    "chronos_bolt_mini",
	    "chronos_bolt_small",
	    "chronos_bolt_tiny",
	    "moirai_base",
	    "moirai_large",
	    "moirai_small",
	    "seasonal_naive",
	    "timesfm",
	)
	# Order matches _MODEL_STEMS; the list is what gets passed to fev.leaderboard.
	summary_urls = [f"{_RESULTS_ROOT}/{stem}.csv" for stem in _MODEL_STEMS]

	# Leaderboard column name -> header shown to the user.
	rename_cols = dict(
	    gmean_relative_error="Average relative error",
	    avg_rank="Average rank",
	    median_inference_time_s="Median inference time (s)",
	)
	# The columns kept from the leaderboard are exactly the ones that get renamed,
	# in the same order (dicts preserve insertion order).
	selected_cols = list(rename_cols)

	def _build_leaderboard(metric_column):
	    """Return the leaderboard table for ``metric_column`` with every value cast to ``str``.

	    Calls ``fev.leaderboard`` on ``summary_urls``, keeps ``selected_cols``, applies
	    ``rename_cols``, rounds to 3 decimals, moves the index into a regular column
	    and finally converts all columns to strings.
	    """
	    table = fev.leaderboard(summary_urls, metric_column=metric_column)
	    table = table[selected_cols].rename(columns=rename_cols)
	    table = table.round(3).reset_index()
	    return table.astype(str)


	# One table per metric; both are displayed on the "Leaderboard" tab.
	lb_mase = _build_leaderboard("MASE")
	lb_wql = _build_leaderboard("WQL")


	# Assemble the UI: a "Leaderboard" tab holding both result tables and an "About" tab.
	with gr.Blocks() as demo:
	    with gr.Tab(label="Leaderboard"):
	        # Introductory text for the benchmark.
	        gr.Markdown(
	            value="""
	## Chronos zero-shot benchmark results

	This tab contains results for various forecasting models on the 28 datasets used in Benchmark II (zero-shot evaluation) in the publication [Chronos: Learning the Language of Time Series](https://arxiv.org/abs/2403.07815).

	Task definitions and the detailed results are available on [GitHub](https://github.com/autogluon/fev/tree/main/benchmarks/chronos_zeroshot).
	""",
	        )

	        # MASE table (read-only).
	        gr.Markdown(
	            value="""### Point forecast accuracy (measured by MASE)
	""",
	        )
	        gr.Dataframe(value=lb_mase, interactive=False)

	        # WQL table (read-only).
	        gr.Markdown(value="### Probabilistic forecast accuracy (measured by WQL)")
	        gr.Dataframe(value=lb_wql, interactive=False)

	    with gr.Tab(label="About"):
	        gr.Markdown(value=markdown_text)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()