import fev
import gradio as gr
import pandas as pd

# Read the seasonal-naive results CSV from the fev repository into a DataFrame.
# NOTE(review): `df` is not referenced anywhere else in the visible file, yet it
# triggers a network download at import time — confirm whether it is still needed.
df = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results/seasonal_naive.csv"
)

# Markdown body shown in the "About" tab. The paragraphs are joined with
# newlines; the empty entries produce the leading/trailing newline and the
# blank lines that separate Markdown paragraphs.
markdown_text = "\n".join(
    [
        "",
        "This space hosts evaluation results for time series forecasting models.",
        "",
        "Benchmark definitions, implementations of models, as well as the evaluation results for individual tasks are available under https://github.com/autogluon/fev.",
        "",
        "Currently, the results in this space are a minimal proof of concept. Stay tuned for more benchmarks, results for new models and instructions on how to contribute your results.",
        "",
    ]
)

# All per-model result summaries live in the same directory of the fev
# repository, so the shared prefix is kept in one place: switching branch or
# benchmark only requires editing this constant.
_RESULTS_BASE_URL = (
    "https://raw.githubusercontent.com/autogluon/fev/refs/heads/main/benchmarks/chronos_zeroshot/results"
)

# File stems (without ".csv") of the summaries included in the leaderboard.
# The order is preserved in `summary_urls`.
_MODEL_NAMES = [
    "auto_arima",
    "auto_ets",
    "auto_theta",
    "chronos_base",
    "chronos_large",
    "chronos_mini",
    "chronos_small",
    "chronos_tiny",
    "chronos_bolt_base",
    "chronos_bolt_mini",
    "chronos_bolt_small",
    "chronos_bolt_tiny",
    "moirai_base",
    "moirai_large",
    "moirai_small",
    "seasonal_naive",
    "timesfm",
]

# One CSV URL per model, in the order listed above.
summary_urls = [f"{_RESULTS_BASE_URL}/{name}.csv" for name in _MODEL_NAMES]

# Leaderboard columns to display, mapped to the headers shown in the UI.
rename_cols = dict(
    gmean_relative_error="Average relative error",
    avg_rank="Average rank",
    median_inference_time_s="Median inference time (s)",
)
# Columns to keep, in display order (dicts preserve insertion order).
selected_cols = list(rename_cols)

def _build_leaderboard(metric_column: str) -> pd.DataFrame:
    """Build the display-ready leaderboard table for one metric.

    Calls ``fev.leaderboard`` on ``summary_urls`` with the given
    ``metric_column``, keeps only ``selected_cols``, renames them via
    ``rename_cols``, rounds numeric values to 3 decimals and turns the index
    into a regular column so it is shown in the rendered table.

    Args:
        metric_column: Name of the metric column passed to ``fev.leaderboard``
            (this file uses ``"MASE"`` and ``"WQL"``).

    Returns:
        The formatted leaderboard as a DataFrame.
    """
    leaderboard = fev.leaderboard(summary_urls, metric_column=metric_column)
    return (
        leaderboard[selected_cols]
        .rename(columns=rename_cols)
        .round(3)
        .reset_index()
    )


# Both tables share one pipeline so their formatting cannot drift apart.
lb_mase = _build_leaderboard("MASE")
lb_wql = _build_leaderboard("WQL")


# Page layout: a "Leaderboard" tab holding both accuracy tables, followed by
# an "About" tab with the general description text.
with gr.Blocks() as demo:
    with gr.Tab(label="Leaderboard"):
        # Introductory text for the benchmark shown in this tab.
        gr.Markdown(value="""
                    ## Chronos zero-shot benchmark results

                    This tab contains results for various forecasting models on the 28 datasets used in Benchmark II (zero-shot evaluation) in the publication [Chronos: Learning the Language of Time Series](https://arxiv.org/abs/2403.07815).

                    Task definitions and the detailed results are available on [GitHub](https://github.com/autogluon/fev/tree/main/benchmarks/chronos_zeroshot).
                    """)

        # Read-only table built from the "MASE" metric column.
        gr.Markdown(value="""### Point forecast accuracy (measured by MASE)
                    """)
        gr.Dataframe(value=lb_mase, interactive=False)

        # Read-only table built from the "WQL" metric column.
        gr.Markdown(value="### Probabilistic forecast accuracy (measured by WQL)")
        gr.Dataframe(value=lb_wql, interactive=False)

    with gr.Tab(label="About"):
        gr.Markdown(value=markdown_text)

# Launch the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    demo.launch()