|  | import gradio as gr | 
					
						
						|  | from apscheduler.schedulers.background import BackgroundScheduler | 
					
						
						|  | from huggingface_hub import snapshot_download | 
					
						
						|  | import pandas as pd | 
					
						
						|  |  | 
					
						
						|  | from src.about import ( | 
					
						
						|  | REPRODUCIBILITY_TEXT, | 
					
						
						|  | INTRODUCTION_TEXT, | 
					
						
						|  | ABOUT_TEXT, | 
					
						
						|  | TITLE, | 
					
						
						|  | ) | 
					
						
						|  | from src.display.css_html_js import custom_css, custom_js | 
					
						
						|  | from src.display.utils import ( | 
					
						
						|  | COLS, | 
					
						
						|  | ST_BENCHMARK_COLS, | 
					
						
						|  | AGENTIC_BENCHMARK_COLS, | 
					
						
						|  | EVAL_COLS, | 
					
						
						|  | AutoEvalColumn, | 
					
						
						|  | fields, | 
					
						
						|  | ) | 
					
						
						|  | from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN | 
					
						
						|  | from src.populate import get_evaluation_queue_df, get_leaderboard_df, TASK_NAME_INVERSE_MAP | 
					
						
						|  | from src.submission.submit import add_new_eval | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def restart_space(): | 
					
						
						|  | API.restart_space(repo_id=REPO_ID) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | try: | 
					
						
						|  | print(EVAL_REQUESTS_PATH) | 
					
						
						|  | snapshot_download( | 
					
						
						|  | repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN | 
					
						
						|  | ) | 
					
						
						|  | except Exception: | 
					
						
						|  | restart_space() | 
					
						
						|  | try: | 
					
						
						|  | print(EVAL_RESULTS_PATH) | 
					
						
						|  | snapshot_download( | 
					
						
						|  | repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN | 
					
						
						|  | ) | 
					
						
						|  | except Exception: | 
					
						
						|  | restart_space() | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | ST_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, ST_BENCHMARK_COLS) | 
					
						
						|  | AGENTIC_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, AGENTIC_BENCHMARK_COLS) | 
					
						
						|  |  | 
					
						
						|  | ( | 
					
						
						|  | finished_eval_queue_df, | 
					
						
						|  | running_eval_queue_df, | 
					
						
						|  | pending_eval_queue_df, | 
					
						
						|  | ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS) | 
					
						
						|  |  | 
					
						
						|  | def bold_max(s): | 
					
						
						|  | is_max = s == s.max() | 
					
						
						|  | return ['font-weight: bold' if v else '' for v in is_max] | 
					
						
						|  |  | 
					
						
						|  | def init_leaderboard(dataframe, benchmark_type): | 
					
						
						|  | if dataframe is None or dataframe.empty: | 
					
						
						|  | raise ValueError("Leaderboard DataFrame is empty or None.") | 
					
						
						|  |  | 
					
						
						|  | AutoEvalColumnSubset = [c for c in fields(AutoEvalColumn) if ((c.name=="Model") or (TASK_NAME_INVERSE_MAP.get(c.name, dict()).get("type", "")==benchmark_type))] | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | return gr.components.Dataframe( | 
					
						
						|  | value=dataframe, | 
					
						
						|  | datatype=[c.type for c in AutoEvalColumnSubset], | 
					
						
						|  | column_widths=["150px" if c.name != "Model" else "250px" for c in AutoEvalColumnSubset], | 
					
						
						|  | wrap=False, | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | black_logo_path = "src/assets/logo-icon-black.png" | 
					
						
						|  | white_logo_path = "src/assets/logo-icon-white.png" | 
					
						
						|  |  | 
					
						
						|  | demo = gr.Blocks( | 
					
						
						|  | css=custom_css, | 
					
						
						|  | js=custom_js, | 
					
						
						|  | theme=gr.themes.Default(primary_hue=gr.themes.colors.pink), | 
					
						
						|  | fill_height=True, | 
					
						
						|  | fill_width=True, | 
					
						
						|  | ) | 
					
						
						|  | with demo: | 
					
						
						|  | gr.HTML(f""" | 
					
						
						|  | <div id="page-header"> | 
					
						
						|  | <div id="header-container"> | 
					
						
						|  | <div id="left-container"> | 
					
						
						|  | <img id="black-logo" src="/gradio_api/file={black_logo_path}"> | 
					
						
						|  | <img id="white-logo" src="/gradio_api/file={white_logo_path}"> | 
					
						
						|  | </div> | 
					
						
						|  | <div id="centre-container"> | 
					
						
						|  | <h1 style="margin-bottom: 0.25rem;">{TITLE}</h1> | 
					
						
						|  | <p style="color:#eb088a; margin:0; font-size:1.2rem;">Explore Interactive Results & Traces</p> | 
					
						
						|  | </div> | 
					
						
						|  | <div id="right-container"> | 
					
						
						|  | </div> | 
					
						
						|  | </div> | 
					
						
						|  | </div> | 
					
						
						|  | """) | 
					
						
						|  | gr.Markdown(INTRODUCTION_TEXT, elem_classes="intro-text", sanitize_html=False) | 
					
						
						|  |  | 
					
						
						|  | with gr.Tabs(elem_classes=["leaderboard-table", "tab-buttons"]) as tabs: | 
					
						
						|  | with gr.TabItem("Base Benchmarks", elem_classes="llm-benchmark-tab-table", id=0): | 
					
						
						|  | leaderboard = init_leaderboard(ST_LEADERBOARD_DF, "base") | 
					
						
						|  |  | 
					
						
						|  | with gr.TabItem("Agentic Benchmarks", elem_classes="llm-benchmark-tab-table", id=1): | 
					
						
						|  | leaderboard = init_leaderboard(AGENTIC_LEADERBOARD_DF, "agentic") | 
					
						
						|  |  | 
					
						
						|  | with gr.TabItem("About", elem_classes="llm-benchmark-tab-table", id=2): | 
					
						
						|  | gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text", sanitize_html=False) | 
					
						
						|  |  | 
					
						
						|  | with gr.TabItem("Reproducibility", elem_classes="llm-benchmark-tab-table", id=3): | 
					
						
						|  | gr.Markdown(REPRODUCIBILITY_TEXT, elem_classes="markdown-text", sanitize_html=False) | 
					
						
						|  |  | 
					
						
						|  | assets = [black_logo_path, white_logo_path] | 
					
						
						|  |  | 
					
						
						|  | scheduler = BackgroundScheduler() | 
					
						
						|  | scheduler.add_job(restart_space, "interval", seconds=1800) | 
					
						
						|  | scheduler.start() | 
					
						
						|  | demo.queue(default_concurrency_limit=40).launch(allowed_paths=assets) |