"""Gradio front-end for the Align-Anything leaderboard Space.

On import this module downloads the evaluation results snapshot (best effort),
builds the leaderboard table, wires up the search / modality filters, schedules
a periodic Space restart and launches the Gradio app.
"""
import json
import os
import re
from pathlib import Path

import gradio as gr
import numpy as np
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download

from src.about import (
    ABOUT_TEXT,
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
# from src.display.utils import (
#     BENCHMARK_COLS,
#     COLS,
#     EVAL_COLS,
#     EVAL_TYPES,
#     NUMERIC_INTERVALS,
#     TYPES,
#     AutoEvalColumn,
#     ModelType,
#     fields,
#     WeightType,
#     Precision
# )
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN

# Best-effort download of the results dataset: the app must still start when
# the download fails, but the failure is reported instead of being swallowed.
try:
    print(EVAL_RESULTS_PATH)
    snapshot_download(
        repo_id=RESULTS_REPO,
        local_dir=EVAL_RESULTS_PATH,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
        token=TOKEN,
    )
except Exception as err:
    print(f"Could not download results from {RESULTS_REPO}: {err!r}")
    # restart_space()

# Number of samples per fine-grained subset.
SUBSET_COUNTS = {
    "Alignment-Object": 250,
    "Alignment-Attribute": 229,
    "Alignment-Action": 115,
    "Alignment-Count": 55,
    "Alignment-Location": 75,
    "Safety-Toxicity-Crime": 29,
    "Safety-Toxicity-Shocking": 31,
    "Safety-Toxicity-Disgust": 42,
    "Safety-Nsfw-Evident": 197,
    "Safety-Nsfw-Evasive": 177,
    "Safety-Nsfw-Subtle": 98,
    "Quality-Distortion-Human_face": 169,
    "Quality-Distortion-Human_limb": 152,
    "Quality-Distortion-Object": 100,
    "Quality-Blurry-Defocused": 350,
    "Quality-Blurry-Motion": 350,
    "Bias-Age": 80,
    "Bias-Gender": 140,
    "Bias-Race": 140,
    "Bias-Nationality": 120,
    "Bias-Religion": 60,
}

# Number of samples per perspective; each value equals the sum of the
# SUBSET_COUNTS entries sharing that prefix.
PERSPECTIVE_COUNTS = {
    "Alignment": 724,
    "Safety": 574,
    "Quality": 1121,
    "Bias": 540,
}

# Non-score columns that are carried through unchanged.
META_DATA = ['Model']

# Modalities that can be toggled in the UI filter.
_MODALITIES = ("Image-Text-to-Text", "Video-Text-to-Text")

# 18000 s = 5 h between scheduled Space restarts.
RESTART_INTERVAL_SECONDS = 18000


def restart_space():
    """Restart this Space through the Hub API."""
    API.restart_space(repo_id=REPO_ID)


# color_map = {
#     "Score Model": "#7497db",
#     "Opensource VLM": "#E8ECF2",
#     "Closesource VLM": "#ffcd75",
#     "Others": "#75809c",
#     # #7497db #E8ECF2 #ffcd75 #75809c
# }

# def color_model_type_column(df, color_map):
#     """
#     Apply color to the 'Modality' column of the DataFrame based on a given color mapping.
#
#     Parameters:
#     df (pd.DataFrame): The DataFrame containing the 'Modality' column.
#     color_map (dict): A dictionary mapping model types to colors.
#
#     Returns:
#     pd.Styler: The styled DataFrame.
#     """
#     # Function to apply color based on the model type
#     def apply_color(val):
#         color = color_map.get(val, "default")  # Default color if not specified in color_map
#         return f'background-color: {color}'
#
#     # Format for different columns
#     format_dict = {col: "{:.1f}" for col in df.columns if col not in META_DATA}
#     format_dict['Overall Score'] = "{:.2f}"
#     format_dict[''] = "{:d}"
#
#     return df.style.applymap(apply_color, subset=['Modality']).format(format_dict, na_rep='')


def regex_table(dataframe, regex, filter_button, style=True):
    """Filter the leaderboard by model-name query and selected modalities.

    Parameters:
        dataframe (pd.DataFrame): Table with at least "Model" and "Modality" columns.
        regex (str): Comma-separated regex patterns; a row is kept when its
            "Model" value matches any of them (case-insensitive). An empty
            query keeps every row.
        filter_button (list | str): Modalities to keep. Rows whose "Modality"
            contains a modality that is not listed are dropped. Any other
            type disables modality filtering.
        style (bool): Currently unused; kept for interface compatibility.

    Returns:
        pd.DataFrame: The filtered rows with a fresh index and a leading ''
        column holding the 1-based rank.
    """
    # Split on commas and drop empty fragments: an empty alternative (e.g. from
    # a trailing comma) would otherwise match every row.
    patterns = [part.strip() for part in (regex or "").split(",")]
    patterns = [part for part in patterns if part]
    combined_regex = '|'.join(patterns)

    # Drop the rows of every modality that is not ticked in the filter.
    if isinstance(filter_button, (list, str)):
        for modality in _MODALITIES:
            if modality not in filter_button:
                dataframe = dataframe[
                    ~dataframe["Modality"].str.contains(modality, case=False, na=False)
                ]

    # Keep rows whose model name matches any pattern. The query is typed live
    # by the user, so an incomplete/invalid regex falls back to a literal match
    # instead of raising.
    try:
        mask = dataframe["Model"].str.contains(combined_regex, case=False, na=False)
    except re.error:
        literal = '|'.join(re.escape(part) for part in patterns)
        mask = dataframe["Model"].str.contains(literal, case=False, na=False)

    # reset_index returns a new frame, so the caller's table is never mutated.
    data = dataframe[mask].reset_index(drop=True)

    # Replace column '' with count/rank (insert() fails on an existing column).
    if '' in data.columns:
        data = data.drop(columns=[''])
    data.insert(0, '', range(1, 1 + len(data)))

    # if style:
    #     # apply color
    #     data = color_model_type_column(data, color_map)

    return data


def get_leaderboard_results(results_path):
    """Load every ``*.json`` file directly under ``results_path`` into one table.

    Parameters:
        results_path (str | Path): Directory containing the result files.

    Returns:
        pd.DataFrame: Concatenation of all loaded files with a fresh index, or
        an empty DataFrame when no JSON file is found.
    """
    data_dir = Path(results_path)
    frames = []
    for name in os.listdir(data_dir):
        path = data_dir / name
        # Skip non-JSON entries and directories that merely end in ".json".
        if not name.endswith(".json") or not path.is_file():
            continue
        with open(path, encoding="utf-8") as rf:
            frames.append(pd.DataFrame(json.load(rf)))

    if not frames:
        return pd.DataFrame()
    # Later files come first, matching the previous prepend-on-each-iteration order.
    return pd.concat(frames[::-1], ignore_index=True)


def avg_all_perspective(orig_df: pd.DataFrame, columns_name: list, meta_data=META_DATA, perspective_counts=PERSPECTIVE_COUNTS):
    """Add a sample-count-weighted "Overall Score" and sort by it.

    Parameters:
        orig_df (pd.DataFrame): Table holding the ``meta_data`` and
            ``columns_name`` columns.
        columns_name (list): Perspective columns to average; each must be a key
            of ``perspective_counts``.
        meta_data (list): Columns carried through unchanged.
        perspective_counts (dict): Sample count per perspective, used as weight.

    Returns:
        pd.DataFrame: ``meta_data`` + "Overall Score" + ``columns_name``,
        sorted by "Overall Score" in descending order with a fresh index.

    Raises:
        KeyError: If a name in ``columns_name`` is missing from
            ``perspective_counts`` or from ``orig_df``.
    """
    # Copy so that adding a column never writes into (a view of) orig_df.
    new_df = orig_df[meta_data + columns_name].copy()

    # Normalise over the selected perspectives only, so the weights sum to 1
    # even when columns_name is a subset of perspective_counts.
    new_perspective_counts = {col: perspective_counts[col] for col in columns_name}
    total_count = sum(new_perspective_counts.values())
    weights = pd.Series(
        {col: count / total_count for col, count in new_perspective_counts.items()},
        dtype=float,
    )

    # skipna=False keeps a missing score visible as NaN in the overall score.
    new_df["Overall Score"] = new_df[columns_name].mul(weights).sum(axis=1, skipna=False)

    cols = meta_data + ["Overall Score"] + columns_name
    new_df = new_df[cols].sort_values(by="Overall Score", ascending=False).reset_index(drop=True)
    return new_df


# Hard-coded table shown on the leaderboard: 20 identical rows.
_NUM_ROWS = 20
data = {
    "Model": ["Beaver-Vision-11B"] * _NUM_ROWS,
    "Modality": ["Image-Text-to-Text"] * _NUM_ROWS,
    "Correctness of Information": [100.00] * _NUM_ROWS,
    "Detail Orientation": [100.00] * _NUM_ROWS,
    "Safety": [100.00] * _NUM_ROWS,
    "AVG": [100.00] * _NUM_ROWS,
}

df = pd.DataFrame(data)

total_models = len(df)

with gr.Blocks(css=custom_css) as app:
    with gr.Row():
        with gr.Column(scale=6):
            gr.Markdown(INTRODUCTION_TEXT.format(str(total_models)))
        with gr.Column(scale=4):
            # "resolve" serves the raw file; a "blob" URL returns the HTML file
            # viewer page, which cannot be rendered as an image.
            gr.Markdown("![](https://huggingface.co/spaces/Align-Anything/Leaderboard/resolve/main/src/overview.jpeg)")
            # gr.HTML(BGB_LOGO, elem_classes="logo")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏆 Align-Anything Leaderboard"):
            with gr.Row():
                search_overall = gr.Textbox(
                    label="Model Search (delimit with , )",
                    placeholder="🔍 Search model (separate multiple queries with `,`) and press ENTER...",
                    show_label=False,
                )
                model_type_overall = gr.CheckboxGroup(
                    choices=["Image-Text-to-Text", "Video-Text-to-Text"],
                    value=["Image-Text-to-Text", "Video-Text-to-Text"],
                    label="Modality",
                    show_label=False,
                    interactive=True,
                )
            with gr.Row():
                # Hidden, unfiltered copy of the table: the filter callbacks
                # read from it so every query starts from the full data.
                Align_Anything_table_overall_hidden = gr.Dataframe(
                    df,
                    headers=df.columns.tolist(),
                    elem_id="Align_Anything_leadboard_overall_hidden",
                    wrap=True,
                    visible=False,
                )
                Align_Anything_table_overall = gr.Dataframe(
                    regex_table(
                        df.copy(),
                        "",
                        ["Video-Text-to-Text", "Image-Text-to-Text"]
                    ),
                    headers=df.columns.tolist(),
                    elem_id="Align_Anything_leadboard_overall",
                    wrap=True,
                )

        with gr.TabItem("About"):
            with gr.Row():
                gr.Markdown(ABOUT_TEXT)

    with gr.Accordion("📚 Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            lines=7,
            label="Copy the following to cite these results.",
            elem_id="citation-button",
            show_copy_button=True,
        )

    # Re-filter the visible table whenever the query or the modality selection changes.
    search_overall.change(
        regex_table,
        inputs=[Align_Anything_table_overall_hidden, search_overall, model_type_overall],
        outputs=Align_Anything_table_overall,
    )
    model_type_overall.change(
        regex_table,
        inputs=[Align_Anything_table_overall_hidden, search_overall, model_type_overall],
        outputs=Align_Anything_table_overall,
    )

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=RESTART_INTERVAL_SECONDS)  # restarted every 5h
scheduler.start()

# app.queue(default_concurrency_limit=40).launch()
app.launch(allowed_paths=['./', "./src", "./evals"])