"""Gradio leaderboard app built from per-model result CSVs in ``results/``.

Each ``results/*.csv`` file is treated as one model's results. The script
reads the ``parsed_judge_response`` and ``difficulty_level`` columns of every
file, computes overall and per-level accuracy percentages, and displays them
in a table sorted by overall accuracy (best first).
"""

from glob import glob
from pathlib import Path

import gradio as gr
import pandas as pd

# Sorted so the load order does not depend on filesystem enumeration order.
csv_results = sorted(glob("results/*.csv"))

# Map each CSV path to its loaded DataFrame.
data = {file: pd.read_csv(file) for file in csv_results}


def calculate_accuracy(df):
    """Return the mean of ``parsed_judge_response`` scaled to a percentage.

    NOTE(review): presumably the column holds 0/1 correctness flags, which
    would make this a percentage accuracy -- confirm against the CSV schema.
    """
    return df["parsed_judge_response"].mean() * 100


def accuracy_breakdown(df, levels=None):
    """Return per-difficulty-level mean of ``parsed_judge_response`` (x100).

    Args:
        df: DataFrame with ``difficulty_level`` and ``parsed_judge_response``
            columns.
        levels: Optional ordered collection of difficulty levels. When given,
            the result has exactly one entry per level in that order, with
            NaN for levels absent from ``df``. When omitted, the result
            contains only the levels present in ``df`` in sorted group order
            (the original behavior).

    Returns:
        A NumPy array of percentages, one per level.
    """
    per_level = (
        df.groupby("difficulty_level")["parsed_judge_response"].mean() * 100
    )
    if levels is not None:
        # Align on level labels rather than position so that a file missing
        # a level does not shift later levels into the wrong column.
        per_level = per_level.reindex(levels)
    return per_level.values


# Column headers (with icons) shown in the leaderboard table.
headers_with_icons = [
    "🤖 Model Name",
    "⭐ Overall",
    "📈 Level 1",
    "🔍 Level 2",
    "📘 Level 3",
    "🔬 Level 4",
]

# Overall accuracy per file, computed once and reused when building rows.
accuracy = {file: calculate_accuracy(df) for file, df in data.items()}

# Union of difficulty levels seen in any file. Every row is aligned to this
# list. The headers above assume it contains exactly four levels; any other
# count will not match the six declared columns.
all_levels = sorted(
    {
        level
        for df in data.values()
        for level in df["difficulty_level"].dropna().unique()
    }
)

# One row per model: [name, overall, level accuracies...].
data_for_df = []
for file, df in data.items():
    overall_accuracy = round(accuracy[file], 2)
    breakdown_accuracy = [
        round(acc, 2) for acc in accuracy_breakdown(df, all_levels)
    ]
    # The model name is the file name without directory or extension;
    # Path handles both "/" and "\\" separators.
    model_name = Path(file).stem
    data_for_df.append([model_name, overall_accuracy, *breakdown_accuracy])

# Build the table and rank models from highest to lowest overall accuracy.
accuracy_df = pd.DataFrame(data_for_df, columns=headers_with_icons)
accuracy_df = accuracy_df.sort_values(by="⭐ Overall", ascending=False)

with gr.Blocks() as demo:
    gr.Markdown("# FSMBench Leaderboard")
    # TODO: add link to home page and dataset
    leader_board = gr.Dataframe(accuracy_df, headers=headers_with_icons)

demo.launch()