# MMLU-by-task / app.py
# Corey Morris
# Two files being loaded and results displayed side by side
# df074bd
# raw
# history blame
# 1.35 kB
import gradio as gr
import pandas as pd
import numpy as np
import json
import requests
# URLs for the two JSON result files that are compared side by side.
FILE_URL_1 = "https://raw.githubusercontent.com/EleutherAI/lm-evaluation-harness/master/results/llama/llama-30B/llama-30B_mmlu_5-shot.json"
FILE_URL_2 = "https://raw.githubusercontent.com/EleutherAI/lm-evaluation-harness/master/results/llama/llama-13B/llama-13B_mmlu_5-shot.json"

# Upper bound (in seconds) on each download. requests applies no timeout by
# default, so without this a stalled connection would block start-up forever.
_REQUEST_TIMEOUT_SECONDS = 30

# Load data from both URLs.
# raise_for_status() turns an HTTP error (404, 5xx, ...) into an explicit
# requests.HTTPError instead of letting .json() fail on an error page.
response1 = requests.get(FILE_URL_1, timeout=_REQUEST_TIMEOUT_SECONDS)
response1.raise_for_status()
data1 = response1.json()
response2 = requests.get(FILE_URL_2, timeout=_REQUEST_TIMEOUT_SECONDS)
response2.raise_for_status()
data2 = response2.json()

# Convert the 'results' mapping of each file into a DataFrame. The transpose
# puts one key of 'results' (the sub-test name) on each row.
data1_df = pd.DataFrame(data1['results']).T
data2_df = pd.DataFrame(data2['results']).T

# Rename the 'acc' column so the two files stay distinguishable after merging.
data1_df = data1_df.rename(columns={'acc': 'acc_file_1'})
data2_df = data2_df.rename(columns={'acc': 'acc_file_2'})

# Merge the two accuracy columns on the index (the sub-test names). pd.merge
# defaults to an inner join, so only sub-tests present in both files are kept.
data = pd.merge(data1_df['acc_file_1'], data2_df['acc_file_2'], left_index=True, right_index=True)
def show_leaderboard():
    """Return the module-level ``data`` table rendered as an HTML string.

    The DataFrame is converted to HTML so that it can be displayed properly
    by the Gradio "html" output this function is wired to.
    """
    html_table = data.to_html()
    return html_table
# Build the Gradio interface: no input components, a single HTML output
# produced by show_leaderboard.
iface = gr.Interface(
    fn=show_leaderboard,
    inputs=[],
    outputs="html",
)

# Run the interface.
# NOTE(review): the original author's note says .launch() is not needed on
# Hugging Face Spaces and is kept for local testing -- confirm against the
# Spaces documentation before removing it.
iface.launch()