|
import os |
|
import json |
|
import glob |
|
from collections import defaultdict |
|
import pandas as pd |
|
import gradio as gr |
|
from content import * |
|
from css import * |
|
import glob |
|
import pandas as pd |
|
|
|
|
|
|
|
|
|
|
|
def format_floats(val):
    """Render a float with one decimal place; pass any other value through.

    Args:
        val: A single cell value of any type.

    Returns:
        ``f"{val:.1f}"`` when ``val`` is a ``float`` (``0.0`` included; NaN
        becomes ``"nan"``), otherwise ``val`` unchanged.
    """
    # Check the type only: a truthiness guard would skip 0.0 and hand it back
    # as a raw float while every other float is turned into a string.
    if isinstance(val, float):
        return f"{val:.1f}"
    return val
|
|
|
|
|
|
|
# Benchmark identifiers.
ARC, HELLASWAG, MMLU, TRUTHFULQA = "arc", "hellaswag", "mmlu", "truthfulqa"
BENCHMARKS = [ARC, HELLASWAG, MMLU, TRUTHFULQA]

# Metric names; presumably aligned position-by-position with BENCHMARKS
# (both lists have four entries) -- TODO confirm against the consumer.
METRICS = ["acc_norm"] * 3 + ["mc2"]
|
|
|
# Language codes, in display order.
LANGS = [
    "ar", "bn", "ca", "da", "de", "es", "eu", "fr",
    "gu", "hi", "hr", "hu", "hy", "id", "it", "kn",
    "ml", "mr", "ne", "nl", "pt", "ro", "ru", "sk",
    "sr", "sv", "ta", "te", "uk", "vi", "zh",
]

# Language code -> English language name.
LANG_NAME = {
    "ar": "Arabic",
    "bn": "Bengali",
    "ca": "Catalan",
    "da": "Danish",
    "de": "German",
    "es": "Spanish",
    "eu": "Basque",
    "fr": "French",
    "gu": "Gujarati",
    "hi": "Hindi",
    "hr": "Croatian",
    "hu": "Hungarian",
    "hy": "Armenian",
    "id": "Indonesian",
    "it": "Italian",
    "kn": "Kannada",
    "ml": "Malayalam",
    "mr": "Marathi",
    "ne": "Nepali",
    "nl": "Dutch",
    "pt": "Portuguese",
    "ro": "Romanian",
    "ru": "Russian",
    "sk": "Slovak",
    "sr": "Serbian",
    "sv": "Swedish",
    "ta": "Tamil",
    "te": "Telugu",
    "uk": "Ukrainian",
    "vi": "Vietnamese",
    "zh": "Chinese",
}
|
|
|
# Generic column headers.
MODEL_COL = "Model"
LANG_COL = "Language"
CODE_COL = "Code"
AVERAGE_COL = "Average"
NOTES_COL = "Notes"

# Headers for the benchmark-score columns.
ARC_COL = "ARC (25-shot)"
# NOTE(review): this literal appears to end with an invisible character right
# after the closing parenthesis; it is reproduced unchanged.
HELLASWAG_COL = "HellaSwag (0-shot)️"
MMLU_COL = "MMLU (25-shot)"
TRUTHFULQA_COL = "TruthfulQA (0-shot)"

# Headers for the math-benchmark columns.
MSVAMP_COL = "MSVAMP"
MGSM_COL = "MGSM"
MNUM_COL = "MNumGLUESub"

# Columns of the leaderboard tables and the matching Gradio datatype for each.
COLS = [MODEL_COL, MSVAMP_COL, MGSM_COL, MNUM_COL, NOTES_COL]
TYPES = ["str"] + ["number"] * 3 + ["html"]
|
|
|
|
|
|
|
def get_leaderboard_df():
    """Build the first leaderboard table (mostly 7B models) from hard-coded scores.

    Returns:
        pandas.DataFrame: one row per model with the columns listed in
        ``COLS``, sorted by ``MSVAMP_COL`` in descending order. Scores given
        as ``None`` become NaN, which ``sort_values`` places last by default.
    """
    # Each row follows COLS: model, MSVAMP, MGSM, MNumGLUESub, notes (HTML).
    results = [
        ["GPT-3.5-Turbo", 46.6, 42.2, 49.4, 'GPT-3.5-Turbo'],
        ["MAmmoTH 7B", 26.3, 21.3, 24.2, '<a href="https://arxiv.org/abs/2309.05653" target="_blank">MAmmoTH</a>'],
        ["WizardMath 7B", 32.5, 23.0, 28.7, '<a href="https://arxiv.org/abs/2308.09583" target="_blank">WizardMath</a>'],
        ["MetaMath 7B", 46.2, 37.0, 43.2, '<a href="https://arxiv.org/abs/2309.12284" target="_blank">MetaMath</a>'],
        ["MetaMath-LB-9B", None, 50.2, None, '<a href="https://arxiv.org/abs/2401.10695" target="_blank">LangBridge</a>'],
        ["XCoT 7B", 42.9, 41.5, None, '<a href="https://arxiv.org/abs/2401.07037" target="_blank">XCoT</a>'],
        ["QAlign 7B", 57.2, 49.6, None, '<a href="https://arxiv.org/abs/2401.07817" target="_blank">QAlign</a>'],
        ["MathOctopus 7B", 41.2, 39.5, 37.1, '<a href="https://arxiv.org/abs/2310.20246" target="_blank">MathOctopus</a>'],
        ["MathOctopus-MAPO-DPO 7B", 57.4, 41.6, 50.4, '<a href="https://arxiv.org/abs/2401.06838" target="_blank">MAPO</a>'],
        ["MetaMathOctopus 7B", 53.0, 45.5, 39.2, '<a href="https://arxiv.org/abs/2401.06838" target="_blank">MAPO</a>'],
        ["MetaMathOctopus-MAPO-DPO 7B 👑", 64.7, 51.6, 52.9, '<a href="https://arxiv.org/abs/2401.06838" target="_blank">MAPO</a>'],
        ["MistralMathOctopus 7B", 59.0, 58.0, 56.8, '<a href="https://arxiv.org/abs/2401.06838" target="_blank">MAPO</a>'],
        ["MistralMathOctopus-MAPO-DPO 7B 👑", 74.6, 67.3, 70.0, '<a href="https://arxiv.org/abs/2401.06838" target="_blank">MAPO</a>'],
    ]

    # from_records already lays the columns out in COLS order, so no further
    # column selection is needed after sorting.
    df = pd.DataFrame.from_records(results, columns=COLS)
    return df.sort_values(by=[MSVAMP_COL], ascending=False)
|
|
|
def get_leaderboard_13Bdf():
    """Build the second leaderboard table (mostly 13B models) from hard-coded scores.

    Returns:
        pandas.DataFrame: one row per model with the columns listed in
        ``COLS``, sorted by ``MSVAMP_COL`` in descending order. Scores given
        as ``None`` become NaN, which ``sort_values`` places last by default.
    """
    # Each row follows COLS: model, MSVAMP, MGSM, MNumGLUESub, notes (HTML).
    results = [
        ["GPT-3.5-Turbo", 46.6, 42.2, 49.4, 'GPT-3.5-Turbo'],
        ["MAmmoTH 13B", 38.6, 28.9, 29.5, '<a href="https://arxiv.org/abs/2309.05653" target="_blank">MAmmoTH</a>'],
        ["WizardMath 13B", 35.7, 28.3, 29.0, '<a href="https://arxiv.org/abs/2308.09583" target="_blank">WizardMath</a>'],
        ["MetaMath 13B", 46.2, 43.9, 43.3, '<a href="https://arxiv.org/abs/2309.12284" target="_blank">MetaMath</a>'],
        ["QAlign 13B", 62.6, 57.1, None, '<a href="https://arxiv.org/abs/2401.07817" target="_blank">QAlign</a>'],
        ["MathOctopus 13B", 51.8, 46.0, 40.3, '<a href="https://arxiv.org/abs/2310.20246" target="_blank">MathOctopus</a>'],
        ["MetaMath-LB-15B", None, 55.2, None, '<a href="https://arxiv.org/abs/2401.10695" target="_blank">LangBridge</a>'],
        ["MetaMath-LB-20B", None, 56.7, None, '<a href="https://arxiv.org/abs/2401.10695" target="_blank">LangBridge</a>'],
        ["MathOctopus-MAPO-DPO 13B ", 60.1, 48.5, 53.8, '<a href="https://arxiv.org/abs/2401.06838" target="_blank">MAPO</a>'],
        ["MetaMathOctopus 13B", 56.3, 51.4, 49.5, '<a href="https://arxiv.org/abs/2401.06838" target="_blank">MAPO</a>'],
        ["MetaMathOctopus-MAPO-DPO 13B 👑", 67.0, 58.0, 59.8, '<a href="https://arxiv.org/abs/2401.06838" target="_blank">MAPO</a>'],
    ]

    # from_records already lays the columns out in COLS order, so no further
    # column selection is needed after sorting.
    df = pd.DataFrame.from_records(results, columns=COLS)
    return df.sort_values(by=[MSVAMP_COL], ascending=False)
|
|
|
def search_table(df, query):
    """Return the rows of ``df`` whose model name or notes contain ``query``.

    Matching is case-insensitive and literal: the query is not interpreted as
    a regular expression, so characters such as ``(`` or ``+`` typed into the
    search box cannot raise a regex error.

    Args:
        df: DataFrame holding the ``MODEL_COL`` and ``NOTES_COL`` columns.
        query: Text to look for. An empty (or ``None``) query matches
            every row.

    Returns:
        ``df`` itself for an empty query, otherwise the filtered DataFrame.
    """
    if not query:
        return df

    def _contains(column):
        # na=False keeps missing cells from putting NaN into the boolean mask.
        return df[column].str.contains(query, case=False, regex=False, na=False)

    # Search the model name as well as the notes, so a query such as "7B"
    # finds rows whose notes only carry the paper link.
    return df[_contains(MODEL_COL) | _contains(NOTES_COL)]
|
|
|
|
|
|
|
def _format_scores(table):
    """Return ``table`` with ``format_floats`` applied to every cell."""
    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; keep the old call as a fallback for older pandas.
    if hasattr(pd.DataFrame, "map"):
        return table.map(format_floats)
    return table.applymap(format_floats)


# Both tables are built and formatted once, when the script starts.
original_df = _format_scores(get_leaderboard_df())
original_13Bdf = _format_scores(get_leaderboard_13Bdf())

demo = gr.Blocks(css=CUSTOM_CSS)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRO_TEXT, elem_classes="markdown-text")

    # First leaderboard section.
    with gr.Group():
        search_bar = gr.Textbox(
            placeholder="Search models and languages...",
            show_label=False,
            elem_id="search-bar",
        )
        leaderboard_table = gr.components.Dataframe(
            value=original_df,
            headers=COLS,
            datatype=TYPES,
            elem_id="leaderboard-table",
        )
        # Invisible copy of the full table: each search filters this copy, so
        # rows hidden by an earlier query can come back.
        hidden_leaderboard_table_for_search = gr.components.Dataframe(
            value=original_df,
            headers=COLS,
            datatype=TYPES,
            visible=False,
        )
        search_bar.change(
            search_table,
            [hidden_leaderboard_table_for_search, search_bar],
            leaderboard_table,
        )

    # Second leaderboard section; same layout, with its own search box.
    with gr.Group():
        search_bar_13B = gr.Textbox(
            placeholder="Search models and languages...",
            show_label=False,
            elem_id="search-bar",
        )
        leaderboard_table_13B = gr.components.Dataframe(
            value=original_13Bdf,
            headers=COLS,
            datatype=TYPES,
            elem_id="leaderboard-table",
        )
        # Invisible copy of the full table, used as the search input.
        hidden_leaderboard_table_for_search_13B = gr.components.Dataframe(
            value=original_13Bdf,
            headers=COLS,
            datatype=TYPES,
            visible=False,
        )
        search_bar_13B.change(
            search_table,
            [hidden_leaderboard_table_for_search_13B, search_bar_13B],
            leaderboard_table_13B,
        )

    gr.Markdown(CITATION, elem_classes="markdown-text")

demo.launch()
|
|