File size: 3,112 Bytes
54f0e5a
 
498bbe0
54f0e5a
 
 
 
2051b0b
 
54f0e5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c5907c4
 
 
 
54f0e5a
 
 
c5907c4
 
54f0e5a
 
 
 
92c2d28
54f0e5a
 
 
 
 
aa9569d
498bbe0
aa9569d
54f0e5a
 
aa9569d
 
 
 
 
 
 
 
498bbe0
aa9569d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import pandas as pd
from pathlib import Path
from ..styles import highlight_color

abs_path = Path(__file__).parent.parent.parent

def replace_models_names(model_name):
    """Turn a results-file model identifier into a Hugging Face markdown link.

    Identifiers containing ``"gpt"`` are returned unchanged. For every other
    identifier a leading ``model-`` marker is dropped, the hyphen separating
    the organisation from the repository becomes ``/``, and the result is
    wrapped as ``[org/repo](https://huggingface.co/org/repo)``.

    Args:
        model_name: Identifier string, e.g. ``"model-meta-llama-Llama-2-7b-hf"``.

    Returns:
        ``model_name`` itself for GPT models, otherwise a markdown link string.
    """
    if "gpt" in model_name:
        return model_name

    # Organisations whose own name contains a hyphen: for these the org/repo
    # separator is the first hyphen *after* the organisation name.
    hyphenated_orgs = ('meta-llama', 'epfl-llm', '01-ai')

    # Strip only a leading marker so that "model-" inside a repo name survives.
    marker = 'model-'
    new_name = model_name[len(marker):] if model_name.startswith(marker) else model_name

    for org in hyphenated_orgs:
        if new_name.startswith(org):
            # Split on the first hyphen following the organisation, leaving
            # every other character (including underscores) untouched.
            head, sep, repo = new_name[len(org):].partition('-')
            if sep:
                new_name = f"{org}{head}/{repo}"
            break
    else:
        # Plain organisation name: the first hyphen is the separator.
        new_name = new_name.replace('-', '/', 1)

    return f"[{new_name}](https://huggingface.co/{new_name})"

def generate_ORDER_LIST_LEK_and_data_types(json_data):
    """Derive the column order and per-column data types for a leaderboard.

    The two fixed columns ``model_name`` and ``overall_accuracy`` always come
    first; every other key of ``json_data`` follows in sorted order.

    Args:
        json_data: Mapping whose keys are the column names of one record.

    Returns:
        A ``(column_order, data_types)`` pair of lists. The first type is
        ``"markdown"`` and every remaining one is ``"number"``.
    """
    leading = ["model_name", "overall_accuracy"]
    trailing = sorted(name for name in json_data.keys() if name not in leading)

    column_order = leading + trailing
    column_types = ["markdown", "number"] + ["number"] * len(trailing)
    return column_order, column_types

def filter_columns_lek(column_choices):
    """Return the LEK leaderboard restricted to the chosen columns.

    Columns are emitted in ``ORDER_LIST_LEK`` order regardless of the order
    in ``column_choices``; names not present in ``ORDER_LIST_LEK`` are ignored.

    Args:
        column_choices: Collection of column names to keep.

    Returns:
        A ``pandas.DataFrame`` holding only the selected columns (unstyled).
    """
    selected_columns = [col for col in ORDER_LIST_LEK if col in column_choices]
    # The module rebinds LEK_ACCS to a pandas Styler, which cannot be indexed
    # with a column list; fall back to the DataFrame it wraps in that case.
    table = LEK_ACCS if isinstance(LEK_ACCS, pd.DataFrame) else LEK_ACCS.data
    return table[selected_columns]

def load_json_data(file_path, ORDER_LIST_LEK):
    """Load a leaderboard JSON file into a display-ready DataFrame.

    Steps, in order: cells that are dicts are stringified column-wise,
    ``model_name`` is rewritten through ``replace_models_names``, numeric
    columns are rounded to two decimals, ``overall_accuracy`` is coerced to
    numeric, columns are reordered to follow ``ORDER_LIST_LEK``, and rows are
    sorted by ``overall_accuracy`` in descending order.

    Args:
        file_path: Path (or anything ``pandas.read_json`` accepts) of the
            JSON results file.
        ORDER_LIST_LEK: Desired column order; names missing from the file
            are skipped, and file columns not listed here are dropped.

    Returns:
        The processed ``pandas.DataFrame``.

    Raises:
        KeyError: If the file has no ``model_name`` column.
    """
    frame = pd.read_json(file_path)

    # Any column containing at least one dict cell is converted to strings
    # in full, so the table holds only scalar values.
    for column in frame.columns:
        if frame[column].apply(type).eq(dict).any():
            frame[column] = frame[column].apply(str)

    frame["model_name"] = frame["model_name"].apply(replace_models_names)

    for column in frame.select_dtypes(include='number').columns:
        frame[column] = frame[column].round(2)

    # Guarded like the sort below: a file without this column should not
    # fail here when the rest of the function already tolerates its absence.
    if "overall_accuracy" in frame.columns:
        # Unparseable values become NaN instead of raising.
        frame["overall_accuracy"] = pd.to_numeric(
            frame["overall_accuracy"], errors='coerce'
        )

    ordered_columns = [col for col in ORDER_LIST_LEK if col in frame.columns]
    frame = frame[ordered_columns]

    if "overall_accuracy" in frame.columns:
        frame = frame.sort_values(by="overall_accuracy", ascending=False)

    return frame


# Build the LEK leaderboard table from its JSON results file.
file_path = str(abs_path / "leaderboards/lek_accs.json")
with open(file_path, 'r', encoding='utf-8') as file:
    # Parse from the handle opened above instead of re-opening the path;
    # only the first row is needed, to discover the column names.
    sample_data = pd.read_json(file).iloc[0].to_dict()

ORDER_LIST_LEK, DATA_TYPES_LEK = generate_ORDER_LIST_LEK_and_data_types(sample_data)
LEK_ACCS = load_json_data(file_path, ORDER_LIST_LEK)
# From here on LEK_ACCS is a pandas Styler, not a DataFrame: the maximum of
# every column except the first is highlighted and values use two decimals.
LEK_ACCS = LEK_ACCS.style.highlight_max(
    color=highlight_color,
    subset=LEK_ACCS.columns[1:],
).format(precision=2)
COLUMN_HEADERS_LEK = ORDER_LIST_LEK


# Build the LEK_EN leaderboard table from its JSON results file.
file_path = str(abs_path / "leaderboards/lek_en_accs.json")
with open(file_path, 'r', encoding='utf-8') as file:
    # Parse from the handle opened above instead of re-opening the path;
    # only the first row is needed, to discover the column names.
    sample_data = pd.read_json(file).iloc[0].to_dict()

ORDER_LIST_LEK_EN, DATA_TYPES_LEK_EN = generate_ORDER_LIST_LEK_and_data_types(sample_data)
LEK_ACCS_EN = load_json_data(file_path, ORDER_LIST_LEK_EN)
# From here on LEK_ACCS_EN is a pandas Styler, not a DataFrame: the maximum of
# every column except the first is highlighted and values use two decimals.
LEK_ACCS_EN = LEK_ACCS_EN.style.highlight_max(
    color=highlight_color,
    subset=LEK_ACCS_EN.columns[1:],
).format(precision=2)
COLUMN_HEADERS_LEK_EN = ORDER_LIST_LEK_EN