Spaces:

amu-cai
/

Polish_Medical_Exams

Running

File size: 3,149 Bytes

54f0e5a
 
498bbe0
54f0e5a
 
 
 
2051b0b
 
54f0e5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d09c531
54f0e5a
 
 
 
 
 
 
 
 
 
 
d09c531
54f0e5a
 
 
 
 
 
 
 
 
d09c531
 
 
 
54f0e5a
 
 
d09c531
 
54f0e5a
 
 
 
d09c531
92c2d28
54f0e5a
cbd517c
54f0e5a
 
 
15007e1
498bbe0
15007e1
54f0e5a
 
cbd517c
 
 
 
 
15007e1
 
10fa934
498bbe0
10fa934
cbd517c

import pandas as pd
from pathlib import Path
from ..styles import highlight_color

abs_path = Path(__file__).parent.parent.parent

def replace_models_names(model_name):
    if "gpt" in model_name:
        return model_name
    replaces = {'meta-llama': 'meta_llama',
        'epfl-llm':'epfl_llm',
        '01-ai':'01_ai'}
    new_name = model_name.replace('model-', '')
    for k, v in replaces.items():
        if new_name.startswith(k):
            new_name = new_name.replace(k, v)
    new_name = new_name.replace('-','/',1)
    new_name = new_name.replace('_','-',1)
    new_name = f"[{new_name}](https://huggingface.co/{new_name})"
    return new_name

def generate_ORDER_LIST_LDEK_and_data_types(json_data):
    """Build the column order and the matching column data types.

    The two fixed columns "model_name" and "overall_accuracy" come first,
    followed by every other key of ``json_data`` in sorted order.

    Args:
        json_data: Mapping whose keys are the available column names.

    Returns:
        A ``(column_order, data_types)`` tuple of lists. The first data type
        is "markdown"; every remaining one is "number".
    """
    leading = ["model_name", "overall_accuracy"]
    remaining = sorted(name for name in json_data.keys() if name not in leading)

    column_order = leading + remaining
    column_types = ["markdown", "number"] + ["number"] * len(remaining)
    return column_order, column_types

def filter_columns_ldek(column_choices):
    """Return the module-level LDEK table restricted to the chosen columns.

    Columns are emitted in the order given by the module-level
    ``ORDER_LIST_LDEK``, regardless of the order in ``column_choices``.

    Args:
        column_choices: Container of column names to keep.

    Returns:
        The result of indexing the module-level ``LDEK_ACCS`` with the list
        of kept column names.
    """
    # NOTE(review): at module level LDEK_ACCS is rebound to the result of
    # `.style.highlight_max(...).format(...)` — confirm that object supports
    # indexing with a list of column names.
    kept = []
    for name in ORDER_LIST_LDEK:
        if name in column_choices:
            kept.append(name)
    return LDEK_ACCS[kept]

def load_json_data(file_path, ORDER_LIST_LDEK):
    """Load a leaderboard JSON file into an ordered, sorted DataFrame.

    Args:
        file_path: Location of the JSON file, passed to ``pd.read_json``.
        ORDER_LIST_LDEK: Column names in the desired output order; names
            missing from the file are skipped, and file columns not listed
            here are dropped.

    Returns:
        A DataFrame with model names rendered through
        ``replace_models_names``, numeric columns rounded to two decimals,
        and rows sorted by "overall_accuracy" in descending order when that
        column is kept.
    """
    frame = pd.read_json(file_path)

    # Columns holding at least one dict cell are converted to strings.
    for name in frame.columns:
        has_dict_cell = frame[name].apply(type).eq(dict).any()
        if has_dict_cell:
            frame[name] = frame[name].apply(str)

    frame["model_name"] = frame["model_name"].apply(replace_models_names)

    numeric_names = frame.select_dtypes(include='number').columns
    for name in numeric_names:
        frame[name] = frame[name].round(2)

    # Values that cannot be parsed as numbers become NaN.
    frame["overall_accuracy"] = pd.to_numeric(
        frame["overall_accuracy"], errors='coerce'
    )

    kept_in_order = [name for name in ORDER_LIST_LDEK if name in frame.columns]
    frame = frame[kept_in_order]

    if "overall_accuracy" not in frame.columns:
        return frame
    return frame.sort_values(by="overall_accuracy", ascending=False)



# Polish LDEK leaderboard: derive the column order from the first record,
# then load, order and style the full table.
file_path = str(abs_path / "leaderboards/ldek_accs.json")
# Parse through the handle that is already open instead of re-opening the
# file by path inside the `with` block.
with open(file_path, 'r', encoding='utf-8') as file:
    sample_data = pd.read_json(file).iloc[0].to_dict()

ORDER_LIST_LDEK, DATA_TYPES_LDEK = generate_ORDER_LIST_LDEK_and_data_types(sample_data)
LDEK_ACCS = load_json_data(file_path, ORDER_LIST_LDEK)
# LDEK_ACCS is rebound here from the DataFrame to its styled form; the
# highlight skips the first column (the model name).
LDEK_ACCS = LDEK_ACCS.style.highlight_max(
    color=highlight_color,
    subset=LDEK_ACCS.columns[1:]).format(precision=2)
COLUMN_HEADERS_LDEK = ORDER_LIST_LDEK
print(ORDER_LIST_LDEK)


# English LDEK leaderboard: same pipeline as the Polish table, but driven by
# the English results file.
file_path2 = str(abs_path / "leaderboards/ldek_en_accs.json")
# Sample the first record of the English file itself (file_path2) so the
# column order reflects its own columns; sampling the Polish file would make
# load_json_data drop any column that exists only in the English data.
with open(file_path2, 'r', encoding='utf-8') as file:
    sample_data2 = pd.read_json(file).iloc[0].to_dict()
ORDER_LIST_LDEK_EN, DATA_TYPES_LDEK_EN = generate_ORDER_LIST_LDEK_and_data_types(sample_data2)
LDEK_EN_ACCS = load_json_data(file_path2, ORDER_LIST_LDEK_EN)
# LDEK_EN_ACCS is rebound here from the DataFrame to its styled form; the
# highlight skips the first column (the model name).
LDEK_EN_ACCS = LDEK_EN_ACCS.style.highlight_max(
    color=highlight_color,
    subset=LDEK_EN_ACCS.columns[1:]).format(precision=2)
COLUMN_HEADERS_LDEK_EN = ORDER_LIST_LDEK_EN
print(ORDER_LIST_LDEK_EN)