File size: 1,795 Bytes
4c1e130
 
ebac224
4c1e130
 
 
ebac224
4c1e130
ebac224
 
 
 
4c1e130
 
 
ebac224
4c1e130
 
ebac224
4c1e130
 
ebac224
4c1e130
ebac224
4c1e130
 
ebac224
 
 
 
 
 
 
 
 
 
4c1e130
ebac224
 
 
4c1e130
 
ec7c43d
 
 
 
 
ebac224
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from data.model_handler import ModelHandler


def make_clickable_model(model_name, link=None):
    """Return an HTML anchor that displays the de-sanitized model name.

    The sanitized name is converted back by turning every "_" into "/" and
    every "-thisisapoint-" into ".", then removing any "/captioning" and
    "/ocr" segments.

    Args:
        model_name: Sanitized model identifier.
        link: Target URL for the anchor. When None, the link defaults to
            "https://huggingface.co/" followed by the de-sanitized name.

    Returns:
        An ``<a>`` tag (opening in a new tab, underlined) whose text is the
        de-sanitized model name.
    """
    # Compute the display name unconditionally: it is needed for the anchor
    # text even when the caller supplies an explicit link.
    desanitized_model_name = model_name.replace("_", "/").replace("-thisisapoint-", ".")
    for suffix in ("/captioning", "/ocr"):
        desanitized_model_name = desanitized_model_name.replace(suffix, "")

    if link is None:
        link = "https://huggingface.co/" + desanitized_model_name

    return f'<a target="_blank" style="text-decoration: underline" href="{link}">{desanitized_model_name}</a>'


def add_rank_and_format(df, benchmark_version=1):
    """Turn an index-keyed results frame into a ranked, link-formatted table.

    The frame's index is moved into a regular column named "Model", the
    frame is passed through ``ModelHandler.add_rank`` together with
    ``benchmark_version``, and each "Model" value is then replaced by the
    HTML anchor produced by ``make_clickable_model``.

    Args:
        df: DataFrame whose index holds the model identifiers (assumes the
            index is unnamed so ``reset_index`` yields an "index" column --
            TODO confirm against callers).
        benchmark_version: Forwarded unchanged to ``ModelHandler.add_rank``.

    Returns:
        The DataFrame returned by ``ModelHandler.add_rank`` with its "Model"
        column converted to clickable links. No de-duplication is applied.
    """
    with_model_column = df.reset_index().rename(columns={"index": "Model"})
    ranked = ModelHandler.add_rank(with_model_column, benchmark_version)
    ranked["Model"] = ranked["Model"].apply(make_clickable_model)
    return ranked


def remove_duplicates(df):
    """Drop rows whose model name is a duplicate, keeping the best-ranked one.

    Two rows are considered duplicates when their "Model" values are equal
    after replacing every "_" with "/". Rows are sorted by "Rank" ascending
    and the first row of each duplicate group is kept.

    Args:
        df: DataFrame with a string "Model" column and a sortable "Rank"
            column. It is not modified.

    Returns:
        A new DataFrame sorted by "Rank" with duplicate models removed.
    """
    dedup_key = df["Model"].str.replace("_", "/", regex=False)
    # ``assign`` works on a copy, so the temporary key column never leaks
    # into the caller's frame.
    ranked = df.assign(model_name=dedup_key).sort_values("Rank")
    unique_rows = ranked.drop_duplicates(subset=["model_name"], keep="first")
    return unique_rows.drop("model_name", axis=1)


def get_refresh_function(model_handler, benchmark_version):
    """Build a one-argument refresh callback bound to a handler and version.

    Args:
        model_handler: Object providing ``get_vidore_data(metric)`` and
            ``compute_averages(metric, benchmark_version)``.
        benchmark_version: Passed to both ``compute_averages`` and
            ``add_rank_and_format`` on every refresh.

    Returns:
        A function taking ``metric`` that returns the ranked, formatted
        DataFrame for that metric.
    """

    def _refresh(metric):
        # The return value of get_vidore_data is discarded; it is called
        # before computing averages (presumably to load the handler's data
        # for this metric -- confirm in ModelHandler).
        model_handler.get_vidore_data(metric)
        averages = model_handler.compute_averages(metric, benchmark_version)
        return add_rank_and_format(averages, benchmark_version)

    return _refresh


def filter_models(data, search_term):
    """Keep only the rows whose "Model" value matches the search term.

    Matching is case-insensitive and missing "Model" values never match.
    The term is passed to ``Series.str.contains`` with its default pattern
    handling, so it is interpreted as a regular expression.

    Args:
        data: DataFrame with a "Model" column.
        search_term: Pattern to look for. When falsy (``None`` or ``""``),
            ``data`` is returned unchanged.

    Returns:
        ``data`` itself when no term is given, otherwise the matching rows.
    """
    if not search_term:
        return data

    is_match = data["Model"].str.contains(search_term, case=False, na=False)
    return data[is_match]