File size: 5,609 Bytes
ad57016
 
 
 
 
 
61763a4
ad57016
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61763a4
ad57016
 
 
 
 
 
 
 
 
 
 
 
 
61763a4
ad57016
 
61763a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad57016
 
 
6d2dbdb
992f74a
 
ad57016
992f74a
 
 
ad57016
992f74a
 
 
 
 
 
61763a4
ad57016
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61763a4
 
 
 
 
 
 
 
ad57016
 
 
 
 
 
 
 
61763a4
 
 
 
 
 
 
 
ad57016
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import re
import streamlit as st
import requests
import pandas as pd
from io import StringIO
import plotly.graph_objs as go
import plotly.express as px


def convert_markdown_table_to_dataframe(md_content):
    """
    Convert a pipe-delimited markdown table into a Pandas DataFrame.

    The outer pipes of every line are removed and the remaining text is
    parsed with ``|`` as the field separator. The markdown alignment row
    (``---|---``) directly below the header is dropped when present.
    Markdown links in the ``Model`` column (``[name](url)``) are split: the
    link text stays in ``Model`` and the link target is stored in a new
    ``URL`` column.

    Args:
        md_content: Text of the markdown table, header row first.

    Returns:
        DataFrame with stripped column names, a whitespace-trimmed ``Model``
        column and an added ``URL`` column (missing where the cell holds no
        link). All other cells are returned as parsed by ``read_csv``. Row
        labels are not renumbered after the alignment row is removed.

    Raises:
        KeyError: If the table has no ``Model`` column.
    """
    link_re = re.compile(r'\[(.*?)\]\((.*?)\)')
    alignment_re = re.compile(r':?-+:?')

    # Strip the leading and trailing pipe of each line so they do not
    # produce empty first/last columns.
    without_leading = re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE)
    cleaned_content = re.sub(r'\|\s*$', '', without_leading, flags=re.MULTILINE)

    # Raw string: "\|" in a normal literal is an invalid escape sequence.
    df = pd.read_csv(StringIO(cleaned_content), sep=r"\|", engine='python')
    df.columns = df.columns.str.strip()  # Clean column names

    # Drop the first row only when it really is the markdown alignment row;
    # otherwise it is data and must be kept.
    if not df.empty:
        first_cells = [str(cell).strip() for cell in df.iloc[0] if pd.notna(cell)]
        if first_cells and all(alignment_re.fullmatch(cell) for cell in first_cells):
            df = df.drop(df.index[0], axis=0)

    def split_link(cell):
        """Return (display name, url-or-None) for one ``Model`` cell."""
        if not isinstance(cell, str):
            # Missing cells (NaN) carry no link; pass them through untouched.
            return cell, None
        match = link_re.search(cell)
        url = match.group(2) if match else None
        return link_re.sub(r'\1', cell).strip(), url

    # Extract Model names and URLs in a single pass over the column.
    parsed = [split_link(cell) for cell in df['Model']]
    df['URL'] = [url for _, url in parsed]
    df['Model'] = [name for name, _ in parsed]
    return df


def create_bar_chart(df, metric, color_map, key_suffix):
    """
    Render a horizontal Plotly bar chart of one metric, one bar per model.

    A heading for the metric is always written first. When ``metric`` is not
    a column of ``df`` a short notice is shown instead of a chart. Rows with
    a missing model or score are left out, and bars are ordered by ascending
    score. ``key_suffix`` is combined with the metric name to build the
    Streamlit element key for the chart.
    """
    st.write(f"### {metric} Scores")

    if metric in df.columns:
        scores = df[['Model', metric]].dropna()
        scores = scores.sort_values(by=metric, ascending=True)

        bars = go.Bar(
            x=scores[metric],
            y=scores['Model'],
            orientation='h',
            # Bars are coloured by their own value on the chosen colour scale.
            marker={'color': scores[metric], 'colorscale': color_map},
        )
        figure = go.Figure(bars)
        figure.update_layout(margin={'l': 20, 'r': 20, 't': 20, 'b': 20})

        chart_key = f"bar_chart_{metric}_{key_suffix}"
        st.plotly_chart(figure, use_container_width=True, key=chart_key)
    else:
        st.write(f"No data available for {metric}.")


def create_radar_chart(df, metric_columns):
    """
    Draw a polar (radar) chart comparing the ten highest-scoring models.

    Models are ranked by the "Average" column. Each selected model becomes
    one filled trace whose spokes are the entries of ``metric_columns``.
    A notice is written instead of a chart when the "Average" column is
    absent or when no rows are selected.
    """
    st.write("### Radar Chart (Top 10 Models by Average Score)")

    has_average = 'Average' in df.columns
    if not has_average:
        st.write("Average column not found.")
        return

    leaders = df.nlargest(10, 'Average')
    if leaders.empty:
        st.write("No models available for the radar chart.")
        return

    wanted_columns = ['Model'] + metric_columns
    scores_by_model = leaders[wanted_columns].set_index('Model')

    # Radial axis spans 0 .. 120% of the largest plotted value.
    axis_upper = 1.2 * scores_by_model.max().max()

    fig = go.Figure()
    for name, scores in scores_by_model.iterrows():
        trace = go.Scatterpolar(
            r=scores.values,
            theta=metric_columns,
            fill='toself',
            name=name,
        )
        fig.add_trace(trace)

    fig.update_layout(
        polar={'radialaxis': {'visible': True, 'range': [0, axis_upper]}},
        showlegend=True,
    )

    st.plotly_chart(fig, use_container_width=True, key="radar_chart")


def main():
    """
    Render the SLM leaderboard page.

    Downloads a markdown table from the URL in the text box, parses it with
    ``convert_markdown_table_to_dataframe``, coerces every column other than
    ``Model``/``URL``/``Average`` to numeric, computes a per-model
    ``Average`` over those columns, and then shows the table, one bar chart
    per metric (plus one for the average) and a radar chart of the top
    models. Download and processing failures are reported on the page via
    ``st.error`` instead of being raised.
    """
    st.set_page_config(page_title="SLM Leaderboard", layout="wide")
    st.title("🏆 SLM Leaderboard")
    st.markdown("We record Nous and Standard benchmark results for various SLMs. Please submit a PR to this [repo](https://github.com/amazon-science/aws-research-science/tree/main/SLMleaderboard) to include your model! Heavily Inspired by [YALB](https://huggingface.co/spaces/mlabonne/Yet_Another_LLM_Leaderboard) ")

    # URL to your markdown file
    md_url = st.text_input("This is the default location of the benchmarks and can be changed",
                           "https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md")

    st.markdown("""
                Copy the following links into the textbox above and refresh dashboard:

                - [Nous benchmark results](https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md)
                - [Standard LLM benchmarks](https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/standard.md) """)

    if not md_url:
        st.error("Please provide a valid URL to a markdown file containing the leaderboard table.")
        return

    # NOTE(review): md_url is user-supplied and is fetched by the process
    # running this app; consider restricting allowed hosts if the app is
    # exposed publicly.
    try:
        # A timeout keeps an unresponsive host from hanging the page forever.
        response = requests.get(md_url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        st.error(f"An error occurred while downloading the markdown file: {e}")
        return

    try:
        df = convert_markdown_table_to_dataframe(response.text)

        # Automatically detect metrics. 'Average' is excluded so that an
        # average already present in the table is not fed back into the
        # recomputed average below.
        score_columns = [col for col in df.columns if col not in ('Model', 'URL', 'Average')]

        # Convert metric columns to numeric, handling errors gracefully
        for col in score_columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        # Calculate "Average" score as a new column
        df['Average'] = df[score_columns].mean(axis=1, skipna=True)
        metric_columns = score_columns + ['Average']

        # Dropdown to select color map
        color_map = st.selectbox("Select Color Map for Bar Charts", options=['Inferno', 'Viridis', 'Cividis'])

        # Sortable leaderboard table
        st.dataframe(
            df[['Model'] + metric_columns + ['URL']],
            use_container_width=True,
            hide_index=True,
        )

        # Bar charts for each metric; 'Average' is drawn once, separately,
        # so it is not rendered twice.
        for i, metric in enumerate(score_columns):
            create_bar_chart(df, metric, color_map, key_suffix=i)
        create_bar_chart(df, 'Average', color_map, key_suffix="average")

        # Radar chart for the top 10 models by "Average" score
        create_radar_chart(df, metric_columns)

    except Exception as e:
        # Top-level UI boundary: show the problem on the page rather than
        # crashing the app.
        st.error(f"An error occurred while processing the markdown table: {e}")

# Build the page only when this module is executed as the main script,
# not when it is imported.
if __name__ == "__main__":
    main()