File size: 5,903 Bytes
ad57016
 
 
 
 
 
61763a4
ad57016
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61763a4
ad57016
 
 
 
 
 
 
 
 
 
 
 
 
61763a4
ad57016
 
61763a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad57016
 
 
6d2dbdb
 
 
ad57016
6d2dbdb
 
ad57016
6d2dbdb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61763a4
ad57016
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61763a4
 
 
 
 
 
 
 
ad57016
 
 
 
 
 
 
 
61763a4
 
 
 
 
 
 
 
ad57016
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import re
import streamlit as st
import requests
import pandas as pd
from io import StringIO
import plotly.graph_objs as go
import plotly.express as px


def convert_markdown_table_to_dataframe(md_content):
    """
    Parse a pipe-delimited markdown table into a Pandas DataFrame.

    The leading and trailing pipe of every line are removed, the remaining
    text is read as "|"-separated values, and the markdown header separator
    row (e.g. ``|---|:---:|``) is discarded. Markdown links in the ``Model``
    column (``[name](url)``) are split into a plain model name, kept in
    ``Model``, and the link target, stored in a new ``URL`` column (``None``
    when the cell holds no link).

    Args:
        md_content: Text of the markdown table. It must contain a ``Model``
            column.

    Returns:
        DataFrame with the table's columns (names stripped of surrounding
        whitespace) plus ``URL``. Cells other than ``Model`` are returned as
        parsed by ``pd.read_csv``; numeric conversion is left to the caller.

    Raises:
        KeyError: If the table has no ``Model`` column.
    """
    # Remove the outer pipes so they do not produce empty first/last columns.
    without_leading_pipe = re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE)
    cleaned_content = re.sub(r'\|\s*$', '', without_leading_pipe, flags=re.MULTILINE)

    # Raw string: "\|" in a normal string literal is an invalid escape sequence.
    df = pd.read_csv(StringIO(cleaned_content), sep=r"\|", engine='python')
    df.columns = df.columns.str.strip()  # Clean column names

    # Drop the first row only when it really is the markdown separator row,
    # so a table without one does not silently lose its first data row.
    if len(df) > 0:
        first_row_cells = [str(cell) for cell in df.iloc[0]]
        if all(re.fullmatch(r'\s*:?-+:?\s*', cell) for cell in first_row_cells):
            df = df.drop(df.index[0], axis=0)

    # Extract Model names and URLs with a single regex search per cell.
    model_link_pattern = re.compile(r'\[(.*?)\]\((.*?)\)')

    def split_model_cell(cell):
        # Non-string cells (e.g. NaN for an empty cell) carry no link; pass
        # them through instead of crashing inside the regex engine.
        if not isinstance(cell, str):
            return cell, None
        link = model_link_pattern.search(cell)
        url = link.group(2) if link else None
        # Strip the padding left behind by the table's column separators.
        return model_link_pattern.sub(r'\1', cell).strip(), url

    names_and_urls = [split_model_cell(cell) for cell in df['Model']]
    df['URL'] = [url for _, url in names_and_urls]
    df['Model'] = [name for name, _ in names_and_urls]
    return df


def create_bar_chart(df, metric, color_map, key_suffix):
    """
    Render a horizontal bar chart of one metric in the Streamlit page.

    A "### <metric> Scores" heading is always written. When `metric` is not
    a column of `df`, a "no data" message is written instead of a chart.
    Otherwise rows with a missing model or value are dropped, the remaining
    rows are sorted by the metric in ascending order, and one bar per model
    is drawn, coloured by its value using the `color_map` colorscale.
    `key_suffix` is appended to the Streamlit element key of the chart.
    """
    st.write(f"### {metric} Scores")

    if metric in df.columns:
        ranked = df[['Model', metric]].dropna()
        ranked = ranked.sort_values(by=metric, ascending=True)

        values = ranked[metric]
        bars = go.Bar(
            x=values,
            y=ranked['Model'],
            orientation='h',
            marker={'color': values, 'colorscale': color_map},
        )

        fig = go.Figure(bars)
        fig.update_layout(margin={'l': 20, 'r': 20, 't': 20, 'b': 20})

        chart_key = f"bar_chart_{metric}_{key_suffix}"
        st.plotly_chart(fig, use_container_width=True, key=chart_key)
    else:
        st.write(f"No data available for {metric}.")


def create_radar_chart(df, metric_columns):
    """
    Render a radar (polar) chart in the Streamlit page for the ten rows of
    `df` with the largest "Average" value.

    One filled trace is added per selected model, with `metric_columns` as
    the angular axis. The radial axis runs from 0 to 1.2 times the largest
    value among the plotted cells. A message is written instead of a chart
    when `df` has no "Average" column or no rows are selected.
    """
    st.write("### Radar Chart (Top 10 Models by Average Score)")

    if 'Average' not in df.columns:
        st.write("Average column not found.")
        return

    leaders = df.nlargest(10, 'Average')
    if leaders.empty:
        st.write("No models available for the radar chart.")
        return

    scores_by_model = leaders[['Model'] + metric_columns].set_index('Model')

    fig = go.Figure()
    for label, scores in scores_by_model.iterrows():
        trace = go.Scatterpolar(
            r=scores.values,
            theta=metric_columns,
            fill='toself',
            name=label,
        )
        fig.add_trace(trace)

    # Leave 20% headroom above the largest plotted value.
    axis_limit = 1.2 * scores_by_model.max().max()
    radial_axis = {'visible': True, 'range': [0, axis_limit]}
    fig.update_layout(polar={'radialaxis': radial_axis}, showlegend=True)

    st.plotly_chart(fig, use_container_width=True, key="radar_chart")


def main():
    """
    Build the SLM Leaderboard Streamlit page.

    The page downloads a markdown table from a URL (editable by the user,
    or chosen with one of the benchmark buttons), converts it to a
    DataFrame, coerces every column other than "Model" and "URL" to
    numbers, computes a per-model "Average", and renders a table, one bar
    chart per metric, and a radar chart of the top models.
    """
    st.set_page_config(page_title="SLM Leaderboard", layout="wide")
    # "\ud83c\udfc6" is a pair of lone surrogates in a Python str (not the
    # trophy emoji) and cannot be encoded as UTF-8; use the real code point.
    st.title("\U0001F3C6 SLM Leaderboard")
    st.markdown("We record Nous and standard benchmark results for various SLMs. Please submit a PR to this [repo](https://github.com/amazon-science/aws-research-science/tree/main/SLMleaderboard) to include your model! Heavily Inspired by [YALB](https://huggingface.co/spaces/mlabonne/Yet_Another_LLM_Leaderboard)")

    # Define benchmark URLs
    benchmarks = {
        "Nous benchmark results": "https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md",
        "Standard LLM benchmarks": "https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/standard.md"
    }

    # Default URL to your markdown file
    default_url = benchmarks["Nous benchmark results"]

    # The selected URL lives in session state so that a button click
    # survives the rerun it triggers; a plain local variable would be reset
    # to the text-input value on the next script run.
    if "md_url" not in st.session_state:
        st.session_state["md_url"] = default_url

    def select_benchmark(url):
        # Runs as a widget callback, i.e. before the text input is
        # re-created on the rerun, so the input picks up the new value.
        st.session_state["md_url"] = url

    # User text input
    md_url = st.text_input("This is the default location of the benchmarks and can be changed", key="md_url")

    # Buttons to select benchmarks
    for label, url in benchmarks.items():
        st.button(label, on_click=select_benchmark, args=(url,))

    # Display the markdown file content
    st.markdown(f"Current dataset URL: {md_url}")

    if not md_url:
        st.error("Please provide a valid URL to a markdown file containing the leaderboard table.")
        return

    try:
        # A timeout keeps the page from hanging on an unresponsive host.
        response = requests.get(md_url, timeout=10)
        response.raise_for_status()
        md_content = response.text

        df = convert_markdown_table_to_dataframe(md_content)

        # Automatically detect metrics (all columns except 'Model' and 'URL')
        metric_columns = [col for col in df.columns if col not in ['Model', 'URL']]

        # Convert metric columns to numeric, handling errors gracefully
        for col in metric_columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        # Calculate "Average" from the benchmark scores only; an "Average"
        # column already present in the table must not feed into its own mean.
        score_columns = [col for col in metric_columns if col != 'Average']
        df['Average'] = df[score_columns].mean(axis=1, skipna=True)
        if 'Average' not in metric_columns:
            metric_columns.append('Average')

        # Dropdown to select color map
        color_map = st.selectbox("Select Color Map for Bar Charts", options=['Inferno', 'Viridis', 'Cividis'])

        # Sortable leaderboard table
        st.dataframe(
            df[['Model'] + metric_columns + ['URL']],
            use_container_width=True,
            hide_index=True,
        )

        # Bar charts for each metric. "Average" is skipped here because it
        # gets its own chart below; drawing it in both places duplicated it.
        for i, metric in enumerate(metric_columns):
            if metric == 'Average':
                continue
            create_bar_chart(df, metric, color_map, key_suffix=i)

        # Bar chart for the "Average" score
        create_bar_chart(df, 'Average', color_map, key_suffix="average")

        # Radar chart for the top 10 models by "Average" score
        create_radar_chart(df, metric_columns)

    except Exception as e:
        # Top-level UI boundary: report any download or parsing failure on
        # the page instead of crashing the app.
        st.error(f"An error occurred while processing the markdown table: {e}")


# Build the dashboard only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()