# Spaces: w601sxs / SLM-Leaderboard
# Runtime error
# File size: 5,609 Bytes

import re
import streamlit as st
import requests
import pandas as pd
from io import StringIO
import plotly.graph_objs as go
import plotly.express as px


def convert_markdown_table_to_dataframe(md_content):
    """
    Convert a pipe-delimited markdown table into a Pandas DataFrame.

    The outer pipes of every line are removed, the remaining text is parsed
    with ``|`` as the field separator, and the row directly under the header
    (the ``---|---`` separator row of a markdown table) is discarded.
    Markdown links in the ``Model`` column are split into the link text
    (kept in ``Model``) and the link target (stored in a new ``URL`` column).

    Args:
        md_content: Text of the markdown table.

    Returns:
        DataFrame with stripped column names, whitespace-trimmed ``Model``
        names and an added ``URL`` column (``None`` where the cell holds no
        link). All other cells are left exactly as parsed.

    Raises:
        KeyError: If the table has no ``Model`` column.
    """
    # Strip the leading and trailing pipe of each line so they do not create
    # empty first/last columns when splitting on "|".
    without_leading_pipe = re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE)
    cleaned_content = re.sub(r'\|\s*$', '', without_leading_pipe, flags=re.MULTILINE)

    # Raw string: "\|" in a normal string literal is an invalid escape sequence.
    df = pd.read_csv(StringIO(cleaned_content), sep=r"\|", engine='python')

    # Drop the row under the header by position so a header-only table does
    # not raise; the remaining rows keep their original index labels.
    df = df.drop(index=df.index[:1])
    df.columns = df.columns.str.strip()  # Clean column names

    if 'Model' not in df.columns:
        raise KeyError("Markdown table has no 'Model' column")

    # Split "[name](url)" cells into display name and URL in a single pass.
    model_link_pattern = re.compile(r'\[(.*?)\]\((.*?)\)')
    names = []
    urls = []
    for cell in df['Model']:
        if not isinstance(cell, str):
            # Empty cells are parsed as NaN; pass them through untouched.
            names.append(cell)
            urls.append(None)
            continue
        link = model_link_pattern.search(cell)
        urls.append(link.group(2) if link else None)
        # Cells carry the padding that surrounded the pipes; trim it.
        names.append(model_link_pattern.sub(r'\1', cell).strip())

    df['URL'] = urls
    df['Model'] = names
    return df


def create_bar_chart(df, metric, color_map, key_suffix):
    """
    Render a horizontal bar chart of one metric, one bar per model.

    A heading is always written. When the metric column is missing, or no
    row has a value for it, a "No data available" note is shown instead of
    a chart.

    Args:
        df: DataFrame with a ``Model`` column and, optionally, ``metric``.
        metric: Name of the column to plot.
        color_map: Plotly colorscale name used to color bars by their value.
        key_suffix: Value appended to the Streamlit element key so that
            several charts can coexist on one page.
    """
    st.write(f"### {metric} Scores")
    if metric not in df.columns:
        st.write(f"No data available for {metric}.")
        return

    # Rows without a value are dropped; ascending order combined with the
    # horizontal orientation puts the largest value at the top of the chart.
    sorted_df = df[['Model', metric]].dropna().sort_values(by=metric, ascending=True)
    if sorted_df.empty:
        # Every value was missing: report it rather than drawing a blank plot.
        st.write(f"No data available for {metric}.")
        return

    fig = go.Figure(go.Bar(
        x=sorted_df[metric],
        y=sorted_df['Model'],
        orientation='h',
        marker=dict(color=sorted_df[metric], colorscale=color_map)
    ))
    fig.update_layout(margin=dict(l=20, r=20, t=20, b=20))
    st.plotly_chart(fig, use_container_width=True, key=f"bar_chart_{metric}_{key_suffix}")


def create_radar_chart(df, metric_columns):
    """
    Render a radar (polar) plot for the ten rows with the highest "Average".

    One filled trace is drawn per model, with one angular position per entry
    of ``metric_columns``. The radial axis runs from 0 to 1.2 times the
    largest plotted value. A note is written instead of a chart when the
    ``Average`` column is missing or no rows are selected.
    """
    st.write("### Radar Chart (Top 10 Models by Average Score)")

    if 'Average' not in df.columns:
        st.write("Average column not found.")
        return

    leaders = df.nlargest(10, 'Average')
    if leaders.empty:
        st.write("No models available for the radar chart.")
        return

    # One row per model, one column per metric.
    scores_by_model = leaders[['Model'] + metric_columns].set_index('Model')

    # Leave headroom above the largest score so outer traces are not clipped.
    axis_limit = 1.2 * scores_by_model.max().max()

    fig = go.Figure()
    for label, scores in scores_by_model.iterrows():
        trace = go.Scatterpolar(
            r=scores.values,
            theta=metric_columns,
            fill='toself',
            name=label,
        )
        fig.add_trace(trace)

    radial_axis = dict(visible=True, range=[0, axis_limit])
    fig.update_layout(polar=dict(radialaxis=radial_axis), showlegend=True)

    st.plotly_chart(fig, use_container_width=True, key="radar_chart")


def main():
    """
    Build the SLM leaderboard page.

    Reads a markdown-table URL from a text input, downloads and parses it,
    converts every column other than ``Model`` and ``URL`` to numbers
    (unparseable cells become NaN), adds an ``Average`` column, and renders
    the table, one bar chart per metric and a radar chart. Download and
    processing failures are shown on the page via ``st.error`` instead of
    being raised.
    """
    st.set_page_config(page_title="SLM Leaderboard", layout="wide")
    st.title("🏆 SLM Leaderboard")
    st.markdown("We record Nous and Standard benchmark results for various SLMs. Please submit a PR to this [repo](https://github.com/amazon-science/aws-research-science/tree/main/SLMleaderboard) to include your model! Heavily Inspired by [YALB](https://huggingface.co/spaces/mlabonne/Yet_Another_LLM_Leaderboard) ")

    # URL to your markdown file
    md_url = st.text_input("This is the default location of the benchmarks and can be changed",
                           "https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md")

    st.markdown("""
                Copy the following links into the textbox above and refresh dashboard:

                - [Nous benchmark results](https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md)
                - [Standard LLM benchmarks](https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/standard.md) """)

    # Treat a whitespace-only entry the same as an empty one.
    md_url = (md_url or "").strip()
    if not md_url:
        st.error("Please provide a valid URL to a markdown file containing the leaderboard table.")
        return

    try:
        # A timeout keeps an unresponsive host from hanging the app forever.
        response = requests.get(md_url, timeout=30)
        response.raise_for_status()
        md_content = response.text

        df = convert_markdown_table_to_dataframe(md_content)

        # Automatically detect metrics (all columns except 'Model' and 'URL')
        metric_columns = [col for col in df.columns if col not in ['Model', 'URL']]

        # Convert metric columns to numeric, handling errors gracefully
        for col in metric_columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        # Compute "Average" from the individual metrics only: an 'Average'
        # column already present in the table must not feed into its own
        # recomputation. Fall back to all metric columns if nothing else exists.
        score_columns = [col for col in metric_columns if col != 'Average'] or metric_columns
        df['Average'] = df[score_columns].mean(axis=1, skipna=True)
        if 'Average' not in metric_columns:
            metric_columns.append('Average')

        # Dropdown to select color map
        color_map = st.selectbox("Select Color Map for Bar Charts", options=['Inferno', 'Viridis', 'Cividis'])

        # Sortable leaderboard table
        st.dataframe(
            df[['Model'] + metric_columns + ['URL']],
            use_container_width=True,
            hide_index=True,
        )

        # Bar charts for each metric. 'Average' is part of metric_columns at
        # this point, so it is charted here and needs no separate call.
        for i, metric in enumerate(metric_columns):
            create_bar_chart(df, metric, color_map, key_suffix=i)

        # Radar chart for the top 10 models by "Average" score
        create_radar_chart(df, metric_columns)

    except requests.RequestException as e:
        # Network/HTTP problems get their own message so they are not
        # mistaken for a malformed table.
        st.error(f"Could not download the markdown file: {e}")
    except Exception as e:
        # Top-level UI boundary: surface any remaining failure on the page.
        st.error(f"An error occurred while processing the markdown table: {e}")


# Entry-point guard: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()