Spaces:
Runtime error
Runtime error
File size: 5,609 Bytes
ad57016 61763a4 ad57016 61763a4 ad57016 61763a4 ad57016 61763a4 ad57016 6d2dbdb 992f74a ad57016 992f74a ad57016 992f74a 61763a4 ad57016 61763a4 ad57016 61763a4 ad57016 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
import re
import streamlit as st
import requests
import pandas as pd
from io import StringIO
import plotly.graph_objs as go
import plotly.express as px
def convert_markdown_table_to_dataframe(md_content):
    """
    Convert a pipe-delimited markdown table into a pandas DataFrame.

    The outer pipes of every line are removed and the remaining text is parsed
    with ``|`` as the field separator. The markdown separator row that follows
    the header (for example ``|---|:---:|``) is discarded when present.
    Markdown links in the ``Model`` column are split into the link text (kept
    in ``Model``) and the link target (stored in a new ``URL`` column).

    Args:
        md_content: Text of the markdown table, header row first.

    Returns:
        A DataFrame with whitespace-stripped column names, a cleaned ``Model``
        column and an added ``URL`` column. ``URL`` holds the target of the
        first link found in the model cell, or ``None`` when there is none.
        Missing model cells are left as missing values. The other columns are
        returned exactly as ``pandas.read_csv`` parsed them.

    Raises:
        KeyError: If the table has no ``Model`` column.
        pandas.errors.EmptyDataError: If ``md_content`` has nothing to parse.
    """
    link_pattern = re.compile(r'\[(.*?)\]\((.*?)\)')
    separator_cell = re.compile(r':?-+:?')

    # Remove the leading and trailing pipe of each line so they do not turn
    # into empty first/last columns when splitting on "|".
    without_leading = re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE)
    cleaned_content = re.sub(r'\|\s*$', '', without_leading, flags=re.MULTILINE)

    # The separator is a regex, so it must be a raw string: "\|" in a normal
    # literal is an invalid escape sequence.
    df = pd.read_csv(StringIO(cleaned_content), sep=r"\|", engine='python')
    df.columns = df.columns.str.strip()

    # Drop the first row only when it really is the markdown separator row;
    # a header-only table has no rows, and a table without a separator row
    # must keep its first data row.
    if not df.empty:
        present = [cell for cell in df.iloc[0] if not pd.isna(cell)]
        is_separator = bool(present) and all(
            isinstance(cell, str) and separator_cell.fullmatch(cell.strip())
            for cell in present
        )
        if is_separator:
            df = df.drop(index=df.index[0])

    def link_target(cell):
        # Empty cells are parsed as NaN (a float), which carries no link.
        if not isinstance(cell, str):
            return None
        found = link_pattern.search(cell)
        return found.group(2) if found else None

    def link_text(cell):
        # Leave missing values untouched; otherwise replace every link with
        # its text and drop the padding that surrounded the table cell.
        if not isinstance(cell, str):
            return cell
        return link_pattern.sub(r'\1', cell).strip()

    models = df['Model']
    df['URL'] = models.map(link_target)
    df['Model'] = models.map(link_text)
    return df
def create_bar_chart(df, metric, color_map, key_suffix):
    """
    Render a horizontal bar chart of one metric, with one bar per model.

    A heading for the metric is always written first. When ``metric`` is not
    a column of ``df``, a short notice is shown instead of the chart.

    Args:
        df: DataFrame holding a ``Model`` column and the metric columns.
        metric: Name of the column to plot.
        color_map: Plotly colorscale name used to color the bars by value.
        key_suffix: Value appended to the Streamlit element key.
    """
    st.write(f"### {metric} Scores")

    has_metric = metric in df.columns
    if not has_metric:
        st.write(f"No data available for {metric}.")
        return

    # Rows with a missing model or score are left out; bars are ordered by
    # ascending score.
    scores = df[['Model', metric]].dropna()
    scores = scores.sort_values(by=metric, ascending=True)
    values = scores[metric]

    bars = go.Bar(
        x=values,
        y=scores['Model'],
        orientation='h',
        marker={'color': values, 'colorscale': color_map},
    )
    fig = go.Figure(bars)
    fig.update_layout(margin={'l': 20, 'r': 20, 't': 20, 'b': 20})

    chart_key = f"bar_chart_{metric}_{key_suffix}"
    st.plotly_chart(fig, use_container_width=True, key=chart_key)
def create_radar_chart(df, metric_columns):
    """
    Render a radar chart comparing the ten models with the highest "Average".

    A heading is always written first. A notice replaces the chart when ``df``
    has no ``Average`` column or when no rows are selected.

    Args:
        df: DataFrame holding ``Model``, ``Average`` and the metric columns.
        metric_columns: List of column names used as the radar axes.
    """
    st.write("### Radar Chart (Top 10 Models by Average Score)")

    if 'Average' not in df.columns:
        st.write("Average column not found.")
        return

    leaders = df.nlargest(10, 'Average')
    if leaders.empty:
        st.write("No models available for the radar chart.")
        return

    selected_columns = ['Model'] + metric_columns
    radar_data = leaders[selected_columns].set_index('Model')

    # One closed, filled polygon per model.
    fig = go.Figure()
    for model_name, scores in radar_data.iterrows():
        trace = go.Scatterpolar(
            r=scores.values,
            theta=metric_columns,
            fill='toself',
            name=model_name,
        )
        fig.add_trace(trace)

    # Radial axis runs from 0 to 20% above the largest plotted value.
    radial_axis = {'visible': True, 'range': [0, 1.2 * radar_data.max().max()]}
    fig.update_layout(polar={'radialaxis': radial_axis}, showlegend=True)

    st.plotly_chart(fig, use_container_width=True, key="radar_chart")
def main():
    """
    Render the SLM leaderboard page.

    Downloads a markdown table from a user-editable URL, converts it to a
    DataFrame, coerces every metric column to numbers, adds an ``Average``
    column, and then displays the table, one bar chart per metric and a radar
    chart of the top models. Download and processing failures are shown on
    the page with ``st.error`` instead of being raised.
    """
    st.set_page_config(page_title="SLM Leaderboard", layout="wide")
    st.title("🏆 SLM Leaderboard")
    st.markdown("We record Nous and Standard benchmark results for various SLMs. Please submit a PR to this [repo](https://github.com/amazon-science/aws-research-science/tree/main/SLMleaderboard) to inlcude your model! Heavily Inspired by [YALB](https://huggingface.co/spaces/mlabonne/Yet_Another_LLM_Leaderboard) ")

    # URL to your markdown file
    md_url = st.text_input("This the default location of the bechmarks and can be changed",
                           "https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md")
    st.markdown("""
Copy the following links into the textbox above and refresh dashboard:
- [Nous benchmark results](https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md)
- [Standard LLM benchmarks](https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/standard.md) """)

    if not md_url:
        st.error("Please provide a valid URL to a markdown file containing the leaderboard table.")
        return

    # Download separately from parsing so a network problem is not reported
    # as a table-processing problem.
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive host.
        response = requests.get(md_url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        st.error(f"An error occurred while downloading the markdown file: {e}")
        return

    try:
        df = convert_markdown_table_to_dataframe(response.text)

        # Automatically detect metrics (all columns except 'Model' and 'URL')
        metric_columns = [col for col in df.columns if col not in ('Model', 'URL')]

        # Convert metric columns to numeric; unparseable cells become NaN
        for col in metric_columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        # Compute "Average" from the real metrics only. An "Average" column
        # that already exists in the table must not feed into its own mean.
        source_metrics = [col for col in metric_columns if col != 'Average']
        df['Average'] = df[source_metrics].mean(axis=1, skipna=True)
        if 'Average' not in metric_columns:
            metric_columns.append('Average')

        # Dropdown to select color map
        color_map = st.selectbox("Select Color Map for Bar Charts", options=['Inferno', 'Viridis', 'Cividis'])

        # Sortable leaderboard table
        st.dataframe(
            df[['Model'] + metric_columns + ['URL']],
            use_container_width=True,
            hide_index=True,
        )

        # Bar charts for each metric. "Average" is part of metric_columns at
        # this point, so it is drawn here exactly once.
        for i, metric in enumerate(metric_columns):
            create_bar_chart(df, metric, color_map, key_suffix=i)

        # Radar chart for the top 10 models by "Average" score
        create_radar_chart(df, metric_columns)
    except Exception as e:
        # Top-level UI boundary: show the failure on the page instead of
        # crashing the app.
        st.error(f"An error occurred while processing the markdown table: {e}")
# Build the page only when this file is executed as the main script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|