Spaces:
Running
Running
Ludwig Stumpp
committed on
Commit
·
24a15c0
1
Parent(s):
dc863d8
Move from markdown table to csv table as easier to maintain for larger tables
Browse files
- .vscode/extensions.json +1 -1
- README.md +9 -14
- requirements-dev.txt +1 -0
- streamlit_app.py +11 -119
.vscode/extensions.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
"recommendations": [
|
3 |
-
"
|
4 |
]
|
5 |
}
|
|
|
1 |
{
|
2 |
"recommendations": [
|
3 |
+
"janisdd.vscode-edit-csv"
|
4 |
]
|
5 |
}
|
README.md
CHANGED
@@ -1,18 +1,13 @@
|
|
1 |
# llm-leaderboard
|
2 |
-
A joint community effort to create one central leaderboard for LLMs
|
3 |
|
4 |
-
|
5 |
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
-
| Model Name | [Chatbot Arena Elo (llmsys)](https://lmsys.org/blog/2023-05-03-arena/) |
|
9 |
-
| --------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------- |
|
10 |
-
| [alpaca-13b](https://crfm.stanford.edu/2023/03/13/alpaca.html) | 1008 |
|
11 |
-
| [chatglm-6b](https://chatglm.cn/blog) | 985 |
|
12 |
-
| [dolly-v2-12b](https://www.databricks.com/blog/2023/04/12/dolly-first-open-commercially-viable-instruction-tuned-llm) | 944 |
|
13 |
-
| [fastchat-t5-3b](https://huggingface.co/lmsys/fastchat-t5-3b-v1.0) | 951 |
|
14 |
-
| [koala-13b](https://bair.berkeley.edu/blog/2023/04/03/koala/) | 1082 |
|
15 |
-
| [llama-13b](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/) | 932 |
|
16 |
-
| [stablelm-tuned-alpha-7b](https://github.com/stability-AI/stableLM) | 858 |
|
17 |
-
| [vicuna-13b](https://lmsys.org/blog/2023-03-30-vicuna/) | 1169 |
|
18 |
-
| [oasst-pythia-12b](https://open-assistant.io/) | 1065 |
|
|
|
1 |
# llm-leaderboard
|
2 |
+
A joint community effort to create one central leaderboard for LLMs. Contributions and corrections welcome!
|
3 |
|
4 |
+
## Leaderboard
|
5 |
|
6 |
+
Visit the interactive leaderboard at https://llm-leaderboard.streamlit.app/.
|
7 |
+
|
8 |
+
## How to contribute
|
9 |
+
|
10 |
+
You can contribute by:
|
11 |
+
- adding a new model as a new row
|
12 |
+
- adding a new benchmark as a new column
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements-dev.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
black
|
2 |
flake
|
|
|
3 |
mypy
|
|
|
1 |
black
|
2 |
flake
|
3 |
+
isort
|
4 |
mypy
|
streamlit_app.py
CHANGED
@@ -1,130 +1,24 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import streamlit as st
|
3 |
import io
|
|
|
|
|
4 |
import requests
|
5 |
-
import
|
6 |
|
7 |
REPO_URL = "https://github.com/LudwigStumpp/llm-leaderboard"
|
8 |
|
9 |
|
10 |
-
def
|
11 |
-
"""Grabs
|
12 |
|
13 |
Args:
|
14 |
repo_url (str): URL of the GitHub repository.
|
|
|
15 |
|
16 |
Returns:
|
17 |
-
str: Content of the
|
18 |
-
"""
|
19 |
-
readme_url = repo_url.replace("github.com", "raw.githubusercontent.com") + "/main/README.md"
|
20 |
-
readme = requests.get(readme_url).text
|
21 |
-
return readme
|
22 |
-
|
23 |
-
|
24 |
-
def modify_from_markdown_links_to_html_links(text: str) -> str:
|
25 |
-
"""Modifies a markdown text to replace all markdown links with HTML links.
|
26 |
-
|
27 |
-
Example: [DISPLAY](LINK) to <a href=LINK, target="_blank">DISPLAY</a>
|
28 |
-
|
29 |
-
First find all markdown links with regex.
|
30 |
-
Then replace them with: <a href=$2, target="_blank">$1</a>
|
31 |
-
|
32 |
-
Args:
|
33 |
-
text (str): Markdown text containing markdown links
|
34 |
-
|
35 |
-
Returns:
|
36 |
-
str: Markdown text with HTML links.
|
37 |
-
"""
|
38 |
-
|
39 |
-
# find all markdown links
|
40 |
-
markdown_links = re.findall(r"\[([^\]]+)\]\(([^)]+)\)", text)
|
41 |
-
|
42 |
-
# replace them with HTML links
|
43 |
-
for display, link in markdown_links:
|
44 |
-
text = text.replace(f"[{display}]({link})", f'<a href="{link}" target="_blank">{display}</a>')
|
45 |
-
|
46 |
-
return text
|
47 |
-
|
48 |
-
|
49 |
-
def remove_markdown_links(text: str) -> str:
|
50 |
-
"""Modifies a markdown text to remove all markdown links.
|
51 |
-
|
52 |
-
Example: [DISPLAY](LINK) to DISPLAY
|
53 |
-
|
54 |
-
First find all markdown links with regex.
|
55 |
-
Then replace them with: $1
|
56 |
-
|
57 |
-
Args:
|
58 |
-
text (str): Markdown text containing markdown links
|
59 |
-
|
60 |
-
Returns:
|
61 |
-
str: Markdown text without markdown links.
|
62 |
"""
|
63 |
-
|
64 |
-
|
65 |
-
markdown_links = re.findall(r"\[([^\]]+)\]\(([^)]+)\)", text)
|
66 |
-
|
67 |
-
# remove link keep display text
|
68 |
-
for display, link in markdown_links:
|
69 |
-
text = text.replace(f"[{display}]({link})", display)
|
70 |
-
|
71 |
-
return text
|
72 |
-
|
73 |
-
|
74 |
-
def extract_table_and_format_from_markdown_text(markdown_table: str) -> pd.DataFrame:
|
75 |
-
"""Extracts a table from a markdown text and formats it as a pandas DataFrame.
|
76 |
-
|
77 |
-
Args:
|
78 |
-
text (str): Markdown text containing a table.
|
79 |
-
|
80 |
-
Returns:
|
81 |
-
pd.DataFrame: Table as pandas DataFrame.
|
82 |
-
"""
|
83 |
-
df = (
|
84 |
-
pd.read_table(io.StringIO(markdown_table), sep="|", header=0, index_col=1)
|
85 |
-
.dropna(axis=1, how="all") # drop empty columns
|
86 |
-
.iloc[1:] # drop first row which is the "----" separator of the original markdown table
|
87 |
-
)
|
88 |
-
|
89 |
-
# change all column datatypes to numeric
|
90 |
-
for col in df.columns:
|
91 |
-
df[col] = pd.to_numeric(df[col], errors="ignore")
|
92 |
-
|
93 |
-
# remove whitespace from column names and index
|
94 |
-
df.columns = df.columns.str.strip()
|
95 |
-
df.index = df.index.str.strip()
|
96 |
-
|
97 |
-
return df
|
98 |
-
|
99 |
-
|
100 |
-
def extract_markdown_table_from_multiline(multiline: str, table_headline: str) -> str:
|
101 |
-
"""Extracts the markdown table from a multiline string.
|
102 |
-
|
103 |
-
Args:
|
104 |
-
multiline (str): content of README.md file.
|
105 |
-
table_headline (str): Headline of the table in the README.md file.
|
106 |
-
|
107 |
-
Returns:
|
108 |
-
str: Markdown table.
|
109 |
-
|
110 |
-
Raises:
|
111 |
-
ValueError: If the table could not be found.
|
112 |
-
"""
|
113 |
-
# extract everything between the table headline and the next headline
|
114 |
-
table = []
|
115 |
-
start = False
|
116 |
-
for line in multiline.split("\n"):
|
117 |
-
if line.startswith(table_headline):
|
118 |
-
start = True
|
119 |
-
elif line.startswith("###"):
|
120 |
-
start = False
|
121 |
-
elif start:
|
122 |
-
table.append(line + "\n")
|
123 |
-
|
124 |
-
if len(table) == 0:
|
125 |
-
raise ValueError(f"Could not find table with headline '{table_headline}'")
|
126 |
-
|
127 |
-
return "".join(table)
|
128 |
|
129 |
|
130 |
def setup_basic():
|
@@ -145,10 +39,8 @@ def setup_basic():
|
|
145 |
|
146 |
|
147 |
def setup_table():
|
148 |
-
|
149 |
-
|
150 |
-
markdown_table = remove_markdown_links(markdown_table)
|
151 |
-
df = extract_table_and_format_from_markdown_text(markdown_table)
|
152 |
st.dataframe(df)
|
153 |
|
154 |
|
|
|
|
|
|
|
1 |
import io
|
2 |
+
|
3 |
+
import pandas as pd
|
4 |
import requests
|
5 |
+
import streamlit as st
|
6 |
|
7 |
REPO_URL = "https://github.com/LudwigStumpp/llm-leaderboard"
|
8 |
|
9 |
|
10 |
+
def grab_file_from_repo(repo_url: str, filename: str) -> str:
|
11 |
+
"""Grabs a file from a GitHub repository.
|
12 |
|
13 |
Args:
|
14 |
repo_url (str): URL of the GitHub repository.
|
15 |
+
filename (str): Name of the file to grab.
|
16 |
|
17 |
Returns:
|
18 |
+
str: Content of the file.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
"""
|
20 |
+
url = repo_url.replace("github.com", "raw.githubusercontent.com") + f"/main/{filename}"
|
21 |
+
return requests.get(url).text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
|
24 |
def setup_basic():
|
|
|
39 |
|
40 |
|
41 |
def setup_table():
|
42 |
+
csv_table = grab_file_from_repo(REPO_URL, "leaderboard.csv")
|
43 |
+
df = pd.read_csv(io.StringIO(csv_table), index_col=0)
|
|
|
|
|
44 |
st.dataframe(df)
|
45 |
|
46 |
|