Update space
Browse files
app.py
CHANGED
|
@@ -128,6 +128,29 @@ def overall_leaderboard(dataframe):
|
|
| 128 |
)
|
| 129 |
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
demo = gr.Blocks(css=custom_css)
|
| 133 |
with demo:
|
|
@@ -139,7 +162,7 @@ with demo:
|
|
| 139 |
INTRODUCTION_TEXT_FONT_SIZE = 16
|
| 140 |
INTRODUCTION_TEXT = (
|
| 141 |
f'<p style="font-size:{INTRODUCTION_TEXT_FONT_SIZE}px;">'
|
| 142 |
-
'<strong>Decentralized Arena</strong> automates, scales, and accelerates
|
| 143 |
'for large language model (LLM) evaluation across diverse, fine-grained dimensions, '
|
| 144 |
'such as mathematics (algebra, geometry, probability), logical reasoning, social reasoning, science (chemistry, physics, biology), or any user-defined dimensions. '
|
| 145 |
'The evaluation is decentralized and democratic, with all participating LLMs assessing each other to ensure unbiased and fair results. '
|
|
@@ -175,7 +198,7 @@ with demo:
|
|
| 175 |
|
| 176 |
TEXT = (
|
| 177 |
f'<p style="font-size:{INTRODUCTION_TEXT_FONT_SIZE}px;">'
|
| 178 |
-
'Total #models: 57 (Last updated: 2024-10-21)'
|
| 179 |
'</p>'
|
| 180 |
f'<p style="font-size:{INTRODUCTION_TEXT_FONT_SIZE}px;">'
|
| 181 |
'This page provides a comprehensive overview of model ranks across various dimensions, based on their averaged ranks. '
|
|
|
|
| 128 |
)
|
| 129 |
|
| 130 |
|
| 131 |
+
# Your leaderboard name
|
| 132 |
+
TITLE = """<h1 align="center" id="space-title">Decentralized Arena Leaderboard</h1>"""
|
| 133 |
+
|
| 134 |
+
SUB_TITLE = """<h2 align="center" id="space-subtitle">Automated, Robust, and Transparent LLM Evaluation for Numerous Dimensions</h2>"""
|
| 135 |
+
|
| 136 |
+
EXTERNAL_LINKS = """
|
| 137 |
+
<h2 align="center" id="space-links">
|
| 138 |
+
<a href="https://de-arena.maitrix.org/" target="_blank">Blog</a> |
|
| 139 |
+
<a href="https://github.com/maitrix-org/de-arena" target="_blank">GitHub</a> |
|
| 140 |
+
<a href="https://de-arena.maitrix.org/images/Heading.mp4" target="">Video</a> |
|
| 141 |
+
<a href="https://maitrix.org/" target="_blank">@Maitrix.org</a> |
|
| 142 |
+
<a href="https://www.llm360.ai/" target="_blank">@LLM360</a>
|
| 143 |
+
</h2>
|
| 144 |
+
"""
|
| 145 |
+
|
| 146 |
+
# What does your leaderboard evaluate?
|
| 147 |
+
INTRODUCTION_TEXT = """
|
| 148 |
+
**Decentralized Arena** automates and scales "Chatbot Arena" for LLM evaluation across various fine-grained dimensions
|
| 149 |
+
(e.g., math – algebra, geometry, probability; logical reasoning, social reasoning, biology, chemistry, …).
|
| 150 |
+
The evaluation is decentralized and democratic, with all LLMs participating in evaluating others.
|
| 151 |
+
It achieves a 95% correlation with Chatbot Arena's overall rankings, while being fully transparent and reproducible.
|
| 152 |
+
"""
|
| 153 |
+
|
| 154 |
|
| 155 |
demo = gr.Blocks(css=custom_css)
|
| 156 |
with demo:
|
|
|
|
| 162 |
INTRODUCTION_TEXT_FONT_SIZE = 16
|
| 163 |
INTRODUCTION_TEXT = (
|
| 164 |
f'<p style="font-size:{INTRODUCTION_TEXT_FONT_SIZE}px;">'
|
| 165 |
+
'<strong>Decentralized Arena</strong> automates, scales, and accelerates <a href="https://lmarena.ai/">Chatbot Arena</a> '
|
| 166 |
'for large language model (LLM) evaluation across diverse, fine-grained dimensions, '
|
| 167 |
'such as mathematics (algebra, geometry, probability), logical reasoning, social reasoning, science (chemistry, physics, biology), or any user-defined dimensions. '
|
| 168 |
'The evaluation is decentralized and democratic, with all participating LLMs assessing each other to ensure unbiased and fair results. '
|
|
|
|
| 198 |
|
| 199 |
TEXT = (
|
| 200 |
f'<p style="font-size:{INTRODUCTION_TEXT_FONT_SIZE}px;">'
|
| 201 |
+
'<b>Total #models: 57 (Last updated: 2024-10-21)</b>'
|
| 202 |
'</p>'
|
| 203 |
f'<p style="font-size:{INTRODUCTION_TEXT_FONT_SIZE}px;">'
|
| 204 |
'This page provides a comprehensive overview of model ranks across various dimensions, based on their averaged ranks. '
|