TITLE = """<h1 align="center" id="space-title">🤗 Open LLM-Perf Leaderboard 🏋️</h1>"""

INTRODUCTION_TEXT = f"""
The 🤗 Open LLM-Perf Leaderboard 🏋️ aims to benchmark the performance (latency & throughput) of Large Language Models (LLMs) across different hardware, backends, and optimizations using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) and [Optimum](https://github.com/huggingface/optimum) flavors.

Anyone from the community can request a model or a hardware/backend/optimization configuration for automated benchmarking:
- Model evaluation requests should be made in the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) and will be added to the 🤗 Open LLM-Perf Leaderboard 🏋️ automatically.
- Hardware/Backend/Optimization performance requests should be made in the [community discussions](https://huggingface.co/spaces/optimum/llm-perf-leaderboard/discussions) to assess their relevance and feasibility.
"""

A100_TEXT = """<h3>Single-GPU Benchmark (1xA100):</h3>
<ul>
    <li>LLMs are evaluated on a singleton batch, generating a thousand tokens.</li>
    <li>Peak memory is measured in MB during the first forward pass of the LLM (no warmup).</li>
    <li>Each (LLM Type, Weight Class) pair is represented by its best-scoring LLM, which is the one used for all hardware/backend/optimization experiments.</li>
    <li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
    <li>Ranking is based on the Euclidean distance from the "perfect LLM" (i.e. 0 latency and 100% accuracy).</li>
</ul>
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results."
CITATION_BUTTON_TEXT = r"""@misc{open-llm-perf-leaderboard,
  author = {Ilyas Moutawwakil, Régis Pierrard},
  title = {Open LLM-Perf Leaderboard},
  year = {2023},
  publisher = {Hugging Face},
  howpublished = "\url{https://huggingface.co/spaces/optimum/llm-perf-leaderboard}",
@software{optimum-benchmark,
  author = {Ilyas Moutawwakil, Régis Pierrard},
  publisher = {Hugging Face},
  title = {Optimum-Benchmark: A framework for benchmarking the performance of Transformers models with different hardwares, backends and optimizations.},
}
"""