maximedenes committed on
Commit
7b6b120
·
1 Parent(s): 04e1112

New frontend, based on new Open LLM Leaderboard version

Browse files

We also modernize the build chain.

Note: we import some code processing data from the `results` dataset, which was
actually authored by Clémentine Fourrier and Mauricio Diaz.

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitignore +3 -0
  2. Dockerfile +5 -4
  3. backend/app/config/base.py +2 -5
  4. backend/app/config/hf_config.py +2 -1
  5. backend/app/services/leaderboard.py +331 -97
  6. backend/app/services/models.py +7 -4
  7. docker-compose.yml +1 -2
  8. frontend/Dockerfile.dev +2 -5
  9. frontend/README.md +5 -77
  10. frontend/dist/assets/index-DMNVfo70.js +0 -0
  11. frontend/dist/assets/index-kQJbKSsj.css +1 -0
  12. frontend/{public → dist}/index.html +7 -15
  13. frontend/dist/vite.svg +1 -0
  14. frontend/eslint.config.js +38 -0
  15. frontend/index.html +94 -0
  16. frontend/package.json +26 -35
  17. frontend/public/logo256.png +0 -0
  18. frontend/public/logo32.png +0 -0
  19. frontend/public/og-image.jpg +0 -0
  20. frontend/public/robots.txt +0 -3
  21. frontend/public/vite.svg +1 -0
  22. frontend/{server.js → server.cjs} +1 -1
  23. frontend/src/App.css +42 -0
  24. frontend/src/{App.js → App.jsx} +37 -2
  25. frontend/src/assets/react.svg +1 -0
  26. frontend/src/components/Footer/{Footer.js → Footer.jsx} +0 -0
  27. frontend/src/components/Logo/{HFLogo.js → HFLogo.jsx} +0 -0
  28. frontend/src/components/Logo/{Logo.js → Logo.jsx} +0 -0
  29. frontend/src/components/Navigation/{Navigation.js → Navigation.jsx} +0 -0
  30. frontend/src/components/shared/{AuthContainer.js → AuthContainer.jsx} +0 -0
  31. frontend/src/components/shared/{FilterTag.js → FilterTag.jsx} +0 -0
  32. frontend/src/components/shared/{InfoIconWithTooltip.js → InfoIconWithTooltip.jsx} +0 -0
  33. frontend/src/components/shared/{PageHeader.js → PageHeader.jsx} +0 -0
  34. frontend/src/index.css +68 -0
  35. frontend/src/index.js +0 -10
  36. frontend/src/main.jsx +12 -0
  37. frontend/src/pages/AddModelPage/{AddModelPage.js → AddModelPage.jsx} +0 -0
  38. frontend/src/pages/AddModelPage/components/EvaluationQueues/{EvaluationQueues.js → EvaluationQueues.jsx} +0 -0
  39. frontend/src/pages/AddModelPage/components/ModelSubmissionForm/{ModelSubmissionForm.js → ModelSubmissionForm.jsx} +2 -2
  40. frontend/src/pages/AddModelPage/components/SubmissionGuide/{SubmissionGuide.js → SubmissionGuide.jsx} +0 -0
  41. frontend/src/pages/LeaderboardPage/{LeaderboardPage.js → LeaderboardPage.jsx} +2 -2
  42. frontend/src/pages/LeaderboardPage/components/Leaderboard/{Leaderboard.js → Leaderboard.jsx} +0 -0
  43. frontend/src/pages/LeaderboardPage/components/Leaderboard/components/ColumnSelector/{ColumnSelector.js → ColumnSelector.jsx} +0 -0
  44. frontend/src/pages/LeaderboardPage/components/Leaderboard/components/DisplayOptions/{DisplayOptions.js → DisplayOptions.jsx} +0 -0
  45. frontend/src/pages/LeaderboardPage/components/Leaderboard/components/Filters/{FilteredModelCount.js → FilteredModelCount.jsx} +0 -0
  46. frontend/src/pages/LeaderboardPage/components/Leaderboard/components/Filters/{Filters.js → Filters.jsx} +0 -0
  47. frontend/src/pages/LeaderboardPage/components/Leaderboard/components/Filters/{QuickFilters.js → QuickFilters.jsx} +0 -0
  48. frontend/src/pages/LeaderboardPage/components/Leaderboard/components/Filters/{SearchBar.js → SearchBar.jsx} +0 -0
  49. frontend/src/pages/LeaderboardPage/components/Leaderboard/components/{PerformanceMonitor.js → PerformanceMonitor.jsx} +0 -0
  50. frontend/src/pages/LeaderboardPage/components/Leaderboard/components/Table/{Table.js → Table.jsx} +0 -0
.gitignore CHANGED
@@ -43,3 +43,6 @@ package-lock.json
43
  .env.*
44
  !.env.example
45
 
 
 
 
 
43
  .env.*
44
  !.env.example
45
 
46
+
47
+ logs/
48
+ __pycache__/
Dockerfile CHANGED
@@ -2,10 +2,10 @@
2
  FROM node:18 as frontend-build
3
  WORKDIR /app
4
  COPY frontend/package*.json ./
5
- RUN npm install
6
  COPY frontend/ ./
7
 
8
- RUN npm run build
9
 
10
  # Build backend
11
  FROM python:3.12-slim
@@ -36,6 +36,7 @@ RUN apt-get update && apt-get install -y \
36
  && curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
37
  && apt-get install -y nodejs \
38
  && rm -rf /var/lib/apt/lists/*
 
39
 
40
  # Copy frontend server and build
41
  COPY --from=frontend-build /app/build ./frontend/build
@@ -44,7 +45,7 @@ COPY --from=frontend-build /app/server.js ./frontend/
44
 
45
  # Install frontend production dependencies
46
  WORKDIR /app/frontend
47
- RUN npm install --production
48
  WORKDIR /app
49
 
50
  # Environment variables
@@ -59,4 +60,4 @@ USER user
59
  EXPOSE 7860
60
 
61
  # Start both servers with wait-for
62
- CMD ["sh", "-c", "uvicorn app.asgi:app --host 0.0.0.0 --port 7861 & while ! nc -z localhost 7861; do sleep 1; done && cd frontend && npm run serve"]
 
2
  FROM node:18 as frontend-build
3
  WORKDIR /app
4
  COPY frontend/package*.json ./
5
+ RUN yarn install
6
  COPY frontend/ ./
7
 
8
+ RUN yarn build
9
 
10
  # Build backend
11
  FROM python:3.12-slim
 
36
  && curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
37
  && apt-get install -y nodejs \
38
  && rm -rf /var/lib/apt/lists/*
39
+ RUN npm install --global yarn
40
 
41
  # Copy frontend server and build
42
  COPY --from=frontend-build /app/build ./frontend/build
 
45
 
46
  # Install frontend production dependencies
47
  WORKDIR /app/frontend
48
+ RUN yarn install --production
49
  WORKDIR /app
50
 
51
  # Environment variables
 
60
  EXPOSE 7860
61
 
62
  # Start both servers with wait-for
63
+ CMD ["sh", "-c", "uvicorn app.asgi:app --host 0.0.0.0 --port 7861 & while ! nc -z localhost 7861; do sleep 1; done && cd frontend && node index.cjs"]
backend/app/config/base.py CHANGED
@@ -20,7 +20,7 @@ HAS_HIGHER_RATE_LIMIT = []
20
 
21
  # HuggingFace configuration
22
  HF_TOKEN = os.environ.get("HF_TOKEN")
23
- HF_ORGANIZATION = "open-llm-leaderboard"
24
  API = {
25
  "INFERENCE": "https://api-inference.huggingface.co/models",
26
  "HUB": "https://huggingface.co"
@@ -32,7 +32,4 @@ DATASETS_CACHE = CACHE_ROOT / "datasets"
32
  MODELS_CACHE = CACHE_ROOT / "models"
33
  VOTES_CACHE = CACHE_ROOT / "votes"
34
  EVAL_CACHE = CACHE_ROOT / "eval-queue"
35
-
36
- # Repository configuration
37
- QUEUE_REPO = f"{HF_ORGANIZATION}/requests"
38
- EVAL_REQUESTS_PATH = EVAL_CACHE / "eval_requests.jsonl"
 
20
 
21
  # HuggingFace configuration
22
  HF_TOKEN = os.environ.get("HF_TOKEN")
23
+ HF_ORGANIZATION = "fr-gouv-coordination-ia"
24
  API = {
25
  "INFERENCE": "https://api-inference.huggingface.co/models",
26
  "HUB": "https://huggingface.co"
 
32
  MODELS_CACHE = CACHE_ROOT / "models"
33
  VOTES_CACHE = CACHE_ROOT / "votes"
34
  EVAL_CACHE = CACHE_ROOT / "eval-queue"
35
+ RESULTS_CACHE = CACHE_ROOT / "results"
 
 
 
backend/app/config/hf_config.py CHANGED
@@ -8,7 +8,7 @@ from app.core.cache import cache_config
8
  logger = logging.getLogger(__name__)
9
 
10
  # Organization or user who owns the datasets
11
- HF_ORGANIZATION = "open-llm-leaderboard"
12
 
13
  # Get HF token directly from environment
14
  HF_TOKEN = os.environ.get("HF_TOKEN")
@@ -23,6 +23,7 @@ QUEUE_REPO = f"{HF_ORGANIZATION}/requests"
23
  AGGREGATED_REPO = f"{HF_ORGANIZATION}/contents"
24
  VOTES_REPO = f"{HF_ORGANIZATION}/votes"
25
  OFFICIAL_PROVIDERS_REPO = f"{HF_ORGANIZATION}/official-providers"
 
26
 
27
  # File paths from cache config
28
  VOTES_PATH = cache_config.votes_file
 
8
  logger = logging.getLogger(__name__)
9
 
10
  # Organization or user who owns the datasets
11
+ HF_ORGANIZATION = "fr-gouv-coordination-ia"
12
 
13
  # Get HF token directly from environment
14
  HF_TOKEN = os.environ.get("HF_TOKEN")
 
23
  AGGREGATED_REPO = f"{HF_ORGANIZATION}/contents"
24
  VOTES_REPO = f"{HF_ORGANIZATION}/votes"
25
  OFFICIAL_PROVIDERS_REPO = f"{HF_ORGANIZATION}/official-providers"
26
+ RESULTS_REPO = f"{HF_ORGANIZATION}/results"
27
 
28
  # File paths from cache config
29
  VOTES_PATH = cache_config.votes_file
backend/app/services/leaderboard.py CHANGED
@@ -6,34 +6,284 @@ from fastapi import HTTPException
6
  import logging
7
  from app.config.base import HF_ORGANIZATION
8
  from app.core.formatting import LogFormatter
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  logger = logging.getLogger(__name__)
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  class LeaderboardService:
13
  def __init__(self):
 
14
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- async def fetch_raw_data(self) -> List[Dict[str, Any]]:
17
  """Fetch raw leaderboard data from HuggingFace dataset"""
18
  try:
19
  logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA"))
20
  logger.info(LogFormatter.info(f"Loading dataset from {HF_ORGANIZATION}/contents"))
21
-
22
- dataset = datasets.load_dataset(
23
- f"{HF_ORGANIZATION}/contents",
24
- cache_dir=cache_config.get_cache_path("datasets")
25
- )["train"]
26
-
27
- df = dataset.to_pandas()
28
- data = df.to_dict('records')
29
-
30
- stats = {
31
- "Total_Entries": len(data),
32
- "Dataset_Size": f"{df.memory_usage(deep=True).sum() / 1024 / 1024:.1f}MB"
33
- }
34
- for line in LogFormatter.stats(stats, "Dataset Statistics"):
35
- logger.info(line)
36
-
37
  return data
38
 
39
  except Exception as e:
@@ -95,114 +345,98 @@ class LeaderboardService:
95
  logger.error(LogFormatter.error("Failed to format leaderboard data", e))
96
  raise HTTPException(status_code=500, detail=str(e))
97
 
98
- async def transform_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
99
  """Transform raw data into the format expected by the frontend"""
100
  try:
101
  # Extract model name for logging
102
- model_name = data.get("fullname", "Unknown")
103
  logger.debug(LogFormatter.info(f"Transforming data for model: {model_name}"))
104
 
105
  # Create unique ID combining model name, precision, sha and chat template status
106
- unique_id = f"{data.get('fullname', 'Unknown')}_{data.get('Precision', 'Unknown')}_{data.get('Model sha', 'Unknown')}_{str(data.get('Chat Template', False))}"
107
-
108
  evaluations = {
109
- "ifeval": {
110
- "name": "IFEval",
111
- "value": data.get("IFEval Raw", 0),
112
- "normalized_score": data.get("IFEval", 0)
113
- },
114
- "bbh": {
115
- "name": "BBH",
116
- "value": data.get("BBH Raw", 0),
117
- "normalized_score": data.get("BBH", 0)
118
- },
119
- "math": {
120
- "name": "MATH Level 5",
121
- "value": data.get("MATH Lvl 5 Raw", 0),
122
- "normalized_score": data.get("MATH Lvl 5", 0)
123
- },
124
- "gpqa": {
125
- "name": "GPQA",
126
- "value": data.get("GPQA Raw", 0),
127
- "normalized_score": data.get("GPQA", 0)
128
  },
129
- "musr": {
130
- "name": "MUSR",
131
- "value": data.get("MUSR Raw", 0),
132
- "normalized_score": data.get("MUSR", 0)
133
  },
134
- "mmlu_pro": {
135
- "name": "MMLU-PRO",
136
- "value": data.get("MMLU-PRO Raw", 0),
137
- "normalized_score": data.get("MMLU-PRO", 0)
138
  }
139
  }
140
 
141
- features = {
142
- "is_not_available_on_hub": data.get("Available on the hub", False),
143
- "is_merged": data.get("Merged", False),
144
- "is_moe": data.get("MoE", False),
145
- "is_flagged": data.get("Flagged", False),
146
- "is_official_provider": data.get("Official Providers", False)
147
- }
148
 
149
- metadata = {
150
- "upload_date": data.get("Upload To Hub Date"),
151
- "submission_date": data.get("Submission Date"),
152
- "generation": data.get("Generation"),
153
- "base_model": data.get("Base Model"),
154
- "hub_license": data.get("Hub License"),
155
- "hub_hearts": data.get("Hub ❀️"),
156
- "params_billions": data.get("#Params (B)"),
157
- "co2_cost": data.get("COβ‚‚ cost (kg)", 0)
158
- }
 
 
 
 
 
 
 
 
 
 
159
 
160
  # Clean model type by removing emojis if present
161
- original_type = data.get("Type", "")
162
- model_type = original_type.lower().strip()
163
-
164
- # Remove emojis and parentheses
165
- if "(" in model_type:
166
- model_type = model_type.split("(")[0].strip()
167
- model_type = ''.join(c for c in model_type if not c in 'πŸ”ΆπŸŸ’πŸŸ©πŸ’¬πŸ€πŸŒΈ ')
168
-
169
  # Map old model types to new ones
170
- model_type_mapping = {
171
- "fine-tuned": "fined-tuned-on-domain-specific-dataset",
172
- "fine tuned": "fined-tuned-on-domain-specific-dataset",
173
- "finetuned": "fined-tuned-on-domain-specific-dataset",
174
- "fine_tuned": "fined-tuned-on-domain-specific-dataset",
175
- "ft": "fined-tuned-on-domain-specific-dataset",
176
- "finetuning": "fined-tuned-on-domain-specific-dataset",
177
- "fine tuning": "fined-tuned-on-domain-specific-dataset",
178
- "fine-tuning": "fined-tuned-on-domain-specific-dataset"
179
- }
180
 
181
- mapped_type = model_type_mapping.get(model_type.lower().strip(), model_type)
182
-
183
- if mapped_type != model_type:
184
- logger.debug(LogFormatter.info(f"Model type mapped: {original_type} -> {mapped_type}"))
185
 
186
  transformed_data = {
187
  "id": unique_id,
188
  "model": {
189
- "name": data.get("fullname"),
190
- "sha": data.get("Model sha"),
191
- "precision": data.get("Precision"),
192
- "type": mapped_type,
193
- "weight_type": data.get("Weight type"),
194
- "architecture": data.get("Architecture"),
195
- "average_score": data.get("Average ⬆️"),
196
- "has_chat_template": data.get("Chat Template", False)
197
  },
198
  "evaluations": evaluations,
199
  "features": features,
200
  "metadata": metadata
201
  }
202
-
203
  logger.debug(LogFormatter.success(f"Successfully transformed data for {model_name}"))
204
  return transformed_data
205
 
206
  except Exception as e:
207
- logger.error(LogFormatter.error(f"Failed to transform data for {data.get('fullname', 'Unknown')}", e))
208
  raise
 
6
  import logging
7
  from app.config.base import HF_ORGANIZATION
8
  from app.core.formatting import LogFormatter
9
+ from dataclasses import dataclass
10
+ from enum import Enum
11
+ import json
12
+ import os
13
+ import glob
14
+ from transformers import AutoConfig
15
+ from transformers.models.auto.tokenization_auto import AutoTokenizer
16
+ import dateutil
17
+ import numpy as np
18
+ from huggingface_hub import snapshot_download
19
+ from app.services.models import ModelService
20
+
21
+ from app.config import (
22
+ RESULTS_CACHE,
23
+ EVAL_CACHE,
24
+ HF_TOKEN,
25
+ )
26
+
27
+ from app.config.hf_config import (
28
+ RESULTS_REPO,
29
+ )
30
 
31
  logger = logging.getLogger(__name__)
32
 
33
+ ## All the model information that we might need
34
+ ## TODO move all these classes to proper place
35
+ @dataclass
36
+ class ModelDetails:
37
+ name: str
38
+ display_name: str = ""
39
+ symbol: str = "" # emoji
40
+
41
+ class ModelType(Enum):
42
+ PT = ModelDetails(name="pretrained", symbol="🟒")
43
+ FT = ModelDetails(name="fine-tunedondomain-specificdatasets", symbol="πŸ”Ά")
44
+ IFT = ModelDetails(name="instruction-tuned", symbol="β­•")
45
+ RL = ModelDetails(name="RL-tuned", symbol="🟦")
46
+ CHAT = ModelDetails(name="chatmodels")
47
+ Unknown = ModelDetails(name="", symbol="?")
48
+
49
+ def to_str(self):
50
+ return f"{self.value.name}"
51
+
52
+ @staticmethod
53
+ def from_str(type):
54
+ if "fine-tuned" in type or "πŸ”Ά" in type:
55
+ return ModelType.FT
56
+ if "pretrained" in type or "🟒" in type:
57
+ return ModelType.PT
58
+ if "RL-tuned" in type or "🟦" in type:
59
+ return ModelType.RL
60
+ if "instruction-tuned" in type or "β­•" in type or "chatmodels" in type :
61
+ return ModelType.CHAT
62
+ return ModelType.Unknown
63
+
64
+
65
+ class WeightType(Enum):
66
+ Adapter = ModelDetails("Adapter")
67
+ Original = ModelDetails("Original")
68
+ Delta = ModelDetails("Delta")
69
+
70
+
71
+ class Precision(Enum):
72
+ float16 = ModelDetails("float16")
73
+ bfloat16 = ModelDetails("bfloat16")
74
+ Unknown = ModelDetails("?")
75
+
76
+ def from_str(precision):
77
+ if precision in ["torch.float16", "float16"]:
78
+ return Precision.float16
79
+ if precision in ["torch.bfloat16", "bfloat16"]:
80
+ return Precision.bfloat16
81
+ return Precision.Unknown
82
+
83
+ @dataclass
84
+ class Task:
85
+ benchmark: str
86
+ metric: str
87
+ normalized_metric: str
88
+ col_name: str
89
+
90
+ class Tasks(Enum):
91
+ # task_key in the json file, metric_key in the json file, name to display in the leaderboard
92
+ # task0 = Task("IFEVal-fr", "metric_name", "IFEVal-fr")
93
+ # task1 = Task("GPQA-fr", "metric_name", "GPQA-fr")
94
+ # task2 = Task("BAC-fr", "metric_name", "BAC-fr")
95
+ task0 = Task("community|gpqa-fr|0", "acc", "norm_acc", "GPQA-fr") # On pourrait vouloir mettre "Connaissances"
96
+ task1 = Task("community|ifeval-fr|0", "norm_acc", "norm_acc", "IFEval-fr") # FIXME norm_acc should be acc # et "Suivi d'instructions"
97
+
98
+ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
99
+ """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
100
+ try:
101
+ config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
102
+ if test_tokenizer:
103
+ try:
104
+ tk = AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
105
+ except ValueError as e:
106
+ return (
107
+ False,
108
+ f"uses a tokenizer which is not in a transformers release: {e}",
109
+ None
110
+ )
111
+ except Exception as e:
112
+ return (False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None)
113
+ return True, None, config
114
+
115
+ except ValueError:
116
+ return (
117
+ False,
118
+ "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
119
+ None
120
+ )
121
+
122
+ except Exception as e:
123
+ return False, "was not found on hub!", None
124
+
125
+ @dataclass
126
+ class EvalResult:
127
+ """Represents one full evaluation. Built from a combination of the result and request file for a given run."""
128
+
129
+ eval_name: str # org_model_precision (uid)
130
+ full_model: str # org/model (path on hub)
131
+ org: str
132
+ model: str
133
+ revision: str # commit hash, "" if main
134
+ results: dict
135
+ normalized_results: dict
136
+ precision: Precision = Precision.Unknown
137
+ model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
138
+ weight_type: WeightType = WeightType.Original # Original or Adapter
139
+ architecture: str = "Unknown"
140
+ license: str = "?"
141
+ likes: int = 0
142
+ num_params: int = 0
143
+ date: str = "" # submission date of request file
144
+ still_on_hub: bool = False
145
+
146
+ @classmethod
147
+ def init_from_json_file(self, json_filepath):
148
+ """Inits the result from the specific model result file"""
149
+ with open(json_filepath) as fp:
150
+ data = json.load(fp)
151
+
152
+ config = data.get("config_general")
153
+
154
+ # Precision
155
+ precision = Precision.from_str(config.get("model_dtype"))
156
+
157
+ # Get model and org
158
+ org_and_model = config.get("model_name", config.get("model_args", None))
159
+ org_and_model = org_and_model.split("/", 1)
160
+
161
+ if len(org_and_model) == 1:
162
+ org = None
163
+ model = org_and_model[0]
164
+ result_key = f"{model}_{precision.value.name}"
165
+ else:
166
+ org = org_and_model[0]
167
+ model = org_and_model[1]
168
+ result_key = f"{org}_{model}_{precision.value.name}"
169
+ full_model = "/".join(org_and_model)
170
+
171
+ still_on_hub, _, model_config = is_model_on_hub(
172
+ full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
173
+ )
174
+ architecture = "?"
175
+ if model_config is not None:
176
+ architectures = getattr(model_config, "architectures", None)
177
+ if architectures:
178
+ architecture = ";".join(architectures)
179
+
180
+ # Extract results available in this file (some results are split in several files)
181
+ results = {}
182
+ normalized_results = {}
183
+ for task in Tasks:
184
+ task = task.value
185
+
186
+ # We average all scores of a given metric (not all metrics are present in all files)
187
+ accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
188
+ if accs.size == 0 or any([acc is None for acc in accs]):
189
+ continue
190
+
191
+ mean_acc = np.mean(accs) * 100.0
192
+ results[task.benchmark] = mean_acc
193
+
194
+ r = data["results"][task.benchmark].get(task.normalized_metric, None)
195
+ if r is None:
196
+ continue
197
+ normalized_results[task.benchmark] = r * 100.0
198
+
199
+ return self(
200
+ eval_name=result_key,
201
+ full_model=full_model,
202
+ org=org,
203
+ model=model,
204
+ results=results,
205
+ normalized_results=normalized_results,
206
+ precision=precision,
207
+ revision=config.get("model_sha", ""),
208
+ still_on_hub=still_on_hub,
209
+ architecture=architecture,
210
+ )
211
+
212
+ def update_with_request_file(self, existing_models):
213
+ """Finds the relevant request file for the current model and updates info with it"""
214
+ for status, models in existing_models.items():
215
+ if status == "finished":
216
+ for model in models:
217
+ if model["name"] == self.full_model and model["precision"] == self.precision.value.name: # FIXME and model["revision"] == model_data["revision"]:
218
+ self.model_type = ModelType.from_str(model["model_type"])
219
+ self.weight_type = WeightType[model["weight_type"]]
220
+ #self.license = request.get("license", "?")
221
+ #self.likes = request.get("likes", 0)
222
+ #self.num_params = request.get("params", 0)
223
+ #self.date = request.get("submitted_time", "")
224
+ return
225
+ print(
226
+ f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}"
227
+ )
228
+
229
  class LeaderboardService:
230
  def __init__(self):
231
+ self.model_service = ModelService()
232
  pass
233
+
234
+ async def get_raw_eval_results(self, results_path: str, requests_path: str) -> list[EvalResult]:
235
+ """From the path of the results folder root, extract all needed info for results"""
236
+ model_result_filepaths = []
237
+
238
+ for root, _, files in os.walk(results_path):
239
+ # We should only have json files in model results
240
+ if len(files) == 0 or any([not f.endswith(".json") for f in files]):
241
+ continue
242
+
243
+ # Sort the files by date
244
+ try:
245
+ files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
246
+ except dateutil.parser._parser.ParserError:
247
+ files = [files[-1]]
248
+
249
+ for file in files:
250
+ model_result_filepaths.append(os.path.join(root, file))
251
+
252
+ eval_results = {}
253
+ await self.model_service.initialize()
254
+ for model_result_filepath in model_result_filepaths:
255
+ # Creation of result
256
+ eval_result = EvalResult.init_from_json_file(model_result_filepath)
257
+ existing_models = await self.model_service.get_models()
258
+ eval_result.update_with_request_file(existing_models)
259
+
260
+ # Store results of same eval together
261
+ eval_name = eval_result.eval_name
262
+ if eval_name in eval_results.keys():
263
+ eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
264
+ else:
265
+ eval_results[eval_name] = eval_result
266
+
267
+ return eval_results.values()
268
+
269
 
270
+ async def fetch_raw_data(self) -> List[EvalResult]:
271
  """Fetch raw leaderboard data from HuggingFace dataset"""
272
  try:
273
  logger.info(LogFormatter.section("FETCHING LEADERBOARD DATA"))
274
  logger.info(LogFormatter.info(f"Loading dataset from {HF_ORGANIZATION}/contents"))
275
+
276
+ # TODO: cache
277
+ snapshot_download(
278
+ repo_id=RESULTS_REPO,
279
+ local_dir=RESULTS_CACHE,
280
+ repo_type="dataset",
281
+ tqdm_class=None,
282
+ etag_timeout=30,
283
+ token=HF_TOKEN,
284
+ )
285
+
286
+ data = await self.get_raw_eval_results(RESULTS_CACHE, EVAL_CACHE)
 
 
 
 
287
  return data
288
 
289
  except Exception as e:
 
345
  logger.error(LogFormatter.error("Failed to format leaderboard data", e))
346
  raise HTTPException(status_code=500, detail=str(e))
347
 
348
+ async def transform_data(self, data: EvalResult) -> Dict[str, Any]:
349
  """Transform raw data into the format expected by the frontend"""
350
  try:
351
  # Extract model name for logging
352
+ model_name = data.full_model
353
  logger.debug(LogFormatter.info(f"Transforming data for model: {model_name}"))
354
 
355
  # Create unique ID combining model name, precision, sha and chat template status
356
+ unique_id = f"{data.full_model}_{data.precision}" # FIXME missing _{data.get('Model sha', 'Unknown')}_{str(data.get('Chat Template', False))}"
357
+
358
  evaluations = {
359
+ "ifeval_fr": {
360
+ "name": "IFEval FR",
361
+ "value": data.results.get("community|ifeval-fr|0", 0),
362
+ "normalized_score": data.normalized_results.get("community|ifeval-fr|0", 0),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  },
364
+ "gpqa_fr": {
365
+ "name": "GPQA FR",
366
+ "value": data.results.get("community|gpqa-fr|0", 0),
367
+ "normalized_score": data.normalized_results.get("community|gpqa-fr|0", 0),
368
  },
369
+ "bac_fr": {
370
+ "name": "BAC FR",
371
+ "value": 0, # FIXME
372
+ "normalized_score": 0 # FIXME
373
  }
374
  }
375
 
376
+ features = { }
 
 
 
 
 
 
377
 
378
+ # FIXME
379
+ # "is_not_available_on_hub": data.get("Available on the hub", False),
380
+ # "is_merged": data.get("Merged", False),
381
+ # "is_moe": data.get("MoE", False),
382
+ # "is_flagged": data.get("Flagged", False),
383
+ # "is_official_provider": data.get("Official Providers", False)
384
+ #}
385
+
386
+ metadata = { }
387
+
388
+ # FIXME
389
+ # "upload_date": data.get("Upload To Hub Date"),
390
+ # "submission_date": data.get("Submission Date"),
391
+ # "generation": data.get("Generation"),
392
+ # "base_model": data.get("Base Model"),
393
+ # "hub_license": data.get("Hub License"),
394
+ # "hub_hearts": data.get("Hub ❀️"),
395
+ # "params_billions": data.get("#Params (B)"),
396
+ # "co2_cost": data.get("COβ‚‚ cost (kg)", 0)
397
+ #}
398
 
399
  # Clean model type by removing emojis if present
400
+ model_type = data.model_type
401
+
402
+ # FIXME
 
 
 
 
 
403
  # Map old model types to new ones
404
+ #model_type_mapping = {
405
+ # "fine-tuned": "fined-tuned-on-domain-specific-dataset",
406
+ # "fine tuned": "fined-tuned-on-domain-specific-dataset",
407
+ # "finetuned": "fined-tuned-on-domain-specific-dataset",
408
+ # "fine_tuned": "fined-tuned-on-domain-specific-dataset",
409
+ # "ft": "fined-tuned-on-domain-specific-dataset",
410
+ # "finetuning": "fined-tuned-on-domain-specific-dataset",
411
+ # "fine tuning": "fined-tuned-on-domain-specific-dataset",
412
+ # "fine-tuning": "fined-tuned-on-domain-specific-dataset"
413
+ #}
414
 
415
+ #mapped_type = model_type_mapping.get(model_type.lower().strip(), model_type)
416
+
417
+ #if mapped_type != model_type:
418
+ # logger.debug(LogFormatter.info(f"Model type mapped: {model_type} -> {mapped_type}"))
419
 
420
  transformed_data = {
421
  "id": unique_id,
422
  "model": {
423
+ "name": data.full_model,
424
+ "sha": "", # FIXME data.get("Model sha"),
425
+ "precision": data.precision.name,
426
+ "type": model_type.to_str(),
427
+ "weight_type": data.weight_type.name,
428
+ "architecture": data.architecture,
429
+ "average_score": sum([v for v in data.results.values() if v is not None]) / len(Tasks),
430
+ "has_chat_template": False, # FIXME data.get("Chat Template", False),
431
  },
432
  "evaluations": evaluations,
433
  "features": features,
434
  "metadata": metadata
435
  }
436
+
437
  logger.debug(LogFormatter.success(f"Successfully transformed data for {model_name}"))
438
  return transformed_data
439
 
440
  except Exception as e:
441
+ logger.error(LogFormatter.error(f"Failed to transform data for {data.full_model}", e))
442
  raise
backend/app/services/models.py CHANGED
@@ -16,11 +16,12 @@ from concurrent.futures import ThreadPoolExecutor
16
  import tempfile
17
 
18
  from app.config import (
 
 
 
 
19
  QUEUE_REPO,
20
- HF_TOKEN,
21
- EVAL_REQUESTS_PATH
22
  )
23
- from app.config.hf_config import HF_ORGANIZATION
24
  from app.services.hf_service import HuggingFaceService
25
  from app.utils.model_validation import ModelValidator
26
  from app.services.votes import VoteService
@@ -186,7 +187,9 @@ class ModelService(HuggingFaceService):
186
  "wait_time": f"{wait_time.total_seconds():.1f}s",
187
  "submission_time": content["submitted_time"],
188
  "status": target_status,
189
- "precision": content.get("precision", "Unknown")
 
 
190
  }
191
 
192
  progress.update()
 
16
  import tempfile
17
 
18
  from app.config import (
19
+ HF_TOKEN
20
+ )
21
+ from app.config.hf_config import (
22
+ HF_ORGANIZATION,
23
  QUEUE_REPO,
 
 
24
  )
 
25
  from app.services.hf_service import HuggingFaceService
26
  from app.utils.model_validation import ModelValidator
27
  from app.services.votes import VoteService
 
187
  "wait_time": f"{wait_time.total_seconds():.1f}s",
188
  "submission_time": content["submitted_time"],
189
  "status": target_status,
190
+ "precision": content.get("precision", "Unknown"),
191
+ "model_type": content["model_type"],
192
+ "weight_type": content["weight_type"],
193
  }
194
 
195
  progress.update()
docker-compose.yml CHANGED
@@ -27,7 +27,6 @@ services:
27
  environment:
28
  - NODE_ENV=${ENVIRONMENT:-development}
29
  - CHOKIDAR_USEPOLLING=true
30
- - PORT=${FRONTEND_PORT:-7860}
31
- command: npm start
32
  stdin_open: true
33
  tty: true
 
27
  environment:
28
  - NODE_ENV=${ENVIRONMENT:-development}
29
  - CHOKIDAR_USEPOLLING=true
30
+ command: yarn dev --host 0.0.0.0 --port ${FRONTEND_PORT:-7860}
 
31
  stdin_open: true
32
  tty: true
frontend/Dockerfile.dev CHANGED
@@ -2,14 +2,11 @@ FROM node:18
2
 
3
  WORKDIR /app
4
 
5
- # Install required global dependencies
6
- RUN npm install -g react-scripts
7
-
8
  # Copy package.json and package-lock.json
9
  COPY package*.json ./
10
 
11
  # Install project dependencies
12
- RUN npm install
13
 
14
  # Volume will be mounted here, no need for COPY
15
- CMD ["npm", "start"]
 
2
 
3
  WORKDIR /app
4
 
 
 
 
5
  # Copy package.json and package-lock.json
6
  COPY package*.json ./
7
 
8
  # Install project dependencies
9
+ RUN yarn install
10
 
11
  # Volume will be mounted here, no need for COPY
12
+ CMD ["yarn", "dev"]
frontend/README.md CHANGED
@@ -1,80 +1,8 @@
1
- # Frontend - Open LLM Leaderboard 🏆
2
 
3
- React interface for exploring and comparing open-source language models.
4
 
5
- ## 🏗 Architecture
6
 
7
- ```mermaid
8
- flowchart TD
9
- Client(["User Browser"]) --> Components["React Components"]
10
-
11
- subgraph Frontend
12
- Components --> Context["Context Layer<br>• LeaderboardContext<br>• Global State"]
13
-
14
- API["API Layer<br>• /api/leaderboard/formatted<br>• TanStack Query"] --> |Data Feed| Context
15
-
16
- Context --> Hooks["Hooks Layer<br>• Data Processing<br>• Filtering<br>• Caching"]
17
-
18
- Hooks --> Features["Features<br>• Table Management<br>• Search & Filters<br>• Display Options"]
19
- Features --> Cache["Cache Layer<br>• LocalStorage<br>• URL State"]
20
- end
21
-
22
- API --> Backend["Backend Server"]
23
-
24
- style Backend fill:#f96,stroke:#333,stroke-width:2px
25
- ```
26
-
27
- ## ✨ Core Features
28
-
29
- - 🔍 **Search & Filters**: Real-time filtering, regex search, advanced filters
30
- - 📊 **Data Visualization**: Interactive table, customizable columns, sorting
31
- - 🔄 **State Management**: URL sync, client-side caching (5min TTL)
32
- - 📱 **Responsive Design**: Mobile-friendly, dark/light themes
33
-
34
- ## 🛠 Tech Stack
35
-
36
- - React 18 + Material-UI
37
- - TanStack Query & Table
38
- - React Router v6
39
-
40
- ## 📁 Project Structure
41
-
42
- ```
43
- src/
44
- ├── pages/
45
- │ └── LeaderboardPage/
46
- │ ├── components/ # UI Components
47
- │ ├── context/ # Global State
48
- │ └── hooks/ # Data Processing
49
- ├── components/ # Shared Components
50
- └── utils/ # Helper Functions
51
- ```
52
-
53
- ## 🚀 Development
54
-
55
- ```bash
56
- # Install dependencies
57
- npm install
58
-
59
- # Start development server
60
- npm start
61
-
62
- # Production build
63
- npm run build
64
- ```
65
-
66
- ## 🔧 Environment Variables
67
-
68
- ```env
69
- # API Configuration
70
- REACT_APP_API_URL=http://localhost:8000
71
- REACT_APP_CACHE_DURATION=300000 # 5 minutes
72
- ```
73
-
74
- ## 🔄 Data Flow
75
-
76
- 1. API fetches leaderboard data from backend
77
- 2. Context stores and manages global state
78
- 3. Hooks handle data processing and filtering
79
- 4. Components render based on processed data
80
- 5. Cache maintains user preferences and URL state
 
1
+ # React + Vite
2
 
3
+ This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
4
 
5
+ Currently, two official plugins are available:
6
 
7
+ - [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh
8
+ - [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
frontend/dist/assets/index-DMNVfo70.js ADDED
The diff for this file is too large to render. See raw diff
 
frontend/dist/assets/index-kQJbKSsj.css ADDED
@@ -0,0 +1 @@
 
 
1
+ :root{font-family:Inter,system-ui,Avenir,Helvetica,Arial,sans-serif;line-height:1.5;font-weight:400;color-scheme:light dark;color:#ffffffde;background-color:#242424;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}a{font-weight:500;color:#646cff;text-decoration:inherit}a:hover{color:#535bf2}body{margin:0;display:flex;place-items:center;min-width:320px;min-height:100vh}h1{font-size:3.2em;line-height:1.1}button{border-radius:8px;border:1px solid transparent;padding:.6em 1.2em;font-size:1em;font-weight:500;font-family:inherit;background-color:#1a1a1a;cursor:pointer;transition:border-color .25s}button:hover{border-color:#646cff}button:focus,button:focus-visible{outline:4px auto -webkit-focus-ring-color}@media (prefers-color-scheme: light){:root{color:#213547;background-color:#fff}a:hover{color:#747bff}button{background-color:#f9f9f9}}
frontend/{public → dist}/index.html RENAMED
@@ -2,7 +2,7 @@
2
  <html lang="en">
3
  <head>
4
  <meta charset="utf-8" />
5
- <link rel="icon" href="%PUBLIC_URL%/logo32.png" />
6
  <meta
7
  name="viewport"
8
  content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no, viewport-fit=cover"
@@ -26,7 +26,7 @@
26
  property="og:description"
27
  content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
28
  />
29
- <meta property="og:image" content="%PUBLIC_URL%/og-image.png" />
30
 
31
  <!-- Twitter -->
32
  <meta property="twitter:card" content="summary_large_image" />
@@ -42,13 +42,13 @@
42
  property="twitter:description"
43
  content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
44
  />
45
- <meta property="twitter:image" content="%PUBLIC_URL%/og-image.png" />
46
  <!--
47
- Notice the use of %PUBLIC_URL% in the tags above.
48
  It will be replaced with the URL of the `public` folder during the build.
49
  Only files inside the `public` folder can be referenced from the HTML.
50
 
51
- Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
52
  work correctly both with client-side routing and a non-root public URL.
53
  Learn how to configure a non-root public URL by running `npm run build`.
54
  -->
@@ -78,19 +78,11 @@
78
  -webkit-overflow-scrolling: touch;
79
  }
80
  </style>
 
 
81
  </head>
82
  <body>
83
  <noscript>You need to enable JavaScript to run this app.</noscript>
84
  <div id="root"></div>
85
- <!--
86
- This HTML file is a template.
87
- If you open it directly in the browser, you will see an empty page.
88
-
89
- You can add webfonts, meta tags, or analytics to this file.
90
- The build step will place the bundled scripts into the <body> tag.
91
-
92
- To begin the development, run `npm start` or `yarn start`.
93
- To create a production bundle, use `npm run build` or `yarn build`.
94
- -->
95
  </body>
96
  </html>
 
2
  <html lang="en">
3
  <head>
4
  <meta charset="utf-8" />
5
+ <link rel="icon" href="/logo32.png" />
6
  <meta
7
  name="viewport"
8
  content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no, viewport-fit=cover"
 
26
  property="og:description"
27
  content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
28
  />
29
+ <meta property="og:image" content="/og-image.png" />
30
 
31
  <!-- Twitter -->
32
  <meta property="twitter:card" content="summary_large_image" />
 
42
  property="twitter:description"
43
  content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
44
  />
45
+ <meta property="twitter:image" content="/og-image.png" />
46
  <!--
47
+ Notice the use of in the tags above.
48
  It will be replaced with the URL of the `public` folder during the build.
49
  Only files inside the `public` folder can be referenced from the HTML.
50
 
51
+ Unlike "/favicon.ico" or "favicon.ico", "/favicon.ico" will
52
  work correctly both with client-side routing and a non-root public URL.
53
  Learn how to configure a non-root public URL by running `npm run build`.
54
  -->
 
78
  -webkit-overflow-scrolling: touch;
79
  }
80
  </style>
81
+ <script type="module" crossorigin src="/assets/index-DMNVfo70.js"></script>
82
+ <link rel="stylesheet" crossorigin href="/assets/index-kQJbKSsj.css">
83
  </head>
84
  <body>
85
  <noscript>You need to enable JavaScript to run this app.</noscript>
86
  <div id="root"></div>
 
 
 
 
 
 
 
 
 
 
87
  </body>
88
  </html>
frontend/dist/vite.svg ADDED
frontend/eslint.config.js ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import js from '@eslint/js'
2
+ import globals from 'globals'
3
+ import react from 'eslint-plugin-react'
4
+ import reactHooks from 'eslint-plugin-react-hooks'
5
+ import reactRefresh from 'eslint-plugin-react-refresh'
6
+
7
+ export default [
8
+ { ignores: ['dist'] },
9
+ {
10
+ files: ['**/*.{js,jsx}'],
11
+ languageOptions: {
12
+ ecmaVersion: 2020,
13
+ globals: globals.browser,
14
+ parserOptions: {
15
+ ecmaVersion: 'latest',
16
+ ecmaFeatures: { jsx: true },
17
+ sourceType: 'module',
18
+ },
19
+ },
20
+ settings: { react: { version: '18.3' } },
21
+ plugins: {
22
+ react,
23
+ 'react-hooks': reactHooks,
24
+ 'react-refresh': reactRefresh,
25
+ },
26
+ rules: {
27
+ ...js.configs.recommended.rules,
28
+ ...react.configs.recommended.rules,
29
+ ...react.configs['jsx-runtime'].rules,
30
+ ...reactHooks.configs.recommended.rules,
31
+ 'react/jsx-no-target-blank': 'off',
32
+ 'react-refresh/only-export-components': [
33
+ 'warn',
34
+ { allowConstantExport: true },
35
+ ],
36
+ },
37
+ },
38
+ ]
frontend/index.html ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <link rel="icon" href="/logo32.png" />
6
+ <meta
7
+ name="viewport"
8
+ content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no, viewport-fit=cover"
9
+ />
10
+ <meta
11
+ name="description"
12
+ content="Interactive leaderboard tracking and comparing open-source Large Language Models across multiple benchmarks: IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
13
+ />
14
+
15
+ <!-- Open Graph / Facebook -->
16
+ <meta property="og:type" content="website" />
17
+ <meta
18
+ property="og:url"
19
+ content="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
20
+ />
21
+ <meta
22
+ property="og:title"
23
+ content="Open LLM Leaderboard - Compare Open Source Large Language Models"
24
+ />
25
+ <meta
26
+ property="og:description"
27
+ content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
28
+ />
29
+ <meta property="og:image" content="/og-image.png" />
30
+
31
+ <!-- Twitter -->
32
+ <meta property="twitter:card" content="summary_large_image" />
33
+ <meta
34
+ property="twitter:url"
35
+ content="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
36
+ />
37
+ <meta
38
+ property="twitter:title"
39
+ content="Open LLM Leaderboard - Compare Open Source Large Language Models"
40
+ />
41
+ <meta
42
+ property="twitter:description"
43
+ content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
44
+ />
45
+ <meta property="twitter:image" content="/og-image.png" />
46
+ <!--
47
+ Notice the use of root-relative URLs (such as "/logo32.png") in the tags above.
48
+ Vite serves files from the `public` folder at the site root and copies them unchanged into the build output.
49
+ Only files inside the `public` folder can be referenced from the HTML.
50
+
51
+ Unlike "/favicon.ico" or "favicon.ico", "/favicon.ico" will
52
+ work correctly both with client-side routing and a non-root public URL.
53
+ Learn how to configure a non-root public URL by running `npm run build`.
54
+ -->
55
+ <title>
56
+ Open LLM Leaderboard - Compare Open Source Large Language Models
57
+ </title>
58
+ <link
59
+ href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap"
60
+ rel="stylesheet"
61
+ />
62
+ <style>
63
+ html,
64
+ body {
65
+ position: fixed;
66
+ width: 100%;
67
+ height: 100%;
68
+ overflow: hidden;
69
+ -webkit-overflow-scrolling: touch;
70
+ }
71
+ #root {
72
+ position: absolute;
73
+ top: 0;
74
+ left: 0;
75
+ right: 0;
76
+ bottom: 0;
77
+ overflow-y: auto;
78
+ -webkit-overflow-scrolling: touch;
79
+ }
80
+ </style>
81
+
82
+ <link rel="apple-touch-icon" href="./node_modules/@codegouvfr/react-dsfr/favicon/apple-touch-icon.png" />
83
+ <link rel="icon" href="./node_modules/@codegouvfr/react-dsfr/favicon/favicon.svg" type="image/svg+xml" />
84
+ <link rel="shortcut icon" href="./node_modules/@codegouvfr/react-dsfr/favicon/favicon.ico" type="image/x-icon" />
85
+ <link rel="manifest" href="./node_modules/@codegouvfr/react-dsfr/favicon/manifest.webmanifest" crossorigin="use-credentials" />
86
+
87
+ <link rel="stylesheet" href="./node_modules/@codegouvfr/react-dsfr/main.css" />
88
+ </head>
89
+ <body>
90
+ <noscript>You need to enable JavaScript to run this app.</noscript>
91
+ <div id="root"></div>
92
+ <script type="module" src="/src/main.jsx"></script>
93
+ </body>
94
+ </html>
frontend/package.json CHANGED
@@ -1,8 +1,18 @@
1
  {
2
- "name": "open-llm-leaderboard",
3
- "version": "0.1.0",
4
  "private": true,
 
 
 
 
 
 
 
 
 
 
5
  "dependencies": {
 
6
  "@emotion/react": "^11.13.3",
7
  "@emotion/styled": "^11.13.0",
8
  "@huggingface/hub": "^0.14.0",
@@ -13,43 +23,24 @@
13
  "@tanstack/react-query": "^5.62.2",
14
  "@tanstack/react-table": "^8.20.5",
15
  "@tanstack/react-virtual": "^3.10.9",
16
- "@testing-library/jest-dom": "^5.17.0",
17
- "@testing-library/react": "^13.4.0",
18
- "@testing-library/user-event": "^13.5.0",
19
  "compression": "^1.7.4",
20
  "cors": "^2.8.5",
21
  "express": "^4.18.2",
 
22
  "react": "^18.3.1",
23
  "react-dom": "^18.3.1",
24
- "react-router-dom": "^6.28.0",
25
- "react-scripts": "5.0.1",
26
- "serve-static": "^1.15.0",
27
- "web-vitals": "^2.1.4"
28
- },
29
- "scripts": {
30
- "start": "react-scripts start",
31
- "build": "react-scripts build",
32
- "test": "react-scripts test",
33
- "eject": "react-scripts eject",
34
- "serve": "node server.js"
35
- },
36
- "eslintConfig": {
37
- "extends": [
38
- "react-app",
39
- "react-app/jest"
40
- ]
41
- },
42
- "browserslist": {
43
- "production": [
44
- ">0.2%",
45
- "not dead",
46
- "not op_mini all"
47
- ],
48
- "development": [
49
- "last 1 chrome version",
50
- "last 1 firefox version",
51
- "last 1 safari version"
52
- ]
53
  },
54
- "proxy": "http://backend:8000"
 
 
 
 
 
 
 
 
 
 
 
55
  }
 
1
  {
2
+ "name": "frontend2",
 
3
  "private": true,
4
+ "version": "0.0.0",
5
+ "type": "module",
6
+ "scripts": {
7
+ "dev": "vite",
8
+ "build": "vite build",
9
+ "lint": "eslint .",
10
+ "preview": "vite preview",
11
+ "predev": "react-dsfr update-icons",
12
+ "prebuild": "react-dsfr update-icons"
13
+ },
14
  "dependencies": {
15
+ "@codegouvfr/react-dsfr": "^1.17.0",
16
  "@emotion/react": "^11.13.3",
17
  "@emotion/styled": "^11.13.0",
18
  "@huggingface/hub": "^0.14.0",
 
23
  "@tanstack/react-query": "^5.62.2",
24
  "@tanstack/react-table": "^8.20.5",
25
  "@tanstack/react-virtual": "^3.10.9",
 
 
 
26
  "compression": "^1.7.4",
27
  "cors": "^2.8.5",
28
  "express": "^4.18.2",
29
+ "http-proxy-middleware": "^3.0.3",
30
  "react": "^18.3.1",
31
  "react-dom": "^18.3.1",
32
+ "react-router-dom": "^6.28.0"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  },
34
+ "devDependencies": {
35
+ "@eslint/js": "^9.17.0",
36
+ "@types/react": "^18.3.18",
37
+ "@types/react-dom": "^18.3.5",
38
+ "@vitejs/plugin-react": "^4.3.4",
39
+ "eslint": "^9.17.0",
40
+ "eslint-plugin-react": "^7.37.2",
41
+ "eslint-plugin-react-hooks": "^5.0.0",
42
+ "eslint-plugin-react-refresh": "^0.4.16",
43
+ "globals": "^15.14.0",
44
+ "vite": "^6.0.5"
45
+ }
46
  }
frontend/public/logo256.png DELETED
Binary file (24.6 kB)
 
frontend/public/logo32.png DELETED
Binary file (1.96 kB)
 
frontend/public/og-image.jpg DELETED
Binary file (13.8 kB)
 
frontend/public/robots.txt DELETED
@@ -1,3 +0,0 @@
1
- # https://www.robotstxt.org/robotstxt.html
2
- User-agent: *
3
- Disallow:
 
 
 
 
frontend/public/vite.svg ADDED
frontend/{server.js → server.cjs} RENAMED
@@ -30,7 +30,7 @@ app.use(
30
 
31
  // Serve static files from the build directory
32
  app.use(
33
- express.static(path.join(__dirname, "build"), {
34
  // Don't cache HTML files
35
  setHeaders: (res, path) => {
36
  if (path.endsWith(".html")) {
 
30
 
31
  // Serve static files from the build directory
32
  app.use(
33
+ express.static(path.join(__dirname, "dist"), {
34
  // Don't cache HTML files
35
  setHeaders: (res, path) => {
36
  if (path.endsWith(".html")) {
frontend/src/App.css ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #root {
2
+ max-width: 1280px;
3
+ margin: 0 auto;
4
+ padding: 2rem;
5
+ text-align: center;
6
+ }
7
+
8
+ .logo {
9
+ height: 6em;
10
+ padding: 1.5em;
11
+ will-change: filter;
12
+ transition: filter 300ms;
13
+ }
14
+ .logo:hover {
15
+ filter: drop-shadow(0 0 2em #646cffaa);
16
+ }
17
+ .logo.react:hover {
18
+ filter: drop-shadow(0 0 2em #61dafbaa);
19
+ }
20
+
21
+ @keyframes logo-spin {
22
+ from {
23
+ transform: rotate(0deg);
24
+ }
25
+ to {
26
+ transform: rotate(360deg);
27
+ }
28
+ }
29
+
30
+ @media (prefers-reduced-motion: no-preference) {
31
+ a:nth-of-type(2) .logo {
32
+ animation: logo-spin infinite 20s linear;
33
+ }
34
+ }
35
+
36
+ .card {
37
+ padding: 2em;
38
+ }
39
+
40
+ .read-the-docs {
41
+ color: #888;
42
+ }
frontend/src/{App.js → App.jsx} RENAMED
@@ -13,7 +13,8 @@ import LeaderboardPage from "./pages/LeaderboardPage/LeaderboardPage";
13
  import AddModelPage from "./pages/AddModelPage/AddModelPage";
14
  import QuotePage from "./pages/QuotePage/QuotePage";
15
  import VoteModelPage from "./pages/VoteModelPage/VoteModelPage";
16
- import Footer from "./components/Footer/Footer";
 
17
  import getTheme from "./config/theme";
18
  import { useThemeMode } from "./hooks/useThemeMode";
19
  import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
@@ -94,6 +95,16 @@ function App() {
94
  color: "text.primary",
95
  }}
96
  >
 
 
 
 
 
 
 
 
 
 
97
  <Navigation onToggleTheme={toggleTheme} mode={mode} />
98
  <Box
99
  sx={{
@@ -112,7 +123,31 @@ function App() {
112
  <Route path="/vote" element={<VoteModelPage />} />
113
  </Routes>
114
  </Box>
115
- <Footer />
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  </Box>
117
  </LeaderboardProvider>
118
  </Router>
 
13
  import AddModelPage from "./pages/AddModelPage/AddModelPage";
14
  import QuotePage from "./pages/QuotePage/QuotePage";
15
  import VoteModelPage from "./pages/VoteModelPage/VoteModelPage";
16
+ import { Header } from "@codegouvfr/react-dsfr/Header";
17
+ import { Footer } from "@codegouvfr/react-dsfr/Footer";
18
  import getTheme from "./config/theme";
19
  import { useThemeMode } from "./hooks/useThemeMode";
20
  import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
 
95
  color: "text.primary",
96
  }}
97
  >
98
+ <Header
99
+ brandTop={<>INTITULE<br />OFFICIEL</>}
100
+ homeLinkProps={{
101
+ href: '/',
102
+ title: 'Accueil - Nom de l’entitΓ© (ministΓ¨re, secrΓ©tariat dβ€˜Γ©tat, gouvernement)'
103
+ }}
104
+ id="fr-header-simple-header-with-service-title-and-tagline"
105
+ serviceTagline="baseline - prΓ©cisions sur l'organisation"
106
+ serviceTitle="Nom du site / service"
107
+ />
108
  <Navigation onToggleTheme={toggleTheme} mode={mode} />
109
  <Box
110
  sx={{
 
123
  <Route path="/vote" element={<VoteModelPage />} />
124
  </Routes>
125
  </Box>
126
+ <Footer
127
+ accessibility="fully compliant"
128
+ contentDescription="
129
+ Ce message est Γ  remplacer par les informations de votre site.
130
+
131
+ Comme exemple de contenu, vous pouvez indiquer les informations
132
+ suivantes : Le site officiel d’information administrative pour les entreprises.
133
+ Retrouvez toutes les informations et dΓ©marches administratives nΓ©cessaires Γ  la crΓ©ation,
134
+ Γ  la gestion et au dΓ©veloppement de votre entreprise.
135
+ "
136
+ partnersLogos={{
137
+ sub: [
138
+ {
139
+ alt: '[Γ€ MODIFIER - texte alternatif de l’image]',
140
+ href: '#',
141
+ imgUrl: 'static/media/placeholder.16x9.3d46f94c.png'
142
+ },
143
+ {
144
+ alt: '[Γ€ MODIFIER - texte alternatif de l’image]',
145
+ href: '#',
146
+ imgUrl: 'static/media/placeholder.16x9.3d46f94c.png'
147
+ }
148
+ ]
149
+ }}
150
+ />
151
  </Box>
152
  </LeaderboardProvider>
153
  </Router>
frontend/src/assets/react.svg ADDED
frontend/src/components/Footer/{Footer.js → Footer.jsx} RENAMED
File without changes
frontend/src/components/Logo/{HFLogo.js → HFLogo.jsx} RENAMED
File without changes
frontend/src/components/Logo/{Logo.js → Logo.jsx} RENAMED
File without changes
frontend/src/components/Navigation/{Navigation.js → Navigation.jsx} RENAMED
File without changes
frontend/src/components/shared/{AuthContainer.js → AuthContainer.jsx} RENAMED
File without changes
frontend/src/components/shared/{FilterTag.js → FilterTag.jsx} RENAMED
File without changes
frontend/src/components/shared/{InfoIconWithTooltip.js → InfoIconWithTooltip.jsx} RENAMED
File without changes
frontend/src/components/shared/{PageHeader.js → PageHeader.jsx} RENAMED
File without changes
frontend/src/index.css ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ font-family: Inter, system-ui, Avenir, Helvetica, Arial, sans-serif;
3
+ line-height: 1.5;
4
+ font-weight: 400;
5
+
6
+ color-scheme: light dark;
7
+ color: rgba(255, 255, 255, 0.87);
8
+ background-color: #242424;
9
+
10
+ font-synthesis: none;
11
+ text-rendering: optimizeLegibility;
12
+ -webkit-font-smoothing: antialiased;
13
+ -moz-osx-font-smoothing: grayscale;
14
+ }
15
+
16
+ a {
17
+ font-weight: 500;
18
+ color: #646cff;
19
+ text-decoration: inherit;
20
+ }
21
+ a:hover {
22
+ color: #535bf2;
23
+ }
24
+
25
+ body {
26
+ margin: 0;
27
+ display: flex;
28
+ place-items: center;
29
+ min-width: 320px;
30
+ min-height: 100vh;
31
+ }
32
+
33
+ h1 {
34
+ font-size: 3.2em;
35
+ line-height: 1.1;
36
+ }
37
+
38
+ button {
39
+ border-radius: 8px;
40
+ border: 1px solid transparent;
41
+ padding: 0.6em 1.2em;
42
+ font-size: 1em;
43
+ font-weight: 500;
44
+ font-family: inherit;
45
+ background-color: #1a1a1a;
46
+ cursor: pointer;
47
+ transition: border-color 0.25s;
48
+ }
49
+ button:hover {
50
+ border-color: #646cff;
51
+ }
52
+ button:focus,
53
+ button:focus-visible {
54
+ outline: 4px auto -webkit-focus-ring-color;
55
+ }
56
+
57
+ @media (prefers-color-scheme: light) {
58
+ :root {
59
+ color: #213547;
60
+ background-color: #ffffff;
61
+ }
62
+ a:hover {
63
+ color: #747bff;
64
+ }
65
+ button {
66
+ background-color: #f9f9f9;
67
+ }
68
+ }
frontend/src/index.js DELETED
@@ -1,10 +0,0 @@
1
- import React from "react";
2
- import ReactDOM from "react-dom/client";
3
- import App from "./App";
4
-
5
- const root = ReactDOM.createRoot(document.getElementById("root"));
6
- root.render(
7
- <React.StrictMode>
8
- <App />
9
- </React.StrictMode>
10
- );
 
 
 
 
 
 
 
 
 
 
 
frontend/src/main.jsx ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { StrictMode } from 'react'
2
+ import { createRoot } from 'react-dom/client'
3
+ import './index.css'
4
+ import App from './App.jsx'
5
+ import { startReactDsfr } from "@codegouvfr/react-dsfr/spa";
6
+ startReactDsfr({ defaultColorScheme: "system" });
7
+
8
+ createRoot(document.getElementById('root')).render(
9
+ <StrictMode>
10
+ <App />
11
+ </StrictMode>,
12
+ )
frontend/src/pages/AddModelPage/{AddModelPage.js → AddModelPage.jsx} RENAMED
File without changes
frontend/src/pages/AddModelPage/components/EvaluationQueues/{EvaluationQueues.js → EvaluationQueues.jsx} RENAMED
File without changes
frontend/src/pages/AddModelPage/components/ModelSubmissionForm/{ModelSubmissionForm.js → ModelSubmissionForm.jsx} RENAMED
@@ -21,8 +21,8 @@ import CheckCircleOutlineIcon from "@mui/icons-material/CheckCircleOutline";
21
  import ThumbUpIcon from "@mui/icons-material/ThumbUp";
22
  import { alpha } from "@mui/material/styles";
23
  import InfoIconWithTooltip from "../../../../components/shared/InfoIconWithTooltip";
24
- import { MODEL_TYPES } from "../../../../pages/LeaderboardPage/components/Leaderboard/constants/modelTypes";
25
- import { SUBMISSION_PRECISIONS } from "../../../../pages/LeaderboardPage/components/Leaderboard/constants/defaults";
26
  import AuthContainer from "../../../../components/shared/AuthContainer";
27
 
28
  const WEIGHT_TYPES = [
 
21
  import ThumbUpIcon from "@mui/icons-material/ThumbUp";
22
  import { alpha } from "@mui/material/styles";
23
  import InfoIconWithTooltip from "../../../../components/shared/InfoIconWithTooltip";
24
+ import { MODEL_TYPES } from "../../../LeaderboardPage/components/Leaderboard/constants/modelTypes";
25
+ import { SUBMISSION_PRECISIONS } from "../../../LeaderboardPage/components/Leaderboard/constants/defaults";
26
  import AuthContainer from "../../../../components/shared/AuthContainer";
27
 
28
  const WEIGHT_TYPES = [
frontend/src/pages/AddModelPage/components/SubmissionGuide/{SubmissionGuide.js → SubmissionGuide.jsx} RENAMED
File without changes
frontend/src/pages/LeaderboardPage/{LeaderboardPage.js → LeaderboardPage.jsx} RENAMED
@@ -3,8 +3,8 @@ import Leaderboard from "./components/Leaderboard/Leaderboard";
3
  import { Box } from "@mui/material";
4
  import PageHeader from "../../components/shared/PageHeader";
5
  import Logo from "../../components/Logo/Logo";
6
- import { useLeaderboardData } from "../../pages/LeaderboardPage/components/Leaderboard/hooks/useLeaderboardData";
7
- import { useLeaderboard } from "../../pages/LeaderboardPage/components/Leaderboard/context/LeaderboardContext";
8
 
9
  function LeaderboardPage() {
10
  const { data, isLoading, error } = useLeaderboardData();
 
3
  import { Box } from "@mui/material";
4
  import PageHeader from "../../components/shared/PageHeader";
5
  import Logo from "../../components/Logo/Logo";
6
+ import { useLeaderboardData } from "./components/Leaderboard/hooks/useLeaderboardData";
7
+ import { useLeaderboard } from "./components/Leaderboard/context/LeaderboardContext";
8
 
9
  function LeaderboardPage() {
10
  const { data, isLoading, error } = useLeaderboardData();
frontend/src/pages/LeaderboardPage/components/Leaderboard/{Leaderboard.js → Leaderboard.jsx} RENAMED
File without changes
frontend/src/pages/LeaderboardPage/components/Leaderboard/components/ColumnSelector/{ColumnSelector.js → ColumnSelector.jsx} RENAMED
File without changes
frontend/src/pages/LeaderboardPage/components/Leaderboard/components/DisplayOptions/{DisplayOptions.js → DisplayOptions.jsx} RENAMED
File without changes
frontend/src/pages/LeaderboardPage/components/Leaderboard/components/Filters/{FilteredModelCount.js → FilteredModelCount.jsx} RENAMED
File without changes
frontend/src/pages/LeaderboardPage/components/Leaderboard/components/Filters/{Filters.js → Filters.jsx} RENAMED
File without changes
frontend/src/pages/LeaderboardPage/components/Leaderboard/components/Filters/{QuickFilters.js → QuickFilters.jsx} RENAMED
File without changes
frontend/src/pages/LeaderboardPage/components/Leaderboard/components/Filters/{SearchBar.js → SearchBar.jsx} RENAMED
File without changes
frontend/src/pages/LeaderboardPage/components/Leaderboard/components/{PerformanceMonitor.js → PerformanceMonitor.jsx} RENAMED
File without changes
frontend/src/pages/LeaderboardPage/components/Leaderboard/components/Table/{Table.js → Table.jsx} RENAMED
File without changes