tfrere committed
Commit 81e0b0c · 1 Parent(s): d88a570

update error handling, improve upload security checks

Files changed (36)
  1. backend/lighteval_task/lighteval_task.py +2 -3
  2. backend/pyproject.toml +1 -0
  3. backend/routes/cleanup.py +1 -1
  4. backend/routes/upload.py +82 -10
  5. backend/tasks/create_bench_config_file.py +7 -6
  6. backend/tasks/evaluation_task.py +35 -36
  7. backend/tasks/get_available_model_provider.py +8 -1
  8. frontend/server.js +0 -2
  9. frontend/src/App.js +4 -21
  10. frontend/src/components/{BenchmarkCreateForm.jsx → Benchmark/CreateForm.jsx} +12 -12
  11. frontend/src/components/{BenchmarkDisplay.jsx → Benchmark/Display.jsx} +6 -26
  12. frontend/src/components/{BenchmarkGenerator.jsx → Benchmark/Generator.jsx} +152 -180
  13. frontend/src/components/Benchmark/hooks/useBenchmarkLogs.js +192 -0
  14. frontend/src/components/Benchmark/hooks/useBenchmarkPolling.js +106 -0
  15. frontend/src/components/Benchmark/hooks/useBenchmarkSimulation.js +66 -0
  16. frontend/src/components/BenchmarkEvaluation.jsx +0 -401
  17. frontend/src/components/{EvaluationDisplay.jsx → Evaluation/Display.jsx} +11 -8
  18. frontend/src/components/Evaluation/Evaluation.jsx +150 -0
  19. frontend/src/components/Evaluation/hooks/useEvaluation.js +148 -0
  20. frontend/src/components/Evaluation/hooks/useSimulation.js +59 -0
  21. frontend/src/components/Evaluation/hooks/useTimer.js +48 -0
  22. frontend/src/components/Footer/Footer.js +7 -5
  23. frontend/src/components/Intro.jsx +14 -2
  24. frontend/src/components/KeyboardShortcuts.jsx +0 -24
  25. frontend/src/components/{ExternalLinks.jsx → Navigation.jsx} +3 -3
  26. frontend/src/components/common/ErrorDisplay.jsx +43 -0
  27. frontend/src/components/shared/AuthContainer.js +0 -192
  28. frontend/src/components/shared/CodeBlock.js +0 -37
  29. frontend/src/components/shared/FilterTag.js +0 -139
  30. frontend/src/components/shared/InfoIconWithTooltip.js +0 -87
  31. frontend/src/components/shared/PageHeader.js +0 -29
  32. frontend/src/pages/BenchmarkDisplayPage.jsx +2 -2
  33. frontend/src/pages/BenchmarkEvaluationPage.jsx +2 -2
  34. frontend/src/pages/BenchmarkGenerationPage.jsx +2 -2
  35. frontend/src/pages/EvaluationDisplayPage.jsx +5 -9
  36. frontend/src/pages/HomePage.jsx +2 -2
backend/lighteval_task/lighteval_task.py CHANGED
@@ -218,11 +218,10 @@ def process_judge_response_yourbench(response):
 class JudgeLLMYourBench(JudgeLLM):
     def __init__(self):
         super().__init__(
-            judge_model_name="Qwen/QwQ-32B",
+            judge_model_name="gpt-4o-2024-08-06",
             template=get_judge_prompt,
             process_judge_response=process_judge_response_yourbench,
-            judge_backend="inference-providers",
-            hf_provider="novita",
+            judge_backend="openai",
             short_judge_name="yourbench_judge",
         )
 
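The judge change above moves answer grading off the Hugging Face inference-provider stack (Qwen/QwQ-32B via novita) onto OpenAI's gpt-4o-2024-08-06. A minimal sketch of the startup guard this implies, assuming the openai backend reads OPENAI_API_KEY from the environment — the variable name and helper are assumptions, not part of this commit:

import os

def check_judge_credentials() -> None:
    # Assumed requirement: judge_backend="openai" needs an OpenAI key in
    # addition to the HF_TOKEN the rest of the backend already checks for.
    if not os.environ.get("OPENAI_API_KEY"):
        raise ValueError("OPENAI_API_KEY not defined in environment")

This mirrors the HF_TOKEN guard pattern used in backend/tasks/get_available_model_provider.py below.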
backend/pyproject.toml CHANGED
@@ -25,6 +25,7 @@ dependencies = [
     "pydantic>=2.6.0",
     "PyPDF2>=3.0.0",
     "beautifulsoup4>=4.12.0",
+    "evaluate>=0.4.0",
 ]
 
 [build-system]
backend/routes/cleanup.py CHANGED
@@ -27,7 +27,7 @@ async def cleanup_session(session_id: str):
     """
     # Check if we are in development mode
     # if os.environ.get("ENVIRONEMENT", "").lower() == "development":
-    if False:
+    if True:
         logging.info(f"[DEV MODE] Cleanup called for session: {session_id} - No action taken in development mode")
         return {
             "success": True,
backend/routes/upload.py CHANGED
@@ -14,12 +14,23 @@ session_files = {}
 UPLOAD_ROOT = "uploaded_files"
 os.makedirs(UPLOAD_ROOT, exist_ok=True)
 
+# Longueur minimale pour tout fichier (en caractères)
+MIN_FILE_LENGTH = 500
+
 def validate_pdf(file_path: str) -> bool:
     """Validate if file is a valid PDF."""
     try:
         reader = PdfReader(file_path)
         # Vérifier que le PDF a au moins une page
-        return len(reader.pages) > 0
+        if len(reader.pages) == 0:
+            return False
+
+        # Extraire le texte pour vérifier la longueur
+        text = ""
+        for page in reader.pages:
+            text += page.extract_text()
+
+        return len(text) >= MIN_FILE_LENGTH
     except:
         return False
 
@@ -28,8 +39,8 @@ def validate_markdown(file_path: str) -> bool:
     try:
         with open(file_path, 'r', encoding='utf-8') as f:
             content = f.read()
-            # Simple check: file should contain some content and at least one markdown element
-            return len(content) > 0 and any(marker in content for marker in ['#', '-', '*', '`', '[', '>'])
+            # Vérifier longueur minimale et présence d'éléments markdown
+            return len(content) >= MIN_FILE_LENGTH and any(marker in content for marker in ['#', '-', '*', '`', '[', '>'])
     except:
         return False
 
@@ -37,7 +48,11 @@ def validate_html(file_path: str) -> bool:
     """Validate if file is a valid HTML file."""
     try:
         with open(file_path, 'r', encoding='utf-8') as f:
-            BeautifulSoup(f.read(), 'html.parser')
+            content = f.read()
+            # Vérifier longueur minimale et structure HTML
+            if len(content) < MIN_FILE_LENGTH:
+                return False
+            BeautifulSoup(content, 'html.parser')
         return True
     except:
         return False
@@ -47,7 +62,7 @@ def validate_txt(file_path: str) -> bool:
     try:
         with open(file_path, 'r', encoding='utf-8') as f:
             content = f.read()
-            return len(content.strip()) > 0
+            return len(content.strip()) >= MIN_FILE_LENGTH
     except:
         return False
 
@@ -112,19 +127,76 @@ async def upload_file(file: UploadFile = File(...)):
 
     # Valider le fichier selon son type
     is_valid = False
+    error_detail = ""
+
     if file_extension == '.pdf':
-        is_valid = validate_pdf(file_path)
+        try:
+            reader = PdfReader(file_path)
+            if len(reader.pages) == 0:
+                error_detail = "PDF must contain at least one page"
+                is_valid = False
+            else:
+                text = ""
+                for page in reader.pages:
+                    text += page.extract_text()
+
+                if len(text) < MIN_FILE_LENGTH:
+                    error_detail = f"PDF contains {len(text)} characters but must contain at least {MIN_FILE_LENGTH}"
+                    is_valid = False
+                else:
+                    is_valid = True
+        except:
+            error_detail = "Invalid PDF format"
+            is_valid = False
     elif file_extension == '.md':
-        is_valid = validate_markdown(file_path)
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                content = f.read()
+
+            if len(content) < MIN_FILE_LENGTH:
+                error_detail = f"Markdown file contains {len(content)} characters but must contain at least {MIN_FILE_LENGTH}"
+                is_valid = False
+            elif not any(marker in content for marker in ['#', '-', '*', '`', '[', '>']):
+                error_detail = "Markdown file does not contain any valid Markdown elements"
+                is_valid = False
+            else:
+                is_valid = True
+        except:
+            error_detail = "Invalid Markdown format"
+            is_valid = False
     elif file_extension == '.html':
-        is_valid = validate_html(file_path)
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                content = f.read()
+
+            if len(content) < MIN_FILE_LENGTH:
+                error_detail = f"HTML file contains {len(content)} characters but must contain at least {MIN_FILE_LENGTH}"
+                is_valid = False
+            else:
+                BeautifulSoup(content, 'html.parser')
+                is_valid = True
+        except:
+            error_detail = "Invalid HTML format"
+            is_valid = False
     elif file_extension == '.txt':
-        is_valid = validate_txt(file_path)
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                content = f.read()
+            content_length = len(content.strip())
+
+            if content_length < MIN_FILE_LENGTH:
+                error_detail = f"Text file contains {content_length} characters but must contain at least {MIN_FILE_LENGTH}"
+                is_valid = False
+            else:
+                is_valid = True
+        except:
+            error_detail = "Invalid text format"
+            is_valid = False
 
     if not is_valid:
         # Supprimer le fichier invalide
         os.remove(file_path)
-        raise HTTPException(status_code=400, detail=f"Invalid {file_extension[1:].upper()} file")
+        raise HTTPException(status_code=400, detail=error_detail or f"Invalid {file_extension[1:].upper()} file")
 
     # Store file path for later use
     session_files[session_id] = file_path
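With this change the route re-implements each check inline so it can attach a precise error_detail to the 400 response, which leaves the validate_* helpers above duplicating the same logic. A possible consolidation, shown as a sketch only — the helper name and the (bool, str) return shape are hypothetical, not part of this commit; MIN_FILE_LENGTH is the threshold the commit introduces:

from typing import Tuple

from PyPDF2 import PdfReader

MIN_FILE_LENGTH = 500  # same minimum the commit adds to upload.py

def validate_pdf_with_detail(file_path: str) -> Tuple[bool, str]:
    """Validate a PDF and return (is_valid, user-facing error detail)."""
    try:
        reader = PdfReader(file_path)
        if len(reader.pages) == 0:
            return False, "PDF must contain at least one page"
        # Enforce the same extracted-text minimum the route checks inline
        text = "".join(page.extract_text() or "" for page in reader.pages)
        if len(text) < MIN_FILE_LENGTH:
            return False, f"PDF contains {len(text)} characters but must contain at least {MIN_FILE_LENGTH}"
        return True, ""
    except Exception:
        return False, "Invalid PDF format"

Each extension branch of the route would then reduce to one call plus the single HTTPException at the end, keeping the detailed messages without the duplication.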
backend/tasks/create_bench_config_file.py CHANGED
@@ -123,7 +123,8 @@ class CreateBenchConfigTask:
         required_models = [
             # "Qwen/Qwen2.5-72B-Instruct"
             # "meta-llama/Llama-3.1-8B-Instruct"
-            "Qwen/Qwen2.5-32B-Instruct"
+            # "Qwen/Qwen2.5-32B-Instruct",
+            "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
         ]
 
         # Track found models
@@ -166,11 +167,11 @@ class CreateBenchConfigTask:
             "model_list": model_list,
 
             "model_roles": {
-                "ingestion": ["Qwen/Qwen2.5-32B-Instruct"],
-                "summarization": ["Qwen/Qwen2.5-32B-Instruct"],
+                "ingestion": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
+                "summarization": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
                 "chunking": ["intfloat/multilingual-e5-large-instruct"],
-                "single_shot_question_generation": ["Qwen/Qwen2.5-32B-Instruct"],
-                "multi_hop_question_generation": ["Qwen/Qwen2.5-32B-Instruct"],
+                "single_shot_question_generation": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
+                "multi_hop_question_generation": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
             },
             "pipeline": {
                 "ingestion": {
@@ -201,7 +202,7 @@ class CreateBenchConfigTask:
                 "additional_instructions": "Generate rich and creative questions to test a curious adult",
                 "chunk_sampling": {
                     "mode": "count",
-                    "value": 10,
+                    "value": 5,
                     "random_seed": 123,
                 },
             },
backend/tasks/evaluation_task.py CHANGED
@@ -15,9 +15,20 @@ from typing import List, Dict
 from tasks.get_available_model_provider import get_available_model_provider
 from huggingface_hub import HfApi
 import asyncio
+from datasets import load_dataset
+# Default timeout value
+DEFAULT_EVALUATION_TIMEOUT = 60.0  # 1 minute by default
 
-# Valeur par défaut du timeout
-DEFAULT_EVALUATION_TIMEOUT = 120.0  # 1 minute par défaut
+# Models to evaluate - only accessible models
+DEFAULT_EVALUATION_MODELS = [
+    "Qwen/QwQ-32B",
+    "Qwen/Qwen2.5-72B-Instruct",
+    "Qwen/Qwen2.5-32B-Instruct",
+    "meta-llama/Llama-3.1-8B-Instruct",
+    "meta-llama/Llama-3.3-70B-Instruct",
+    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
+    "mistralai/Mistral-Small-24B-Instruct-2501",
+]
 
 class EvaluationTask:
     """
@@ -42,9 +53,9 @@ class EvaluationTask:
         self.timeout = timeout if timeout is not None else DEFAULT_EVALUATION_TIMEOUT
         self.current_step = "initializing"
         self.completed_steps = []
-        self.step_start_time = time.time()  # Enregistrer le temps de début de l'étape actuelle
+        self.step_start_time = time.time()  # Record the start time of the current step
 
-        # Nettoyer les anciens résultats si demandé
+        # Clean old results if requested
         if clean_old_results:
             self.clean_old_results()
 
@@ -55,18 +66,18 @@ class EvaluationTask:
         Args:
             step: Name of the step to update
         """
-        # Calculer le temps écoulé depuis le début de l'étape précédente
+        # Calculate the elapsed time since the start of the previous step
         elapsed_since_step_start = time.time() - self.step_start_time
 
-        # Si moins d'une seconde s'est écoulée, attendre pour compléter la seconde
+        # If less than one second has passed, wait to complete the second
         if elapsed_since_step_start < 1.0:
             await asyncio.sleep(1.0 - elapsed_since_step_start)
 
-        # Mettre à jour l'étape courante et enregistrer le nouvel horodatage
+        # Update the current step and record the new timestamp
        self.current_step = step
        self.step_start_time = time.time()
 
-        # Ajouter aux étapes complétées si nécessaire
+        # Add to completed steps if necessary
        if step not in self.completed_steps:
            self.completed_steps.append(step)
 
@@ -114,12 +125,12 @@ class EvaluationTask:
         Save evaluation results directly to the dataset on the Hub without persisting locally
         """
         try:
-            # Trier les résultats par précision (du plus précis au moins précis)
+            # Sort results by accuracy (from most accurate to least accurate)
             sorted_results = sorted(self.results, key=lambda x: x.get('accuracy', 0), reverse=True)
 
-            # Créer un fichier temporaire pour les résultats
+            # Create a temporary file for the results
             with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
-                # Ajouter metadata aux résultats
+                # Add metadata to the results
                 final_results = {
                     "metadata": {
                         "evaluation_date": datetime.now().isoformat(),
@@ -143,7 +154,7 @@ class EvaluationTask:
 
             print(f"[{datetime.now().strftime('%H:%M:%S')}] Results saved to Hub at {self.dataset_name}/lighteval_results.json")
 
-            # Supprimer le fichier temporaire
+            # Delete the temporary file
             os.unlink(temp_file_path)
         except Exception as e:
             print(f"[{datetime.now().strftime('%H:%M:%S')}] Failed to save results to Hub: {str(e)}")
@@ -267,15 +278,15 @@ TASKS_TABLE = [yourbench]
             results = json.load(f)
             print(f"[{datetime.now().strftime('%H:%M:%S')}] Results structure: {json.dumps(list(results.keys()))}")
 
-            # Vérifier que la structure est celle attendue
+            # Verify that the structure is as expected
             if "results" in results and "all" in results["results"] and "accuracy" in results["results"]["all"]:
                 accuracy = results["results"]["all"]["accuracy"]
                 print(f"[{datetime.now().strftime('%H:%M:%S')}] Extracted accuracy: {accuracy}")
             else:
-                print(f"[{datetime.now().strftime('%H:%M:%S')}] Structure de résultats inattendue. Clés disponibles: {list(results.keys())}")
+                print(f"[{datetime.now().strftime('%H:%M:%S')}] Unexpected results structure. Available keys: {list(results.keys())}")
                 if "results" in results:
-                    print(f"[{datetime.now().strftime('%H:%M:%S')}] Clés dans 'results': {list(results['results'].keys()) if isinstance(results['results'], dict) else 'pas un dictionnaire'}")
-                raise ValueError(f"Structure de résultats inattendue pour {model_name}")
+                    print(f"[{datetime.now().strftime('%H:%M:%S')}] Keys in 'results': {list(results['results'].keys()) if isinstance(results['results'], dict) else 'not a dictionary'}")
+                raise ValueError(f"Unexpected results structure for {model_name}")
 
             result_data = {
                 "model": model_name,
@@ -315,38 +326,26 @@ TASKS_TABLE = [yourbench]
         # Load environment variables
         load_dotenv()
 
-        # Models to evaluate - uniquement les modèles accessibles
-        models = [
-            "Qwen/QwQ-32B",
-            "Qwen/Qwen2.5-72B-Instruct",
-            "Qwen/Qwen2.5-32B-Instruct",
-            "meta-llama/Llama-3.1-8B-Instruct",
-            "meta-llama/Llama-3.3-70B-Instruct",
-            "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
-            "mistralai/Mistral-Small-24B-Instruct-2501",
-        ]
-
-        # Log pour voir la structure du dataset
+        # Log to see the structure of the dataset
         try:
-            from datasets import load_dataset
-            print(f"[{datetime.now().strftime('%H:%M:%S')}] Tentative de chargement du dataset {self.dataset_name} pour inspection")
+            print(f"[{datetime.now().strftime('%H:%M:%S')}] Attempting to load dataset {self.dataset_name} for inspection")
             dataset = load_dataset(self.dataset_name, "single_shot_questions", split="train")
 
-            # Vérifier la structure du premier exemple
+            # Verify the structure of the first example
             if len(dataset) > 0:
                 first_example = dataset[0]
-                print(f"[{datetime.now().strftime('%H:%M:%S')}] Structure du premier exemple:")
-                print(f"[{datetime.now().strftime('%H:%M:%S')}] Clés: {first_example.keys()}")
-                print(f"[{datetime.now().strftime('%H:%M:%S')}] Citations: {first_example.get('citations', 'non trouvé')}")
+                print(f"[{datetime.now().strftime('%H:%M:%S')}] Structure of the first example:")
+                print(f"[{datetime.now().strftime('%H:%M:%S')}] Keys: {first_example.keys()}")
+                print(f"[{datetime.now().strftime('%H:%M:%S')}] Citations: {first_example.get('citations', 'not found')}")
         except Exception as e:
-            print(f"[{datetime.now().strftime('%H:%M:%S')}] Erreur lors de l'inspection du dataset: {str(e)}")
+            print(f"[{datetime.now().strftime('%H:%M:%S')}] Error inspecting the dataset: {str(e)}")
 
         # Step 1: Check available providers for each model
         await self.update_step("finding_available_model_providers")
         print(f"[{datetime.now().strftime('%H:%M:%S')}] Checking available providers for models...")
 
         model_providers = {}
-        for model in models:
+        for model in DEFAULT_EVALUATION_MODELS:
             provider = get_available_model_provider(model, verbose=True)
             if provider:
                 model_providers[model] = provider
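For reference, the result-structure guard translated above expects this shape from lighteval's output JSON; as a standalone sketch (the function name is hypothetical, not part of the commit):

def extract_accuracy(results: dict, model_name: str) -> float:
    # Expected shape: {"results": {"all": {"accuracy": <float>, ...}, ...}}
    if "results" in results and "all" in results["results"] and "accuracy" in results["results"]["all"]:
        return results["results"]["all"]["accuracy"]
    raise ValueError(f"Unexpected results structure for {model_name}")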
backend/tasks/get_available_model_provider.py CHANGED
@@ -8,7 +8,7 @@ from dotenv import load_dotenv
 load_dotenv()
 
 # Define preferred providers
-PREFERRED_PROVIDERS = ["fireworks-ai", "sambanova", "novita"]
+PREFERRED_PROVIDERS = ["fireworks-ai","sambanova", "novita"]
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -30,11 +30,17 @@ def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool
     Returns:
         True if the provider is available, False otherwise
     """
+
     try:
         # Get HF token from environment
         hf_token = os.environ.get("HF_TOKEN")
         if not hf_token:
             raise ValueError("HF_TOKEN not defined in environment")
+        # Get HF token from environment
+        hf_organization = os.environ.get("HF_ORGANIZATION")
+        if not hf_organization:
+            raise ValueError("HF_ORGANIZATION not defined in environment")
+
 
         if verbose:
             logger.info(f"Testing provider {provider} for model {model_name}")
@@ -44,6 +50,7 @@ def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool
             model=model_name,
             token=hf_token,
             provider=provider,
+            bill_to=hf_organization,
             timeout=10  # Increased timeout to allow model loading
         )
 
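The bill_to addition is the substantive piece of this file: provider availability probes are now billed to the organization rather than to the individual token holder, and the function fails fast when HF_ORGANIZATION is unset. In isolation the pattern looks like this — a sketch assuming the client being constructed is huggingface_hub.InferenceClient, which the surrounding arguments suggest; the model and provider values are examples taken from this commit:

import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    model="deepseek-ai/DeepSeek-R1-Distill-Llama-70B",  # example from this diff
    token=os.environ["HF_TOKEN"],
    provider="novita",  # one of PREFERRED_PROVIDERS
    bill_to=os.environ["HF_ORGANIZATION"],  # charge the org, not the user token
    timeout=10,
)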
frontend/server.js CHANGED
@@ -34,8 +34,6 @@ app.use(
     "/health",
     "/upload",
     "/generate-benchmark",
-    "/config-logs",
-    "/benchmark-logs",
     "/benchmark-progress",
     "/benchmark-questions",
     "/evaluate-benchmark",
frontend/src/App.js CHANGED
@@ -9,14 +9,13 @@ import {
 import getTheme from "./config/theme";
 import { useThemeMode } from "./hooks/useThemeMode";
 import { ThemeProvider } from "@mui/material/styles";
-import ExternalLinks from "./components/ExternalLinks";
-import KeyboardShortcuts from "./components/KeyboardShortcuts";
+import Navigation from "./components/Navigation";
 import HomePage from "./pages/HomePage";
 import BenchmarkGenerationPage from "./pages/BenchmarkGenerationPage";
 import BenchmarkDisplayPage from "./pages/BenchmarkDisplayPage";
 import BenchmarkEvaluationPage from "./pages/BenchmarkEvaluationPage";
 import EvaluationDisplayPage from "./pages/EvaluationDisplayPage";
-
+import Footer from "./components/Footer/Footer";
 // Function to synchronize URL hash with parent Hugging Face page
 const syncURLWithParent = () => {
   // This function is only necessary in a Hugging Face Spaces environment
@@ -83,9 +82,8 @@ function App() {
       <CssBaseline />
       <Router>
         <Container maxWidth="md">
-          <ExternalLinks />
+          <Navigation />
           <Box sx={{ pt: 12, pb: 4 }}>
-            <KeyboardShortcuts />
             <Routes>
               <Route path="/" element={<HomePage />} />
               <Route
@@ -106,22 +104,7 @@
               />
               <Route path="*" element={<Navigate to="/" replace />} />
             </Routes>
-            <Box
-              component="footer"
-              sx={{
-                mt: 4,
-                textAlign: "center",
-                fontSize: "0.875rem",
-                color: "text.secondary",
-                opacity: 0.7,
-                maxWidth: { xs: "100%", md: "70%" },
-                mx: "auto",
-              }}
-            >
-              We keep processed documents for research purposes, to which you
-              agree by using the space. For a fully private usage, please
-              duplicate the advanced space
-            </Box>
+            <Footer />
           </Box>
         </Container>
       </Router>
frontend/src/components/{BenchmarkCreateForm.jsx → Benchmark/CreateForm.jsx} RENAMED
@@ -25,18 +25,18 @@ import MenuBookIcon from "@mui/icons-material/MenuBook";
 import DownloadIcon from "@mui/icons-material/Download";
 import VisibilityIcon from "@mui/icons-material/Visibility";
 import CloseIcon from "@mui/icons-material/Close";
-import { useThemeMode } from "../hooks/useThemeMode";
-import getTheme from "../config/theme";
-import API_CONFIG from "../config/api";
+import { useThemeMode } from "../../hooks/useThemeMode";
+import getTheme from "../../config/theme";
+import API_CONFIG from "../../config/api";
 
 /**
  * Component for creating a new benchmark, including file upload and generation initiation
  *
  * @param {Object} props - Component props
  * @param {Function} props.onStartGeneration - Callback when generation starts with sessionId
- * @returns {JSX.Element} BenchmarkCreateForm component
+ * @returns {JSX.Element} CreateForm component
  */
-function BenchmarkCreateForm({ onStartGeneration }) {
+function CreateForm({ onStartGeneration }) {
   const { mode } = useThemeMode();
   const theme = getTheme(mode);
   const [isDragging, setIsDragging] = useState(false);
@@ -110,11 +110,11 @@ function BenchmarkCreateForm({ onStartGeneration }) {
       return;
     }
 
-    // Check file size limit (1MB = 1048576 bytes)
-    if (file.size > 1048576) {
+    // Check file size limit (3MB = 3145728 bytes)
+    if (file.size > 1048576 * 2) {
       setUploadStatus({
         success: false,
-        message: "File size exceeds the 1MB limit",
+        message: "File size exceeds the 2MB limit",
       });
       setOpenSnackbar(true);
       return;
@@ -192,11 +192,11 @@ function BenchmarkCreateForm({ onStartGeneration }) {
       return;
     }
 
-    // Check file size limit (10MB = 10485760 bytes)
-    if (file.size > 10485760) {
+    // Check file size limit (3MB = 3145728 bytes)
+    if (file.size > 1048576 * 3) {
      setUploadStatus({
        success: false,
-        message: "File size exceeds the 10MB limit",
+        message: "File size exceeds the 3MB limit",
      });
      setOpenSnackbar(true);
      return;
@@ -580,4 +580,4 @@ function BenchmarkCreateForm({ onStartGeneration }) {
   );
 }
 
-export default BenchmarkCreateForm;
+export default CreateForm;
frontend/src/components/{BenchmarkDisplay.jsx → Benchmark/Display.jsx} RENAMED
@@ -16,9 +16,9 @@ import AssessmentIcon from "@mui/icons-material/Assessment";
 import LinkIcon from "@mui/icons-material/Link";
 import DownloadIcon from "@mui/icons-material/Download";
 import CheckCircleIcon from "@mui/icons-material/CheckCircle";
-import API_CONFIG from "../config/api";
-import { useThemeMode } from "../hooks/useThemeMode";
-import getTheme from "../config/theme";
+import API_CONFIG from "../../config/api";
+import { useThemeMode } from "../../hooks/useThemeMode";
+import getTheme from "../../config/theme";
 
 /**
  * Component to display benchmark information and evaluation button
@@ -30,7 +30,7 @@ import getTheme from "../config/theme";
  * @param {string} props.datasetUrl - URL to the Hugging Face dataset
  * @returns {JSX.Element} Benchmark display component
  */
-const BenchmarkDisplay = ({
+const Display = ({
   sampleQuestions = [],
   onStartEvaluation,
   sessionId,
@@ -40,26 +40,6 @@ const BenchmarkDisplay = ({
   const { mode } = useThemeMode();
   const theme = getTheme(mode);
 
-  // Default questions if none provided
-  const questions =
-    sampleQuestions.length > 0
-      ? sampleQuestions
-      : [
-          {
-            id: 1,
-            question: "What are the key benefits of the described technology?",
-            answer: "No answer available",
-            type: "single_shot",
-          },
-          {
-            id: 2,
-            question:
-              "Based on the context about machine learning frameworks, how does TensorFlow compare to PyTorch in terms of deployment capabilities?",
-            answer: "No answer available",
-            type: "multi_hop",
-          },
-        ];
-
   const handleEvaluationClick = () => {
     if (onStartEvaluation) {
       onStartEvaluation();
@@ -139,7 +119,7 @@ const BenchmarkDisplay = ({
       </Typography>
 
       <Box sx={{ mb: 3 }}>
-        {questions.map((q, index) => (
+        {sampleQuestions.map((q, index) => (
           <Card
             key={q.id || index}
             variant="outlined"
@@ -179,4 +159,4 @@ const BenchmarkDisplay = ({
   );
 };
 
-export default BenchmarkDisplay;
+export default Display;
frontend/src/components/{BenchmarkGenerator.jsx → Benchmark/Generator.jsx} RENAMED
@@ -2,14 +2,15 @@ import React, { useState, useEffect, useRef } from "react";
2
  import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
3
  import PlayArrowIcon from "@mui/icons-material/PlayArrow";
4
  import AccessTimeIcon from "@mui/icons-material/AccessTime";
5
- import LogDisplay from "./LogDisplay";
6
  import { useNavigate, useSearchParams } from "react-router-dom";
7
- import API_CONFIG from "../config/api";
 
8
 
9
- // Simulation time in milliseconds for pre-calculated documents
10
  const SIMULATION_DURATION = 80000; // 20 seconds
11
 
12
- // Define all benchmark steps in sequence
13
  const BENCHMARK_STEPS = [
14
  "configuration",
15
  "provider_check",
@@ -20,7 +21,7 @@ const BENCHMARK_STEPS = [
20
  "single_shot_question_generation",
21
  ];
22
 
23
- // Step labels for display (more user-friendly names)
24
  const STEP_LABELS = {
25
  configuration: "Configuration",
26
  provider_check: "Finding providers",
@@ -34,7 +35,7 @@ const STEP_LABELS = {
34
  evaluation_saving_results: "Saving evaluation results",
35
  };
36
 
37
- // Simulated log messages for pre-calculated documents
38
  const SIMULATED_LOGS = [
39
  "[INFO] Initializing benchmark generation...",
40
  "[INFO] Generating base configuration file...",
@@ -55,18 +56,21 @@ const SIMULATED_LOGS = [
55
  ];
56
 
57
  /**
58
- * Component to handle benchmark generation and display logs
59
  *
60
- * @param {Object} props - Component props
61
- * @param {string} props.sessionId - The session ID for the uploaded file
62
- * @param {boolean} props.isDefaultDocument - Whether this is a pre-calculated document
63
- * @param {Function} props.onComplete - Function to call when generation is complete
64
- * @returns {JSX.Element} Benchmark generator component
65
  */
66
- const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
 
67
  const [searchParams] = useSearchParams();
68
  const isDefault =
69
  searchParams.get("isDefault") === "true" || isDefaultDocument;
 
 
70
  const [generating, setGenerating] = useState(false);
71
  const [generationComplete, setGenerationComplete] = useState(false);
72
  const [generationLogs, setGenerationLogs] = useState([]);
@@ -76,53 +80,68 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
76
  const [activeStep, setActiveStep] = useState(1);
77
  const [elapsedTime, setElapsedTime] = useState(0);
78
 
79
- // Reference to keep track of the polling interval
80
  const pollingIntervalRef = useRef(null);
81
-
82
- // Reference to keep track of the timer interval
83
  const timerIntervalRef = useRef(null);
84
-
85
- // Reference for starting time
86
  const startTimeRef = useRef(null);
87
-
88
- // Simulation interval reference
89
  const simulationIntervalRef = useRef(null);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
- // Start generation on component mount
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  useEffect(() => {
93
- // Set start time
94
  startTimeRef.current = Date.now();
95
 
96
- // Reference for the timeout
97
- let timeoutRef = null;
98
-
99
- // Start timer
100
  timerIntervalRef.current = setInterval(() => {
101
  const timeElapsed = Math.floor(
102
  (Date.now() - startTimeRef.current) / 1000
103
  );
104
  setElapsedTime(timeElapsed);
105
 
106
- // Check if the elapsed time exceeds 8 minutes (480 seconds) and we are not in simulation mode
107
- if (timeElapsed > 480 && !isDefault && !generationComplete) {
108
- // Display an error message in case of timeout
109
  setError(
110
  "The benchmark generation is taking too long. The demo is currently under heavy load, please try again later."
111
  );
112
- setGenerationComplete(true);
113
-
114
- // Clear intervals
115
- if (pollingIntervalRef.current) {
116
- clearInterval(pollingIntervalRef.current);
117
- }
118
-
119
- if (timerIntervalRef.current) {
120
- clearInterval(timerIntervalRef.current);
121
- }
122
  }
123
  }, 1000);
124
 
125
- // Handler to detect when the page becomes visible again
126
  const handleVisibilityChange = () => {
127
  if (
128
  document.visibilityState === "visible" &&
@@ -130,45 +149,22 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
130
  !generationComplete
131
  ) {
132
  console.log("Page became visible, checking for missed steps...");
133
- // Force a new request to retrieve the logs
 
134
  const checkCurrentState = async () => {
135
  try {
136
- // First try to retrieve the benchmark logs
137
- const logsResponse = await fetch(
138
- `${API_CONFIG.BASE_URL}/benchmark-logs/${sessionId}`
139
  );
140
 
141
- if (logsResponse.ok) {
142
- const logsResult = await logsResponse.json();
143
- if (logsResult.logs) {
144
- setGenerationLogs(logsResult.logs);
145
  }
146
 
147
- // If the task is complete, update the state
148
- if (logsResult.is_completed) {
149
- setGenerationComplete(true);
150
- if (pollingIntervalRef.current) {
151
- clearInterval(pollingIntervalRef.current);
152
- }
153
- if (onComplete) {
154
- onComplete({
155
- success: true,
156
- sessionId,
157
- logs: logsResult.logs,
158
- });
159
- }
160
- }
161
- } else {
162
- // If the benchmark task does not exist, try the configuration logs
163
- const configResponse = await fetch(
164
- `${API_CONFIG.BASE_URL}/config-logs/${sessionId}`
165
- );
166
-
167
- if (configResponse.ok) {
168
- const configResult = await configResponse.json();
169
- if (configResult.logs) {
170
- setGenerationLogs(configResult.logs);
171
- }
172
  }
173
  }
174
  } catch (error) {
@@ -180,103 +176,89 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
180
  }
181
  };
182
 
183
- // Add the listener for visibility change
184
  document.addEventListener("visibilitychange", handleVisibilityChange);
185
 
 
186
  if (isDefault) {
187
  simulateGeneration();
188
  } else {
189
  generateBenchmark();
190
  }
191
 
192
- // Clean up the polling interval and timer when the component unmounts
193
  return () => {
194
- if (pollingIntervalRef.current) {
195
- clearInterval(pollingIntervalRef.current);
196
- }
197
- if (timerIntervalRef.current) {
198
- clearInterval(timerIntervalRef.current);
199
- }
200
- if (simulationIntervalRef.current) {
201
- clearInterval(simulationIntervalRef.current);
202
- }
203
  document.removeEventListener("visibilitychange", handleVisibilityChange);
204
  };
205
  }, [isDefault, sessionId, generationComplete, onComplete]);
206
 
207
- // Simulate the benchmark generation for pre-calculated documents
208
  const simulateGeneration = () => {
209
- setGenerating(true);
210
- setGenerationLogs([]);
211
- setError(null);
212
- setCurrentPhase("initializing");
213
- setCompletedSteps([]);
214
- setActiveStep(1);
215
 
216
- // Timing variables for simulation
217
  const totalSteps = SIMULATED_LOGS.length;
218
- const totalDuration = SIMULATION_DURATION; // 20 seconds
219
- const intervalPerStep = totalDuration / totalSteps;
220
  let currentStep = 0;
221
 
222
- // Function to add next log message
223
  const addNextLog = () => {
224
  if (currentStep < SIMULATED_LOGS.length) {
225
  const newLogs = [...generationLogs, SIMULATED_LOGS[currentStep]];
226
  setGenerationLogs(newLogs);
227
  currentStep++;
228
 
229
- // Check if completed
230
  if (currentStep >= SIMULATED_LOGS.length) {
231
- // Simulation complete
232
  setTimeout(() => {
233
  setCurrentPhase("complete");
234
- setGenerationComplete(true);
235
- clearInterval(simulationIntervalRef.current);
236
- if (onComplete) {
237
- onComplete({
238
- success: true,
239
- sessionId,
240
- logs: newLogs,
241
- });
242
- }
243
  }, 1000);
244
  }
245
  }
246
  };
247
 
248
- // Start simulation
249
  simulationIntervalRef.current = setInterval(addNextLog, intervalPerStep);
250
  };
251
 
252
- // Determine the current phase and completed steps based on logs
253
  useEffect(() => {
254
  if (generationLogs.length === 0) return;
255
 
256
- // Recalculate completed steps completely each time
257
- // instead of just adding new steps
258
  const newCompletedSteps = [];
259
 
260
- // Check for rate limiting errors
261
- const hasRateLimitError = generationLogs.some(
262
  (log) =>
263
  log.includes("RATE_LIMIT_EXCEEDED") ||
264
  log.includes("heavy load") ||
265
- log.includes("rate limit")
 
 
 
 
266
  );
267
 
268
- if (hasRateLimitError) {
269
- setError(
270
- "The demo is under heavy load at the moment. Please try again later."
271
- );
272
- setGenerationComplete(true);
273
- if (pollingIntervalRef.current) {
274
- clearInterval(pollingIntervalRef.current);
275
- }
 
 
 
 
276
  return;
277
  }
278
 
279
- // Identify all completed steps in all logs
280
  generationLogs.forEach((log) => {
281
  const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
282
  if (match && match[1]) {
@@ -290,48 +272,48 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
290
  }
291
  });
292
 
293
- // Determine the active step based on completed steps
294
  let newActiveStep = activeStep;
295
 
296
  if (newCompletedSteps.length > 0) {
297
- // Find the most advanced step in the logs
298
  const maxCompletedStepIndex = Math.max(
299
  ...newCompletedSteps.map((step) => BENCHMARK_STEPS.indexOf(step))
300
  );
301
- // Move to the next step
302
  const calculatedStep = maxCompletedStepIndex + 1;
303
 
304
- // Update only if the new step is more advanced than the current step
305
  if (calculatedStep > activeStep) {
306
  newActiveStep = calculatedStep;
307
  }
308
 
309
- // Ensure that activeStep does not exceed the total number of steps
310
  if (newActiveStep >= BENCHMARK_STEPS.length) {
311
  newActiveStep = BENCHMARK_STEPS.length;
312
  }
313
  } else if (activeStep === 0) {
314
- // If no step is found and the active step is 0, move to 1
315
  newActiveStep = 1;
316
  }
317
 
318
- // Update the state if the steps have changed
319
  if (JSON.stringify(newCompletedSteps) !== JSON.stringify(completedSteps)) {
320
  setCompletedSteps(newCompletedSteps);
321
  }
322
 
323
- // Update the active step only if it has changed
324
  if (newActiveStep !== activeStep) {
325
  setActiveStep(newActiveStep);
326
  }
327
 
328
- // Skip the rest of the log processing if we're simulating
329
  if (isDefault) return;
330
 
331
- // Check the latest logs to determine the current phase
332
- const recentLogs = generationLogs.slice(-10); // Check more logs
333
 
334
- // Detect completion conditions
335
  const isComplete =
336
  recentLogs.some((log) =>
337
  log.includes("[SUCCESS] Benchmark process completed successfully")
@@ -344,20 +326,7 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
344
 
345
  if (isComplete) {
346
  setCurrentPhase("complete");
347
- setGenerationComplete(true);
348
- // Stop polling when benchmark is complete
349
- if (pollingIntervalRef.current) {
350
- clearInterval(pollingIntervalRef.current);
351
- }
352
- // Notify parent component that generation is complete
353
- if (onComplete) {
354
- console.log("Notifying parent that generation is complete");
355
- onComplete({
356
- success: true,
357
- sessionId,
358
- logs: generationLogs,
359
- });
360
- }
361
  } else if (
362
  recentLogs.some((log) => log.includes("Starting ingestion process"))
363
  ) {
@@ -376,31 +345,23 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
376
  isDefault,
377
  ]);
378
 
 
379
  const generateBenchmark = async () => {
380
  if (!sessionId) {
381
  setError("Missing session ID");
382
  return;
383
  }
384
 
385
- setGenerating(true);
386
- setGenerationLogs([]);
387
- setError(null);
388
- setCurrentPhase("initializing");
389
- setCompletedSteps([]);
390
- setActiveStep(1);
391
 
392
  try {
393
- // Call the API to generate the benchmark
394
  const response = await fetch(
395
  `${API_CONFIG.BASE_URL}/generate-benchmark`,
396
  {
397
  method: "POST",
398
- headers: {
399
- "Content-Type": "application/json",
400
- },
401
- body: JSON.stringify({
402
- session_id: sessionId,
403
- }),
404
  }
405
  );
406
 
@@ -409,16 +370,16 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
409
  if (response.ok) {
410
  setGenerationLogs(result.logs || []);
411
 
412
- // Set up polling to track progress
413
  pollingIntervalRef.current = setInterval(async () => {
414
- // Check if we have already completed
415
  if (generationComplete) {
416
  clearInterval(pollingIntervalRef.current);
417
  return;
418
  }
419
 
420
  try {
421
- // Call the API to get the latest logs
422
  const logsResponse = await fetch(
423
  `${API_CONFIG.BASE_URL}/benchmark-progress/${sessionId}`
424
  );
@@ -426,7 +387,7 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
426
  if (logsResponse.ok) {
427
  const logsResult = await logsResponse.json();
428
 
429
- // Update logs if there are new ones
430
  if (
431
  logsResult.logs &&
432
  logsResult.logs.length > generationLogs.length
@@ -434,20 +395,19 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
434
  setGenerationLogs(logsResult.logs);
435
  }
436
 
437
- // Check if the task is complete
438
  if (logsResult.is_completed) {
439
  setGenerationComplete(true);
440
  clearInterval(pollingIntervalRef.current);
441
- // Notification is now handled in the useEffect above
442
  }
443
  }
444
  } catch (error) {
445
  console.log("Error polling for logs:", error);
446
- // Do not stop polling in case of network errors
447
  }
448
- }, 2000); // Poll every 2 seconds
449
  } else {
450
- // Handle error
451
  setGenerationLogs([`Error: ${result.error || "Unknown error"}`]);
452
  setError(result.error || "Benchmark generation failed");
453
  }
@@ -460,29 +420,29 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
460
  }
461
  };
462
 
463
- // Get the current step information for display
464
  const getCurrentStepInfo = () => {
465
  const totalSteps = BENCHMARK_STEPS.length;
466
  const currentStepIndex = activeStep;
467
 
468
- // If there's no active step yet
469
  if (currentStepIndex <= 1 && completedSteps.length === 0) {
470
  return `Starting (1/${totalSteps})`;
471
  }
472
 
473
- // If all steps are completed
474
  if (currentStepIndex >= totalSteps) {
475
  return `Complete (${totalSteps}/${totalSteps})`;
476
  }
477
 
478
- // Get current step name
479
  const currentStepName =
480
  STEP_LABELS[BENCHMARK_STEPS[currentStepIndex]] || "Processing";
481
 
482
  return `${currentStepName} (${currentStepIndex}/${totalSteps})`;
483
  };
484
 
485
- // Format elapsed time in HH:MM:SS
486
  const formatElapsedTime = () => {
487
  const hours = Math.floor(elapsedTime / 3600);
488
  const minutes = Math.floor((elapsedTime % 3600) / 60);
@@ -495,13 +455,27 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
495
  ].join(":");
496
  };
497
 
498
- // If complete, stop the timer
499
  useEffect(() => {
500
  if (generationComplete && timerIntervalRef.current) {
501
  clearInterval(timerIntervalRef.current);
502
  }
503
  }, [generationComplete]);
504
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
  return (
506
  <Paper
507
  elevation={3}
@@ -544,9 +518,7 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
544
  </Box>
545
 
546
  {error ? (
547
- <Alert severity="error" sx={{ width: "100%" }}>
548
- {error}
549
- </Alert>
550
  ) : (
551
  <>
552
  <CircularProgress size={60} sx={{ mb: 2 }} />
@@ -581,4 +553,4 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
581
  );
582
  };
583
 
584
- export default BenchmarkGenerator;
 
2
  import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
3
  import PlayArrowIcon from "@mui/icons-material/PlayArrow";
4
  import AccessTimeIcon from "@mui/icons-material/AccessTime";
5
+ import LogDisplay from "../LogDisplay";
6
  import { useNavigate, useSearchParams } from "react-router-dom";
7
+ import API_CONFIG from "../../config/api";
8
+ import ErrorDisplay from "../common/ErrorDisplay";
9
 
10
+ // Durée de simulation en millisecondes pour les documents précalculés
11
  const SIMULATION_DURATION = 80000; // 20 seconds
12
 
13
+ // Définir toutes les étapes du benchmark en séquence
14
  const BENCHMARK_STEPS = [
15
  "configuration",
16
  "provider_check",
 
21
  "single_shot_question_generation",
22
  ];
23
 
24
+ // Étiquettes des étapes pour l'affichage (noms plus conviviaux)
25
  const STEP_LABELS = {
26
  configuration: "Configuration",
27
  provider_check: "Finding providers",
 
35
  evaluation_saving_results: "Saving evaluation results",
36
  };
37
 
38
+ // Messages de log simulés pour les documents précalculés
39
  const SIMULATED_LOGS = [
40
  "[INFO] Initializing benchmark generation...",
41
  "[INFO] Generating base configuration file...",
 
56
  ];
57
 
58
  /**
59
+ * Composant pour gérer la génération de benchmark et afficher les logs
60
  *
61
+ * @param {Object} props - Propriétés du composant
62
+ * @param {string} props.sessionId - ID de session pour le fichier uploadé
63
+ * @param {boolean} props.isDefaultDocument - S'il s'agit d'un document précalculé
64
+ * @param {Function} props.onComplete - Fonction à appeler lorsque la génération est terminée
65
+ * @returns {JSX.Element} Composant de génération de benchmark
66
  */
67
+ const Generator = ({ sessionId, isDefaultDocument, onComplete }) => {
68
+ const navigate = useNavigate();
69
  const [searchParams] = useSearchParams();
70
  const isDefault =
71
  searchParams.get("isDefault") === "true" || isDefaultDocument;
72
+
73
+ // États du composant
74
  const [generating, setGenerating] = useState(false);
75
  const [generationComplete, setGenerationComplete] = useState(false);
76
  const [generationLogs, setGenerationLogs] = useState([]);
 
80
  const [activeStep, setActiveStep] = useState(1);
81
  const [elapsedTime, setElapsedTime] = useState(0);
82
 
83
+ // Références pour les intervalles et timers
84
  const pollingIntervalRef = useRef(null);
 
 
85
  const timerIntervalRef = useRef(null);
 
 
86
  const startTimeRef = useRef(null);
 
 
87
  const simulationIntervalRef = useRef(null);
88
+ const hasRedirectedRef = useRef(false);
89
+
90
+ // Fonction pour réinitialiser les états de génération
91
+ const resetGenerationStates = () => {
92
+ setGenerating(true);
93
+ setGenerationLogs([]);
94
+ setError(null);
95
+ setCurrentPhase("initializing");
96
+ setCompletedSteps([]);
97
+ setActiveStep(1);
98
+ };
99
+
100
+ // Fonction pour arrêter les intervalles
101
+ const clearAllIntervals = () => {
102
+ if (pollingIntervalRef.current) clearInterval(pollingIntervalRef.current);
103
+ if (timerIntervalRef.current) clearInterval(timerIntervalRef.current);
104
+ if (simulationIntervalRef.current)
105
+ clearInterval(simulationIntervalRef.current);
106
+ };
107
 
108
+ // Fonction pour notifier la fin de la génération
109
+ const notifyGenerationComplete = (success, logs, errorMsg = null) => {
110
+ setGenerationComplete(true);
111
+ clearAllIntervals();
112
+
113
+ if (onComplete) {
114
+ onComplete({
115
+ success,
116
+ sessionId,
117
+ logs: logs || generationLogs,
118
+ error: errorMsg,
119
+ });
120
+ }
121
+ };
122
+
123
+ // Démarrer la génération au montage du composant
124
  useEffect(() => {
125
+ // Configurer l'heure de départ
126
  startTimeRef.current = Date.now();
127
 
128
+ // Démarrer le timer
 
 
 
129
  timerIntervalRef.current = setInterval(() => {
130
  const timeElapsed = Math.floor(
131
  (Date.now() - startTimeRef.current) / 1000
132
  );
133
  setElapsedTime(timeElapsed);
134
 
135
+ // Vérifier si le temps écoulé dépasse 5 minutes et que nous ne sommes pas en mode simulation
136
+ if (timeElapsed > 300 && !isDefault && !generationComplete) {
 
137
  setError(
138
  "The benchmark generation is taking too long. The demo is currently under heavy load, please try again later."
139
  );
140
+ notifyGenerationComplete(false, null, "Timeout error");
 
 
 
 
 
 
 
 
 
141
  }
142
  }, 1000);
143
 
144
+ // Gestionnaire pour détecter quand la page redevient visible
145
  const handleVisibilityChange = () => {
146
  if (
147
  document.visibilityState === "visible" &&
 
149
  !generationComplete
150
  ) {
151
  console.log("Page became visible, checking for missed steps...");
152
+
153
+ // Forcer une nouvelle requête pour récupérer les logs
154
  const checkCurrentState = async () => {
155
  try {
156
+ const progressResponse = await fetch(
157
+ `${API_CONFIG.BASE_URL}/benchmark-progress/${sessionId}`
 
158
  );
159
 
160
+ if (progressResponse.ok) {
161
+ const progressResult = await progressResponse.json();
162
+ if (progressResult.logs) {
163
+ setGenerationLogs(progressResult.logs);
164
  }
165
 
166
+ if (progressResult.is_completed) {
167
+ notifyGenerationComplete(true, progressResult.logs);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  }
169
  }
170
  } catch (error) {
 
176
  }
177
  };
178
 
179
+ // Ajouter l'écouteur pour le changement de visibilité
180
  document.addEventListener("visibilitychange", handleVisibilityChange);
181
 
182
+ // Lancer la simulation ou la génération
183
  if (isDefault) {
184
  simulateGeneration();
185
  } else {
186
  generateBenchmark();
187
  }
188
 
189
+ // Nettoyer les intervalles et écouteurs lors du démontage
190
  return () => {
191
+ clearAllIntervals();
 
 
 
 
 
 
 
 
192
  document.removeEventListener("visibilitychange", handleVisibilityChange);
193
  };
194
  }, [isDefault, sessionId, generationComplete, onComplete]);
195
 
196
+ // Simuler la génération de benchmark pour les documents précalculés
197
  const simulateGeneration = () => {
198
+ resetGenerationStates();
 
 
 
 
 
199
 
200
+ // Variables de timing pour la simulation
201
  const totalSteps = SIMULATED_LOGS.length;
202
+ const intervalPerStep = SIMULATION_DURATION / totalSteps;
 
203
  let currentStep = 0;
204
 
205
+ // Fonction pour ajouter le prochain message de log
206
  const addNextLog = () => {
207
  if (currentStep < SIMULATED_LOGS.length) {
208
  const newLogs = [...generationLogs, SIMULATED_LOGS[currentStep]];
209
  setGenerationLogs(newLogs);
210
  currentStep++;
211
 
212
+ // Vérifier si terminé
213
  if (currentStep >= SIMULATED_LOGS.length) {
214
+ // Simulation terminée
215
  setTimeout(() => {
216
  setCurrentPhase("complete");
217
+ notifyGenerationComplete(true, newLogs);
 
 
 
 
 
 
 
 
218
  }, 1000);
219
  }
220
  }
221
  };
222
 
223
+ // Démarrer la simulation
224
  simulationIntervalRef.current = setInterval(addNextLog, intervalPerStep);
225
  };
226
 
227
+ // Déterminer la phase actuelle et les étapes terminées en fonction des logs
228
  useEffect(() => {
229
  if (generationLogs.length === 0) return;
230
 
231
+ // Recalculer les étapes terminées à chaque fois
 
232
  const newCompletedSteps = [];
233
 
234
+ // Vérifier les erreurs de limitation de débit et de disponibilité du modèle
235
+ const hasError = generationLogs.some(
236
  (log) =>
237
  log.includes("RATE_LIMIT_EXCEEDED") ||
238
  log.includes("heavy load") ||
239
+ log.includes("rate limit") ||
240
+ log.includes("Required models not available") ||
241
+ log.includes("Configuration failed") ||
242
+ log.includes("Error") ||
243
+ log.includes("ERROR")
244
  );
245
 
246
+ if (hasError) {
247
+ const errorMessage =
248
+ generationLogs.find(
249
+ (log) =>
250
+ log.includes("Required models not available") ||
251
+ log.includes("Configuration failed") ||
252
+ log.includes("Error generating configuration")
253
+ ) ||
254
+ "The demo is under heavy load at the moment. Please try again later.";
255
+
256
+ setError(errorMessage);
257
+ notifyGenerationComplete(false, null, errorMessage);
258
  return;
259
  }
260
 
261
+ // Identify every completed step across all logs
262
  generationLogs.forEach((log) => {
263
  const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
264
  if (match && match[1]) {
 
272
  }
273
  });
274
 
275
+ // Determine the active step from the completed steps
276
  let newActiveStep = activeStep;
277
 
278
  if (newCompletedSteps.length > 0) {
279
+ // Find the furthest step reached in the logs
280
  const maxCompletedStepIndex = Math.max(
281
  ...newCompletedSteps.map((step) => BENCHMARK_STEPS.indexOf(step))
282
  );
283
+ // Advance to the next step
284
  const calculatedStep = maxCompletedStepIndex + 1;
285
 
286
+ // Update only if the new step is further along than the current one
287
  if (calculatedStep > activeStep) {
288
  newActiveStep = calculatedStep;
289
  }
290
 
291
+ // Make sure activeStep does not exceed the total number of steps
292
  if (newActiveStep >= BENCHMARK_STEPS.length) {
293
  newActiveStep = BENCHMARK_STEPS.length;
294
  }
295
  } else if (activeStep === 0) {
296
+ // If no step was found and the active step is 0, move to 1
297
  newActiveStep = 1;
298
  }
299
 
300
+ // Update state if the completed steps changed
301
  if (JSON.stringify(newCompletedSteps) !== JSON.stringify(completedSteps)) {
302
  setCompletedSteps(newCompletedSteps);
303
  }
304
 
305
+ // Update the active step only if it changed
306
  if (newActiveStep !== activeStep) {
307
  setActiveStep(newActiveStep);
308
  }
309
 
310
+ // Skip the rest of the log processing while simulating
311
  if (isDefault) return;
312
 
313
+ // Inspect the latest logs to determine the current phase
314
+ const recentLogs = generationLogs.slice(-10);
315
 
316
+ // Detect completion conditions
317
  const isComplete =
318
  recentLogs.some((log) =>
319
  log.includes("[SUCCESS] Benchmark process completed successfully")
 
326
 
327
  if (isComplete) {
328
  setCurrentPhase("complete");
329
+ notifyGenerationComplete(true, generationLogs);
330
  } else if (
331
  recentLogs.some((log) => log.includes("Starting ingestion process"))
332
  ) {
 
345
  isDefault,
346
  ]);
347
 
348
+ // Generate the benchmark
349
  const generateBenchmark = async () => {
350
  if (!sessionId) {
351
  setError("Missing session ID");
352
  return;
353
  }
354
 
355
+ resetGenerationStates();
356
 
357
  try {
358
+ // Call the API to generate the benchmark
359
  const response = await fetch(
360
  `${API_CONFIG.BASE_URL}/generate-benchmark`,
361
  {
362
  method: "POST",
363
+ headers: { "Content-Type": "application/json" },
364
+ body: JSON.stringify({ session_id: sessionId }),
365
  }
366
  );
367
368
  const result = await response.json();
370
  if (response.ok) {
371
  setGenerationLogs(result.logs || []);
372
 
373
+ // Set up polling to track progress
374
  pollingIntervalRef.current = setInterval(async () => {
375
+ // Bail out if we are already done
376
  if (generationComplete) {
377
  clearInterval(pollingIntervalRef.current);
378
  return;
379
  }
380
 
381
  try {
382
+ // Call the API to fetch the latest logs
383
  const logsResponse = await fetch(
384
  `${API_CONFIG.BASE_URL}/benchmark-progress/${sessionId}`
385
  );
 
387
  if (logsResponse.ok) {
388
  const logsResult = await logsResponse.json();
389
 
390
+ // Update the logs when new ones arrive
391
  if (
392
  logsResult.logs &&
393
  logsResult.logs.length > generationLogs.length
 
395
  setGenerationLogs(logsResult.logs);
396
  }
397
 
398
+ // Check whether the task is finished
399
  if (logsResult.is_completed) {
400
  setGenerationComplete(true);
401
  clearInterval(pollingIntervalRef.current);
 
402
  }
403
  }
404
  } catch (error) {
405
  console.log("Error polling for logs:", error);
406
+ // Do not stop polling on network errors
407
  }
408
+ }, 2000); // Poll every 2 seconds
409
  } else {
410
+ // Handle the error
411
  setGenerationLogs([`Error: ${result.error || "Unknown error"}`]);
412
  setError(result.error || "Benchmark generation failed");
413
  }
 
420
  }
421
  };
422
 
423
+ // Get current step info for display
424
  const getCurrentStepInfo = () => {
425
  const totalSteps = BENCHMARK_STEPS.length;
426
  const currentStepIndex = activeStep;
427
 
428
+ // If there is no active step yet
429
  if (currentStepIndex <= 1 && completedSteps.length === 0) {
430
  return `Starting (1/${totalSteps})`;
431
  }
432
 
433
+ // If every step is complete
434
  if (currentStepIndex >= totalSteps) {
435
  return `Complete (${totalSteps}/${totalSteps})`;
436
  }
437
 
438
+ // Get the name of the current step
439
  const currentStepName =
440
  STEP_LABELS[BENCHMARK_STEPS[currentStepIndex]] || "Processing";
441
 
442
  return `${currentStepName} (${currentStepIndex}/${totalSteps})`;
443
  };
444
 
445
+ // Format elapsed time as HH:MM:SS
446
  const formatElapsedTime = () => {
447
  const hours = Math.floor(elapsedTime / 3600);
448
  const minutes = Math.floor((elapsedTime % 3600) / 60);
 
455
  ].join(":");
456
  };
457
 
458
+ // Stop the timer once generation is complete
459
  useEffect(() => {
460
  if (generationComplete && timerIntervalRef.current) {
461
  clearInterval(timerIntervalRef.current);
462
  }
463
  }, [generationComplete]);
464
 
465
+ const handleGenerationComplete = (result) => {
466
+ console.log("Benchmark generation completed:", result);
467
+ if (result && result.success && !hasRedirectedRef.current) {
468
+ hasRedirectedRef.current = true; // Mark that the redirect has happened
469
+ // Short pause before navigating to avoid timing issues
470
+ setTimeout(() => {
471
+ navigate(`/benchmark-display?session=${sessionId}`);
472
+ }, 500);
473
+ } else if (result && !result.success) {
474
+ // Show the error instead of redirecting
475
+ setError(result.error || "An error occurred during benchmark generation");
476
+ }
477
+ };
478
+
479
  return (
480
  <Paper
481
  elevation={3}
 
518
  </Box>
519
 
520
  {error ? (
521
+ <ErrorDisplay error={error} />
 
 
522
  ) : (
523
  <>
524
  <CircularProgress size={60} sx={{ mb: 2 }} />
 
553
  );
554
  };
555
 
556
+ export default Generator;
frontend/src/components/Benchmark/hooks/useBenchmarkLogs.js ADDED
@@ -0,0 +1,192 @@
1
+ import { useState, useEffect } from "react";
2
+
3
+ const BENCHMARK_STEPS = [
4
+ "configuration",
5
+ "provider_check",
6
+ "ingestion",
7
+ "upload_ingest_to_hub",
8
+ "summarization",
9
+ "chunking",
10
+ "single_shot_question_generation",
11
+ ];
12
+
13
+ export const useBenchmarkLogs = (sessionId, isDefault, onComplete) => {
14
+ const [generationLogs, setGenerationLogs] = useState([]);
15
+ const [error, setError] = useState(null);
16
+ const [currentPhase, setCurrentPhase] = useState("initializing");
17
+ const [completedSteps, setCompletedSteps] = useState([]);
18
+ const [activeStep, setActiveStep] = useState(1);
19
+ const [generationComplete, setGenerationComplete] = useState(false);
20
+
21
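+ // Scan the logs for known failure signatures and map them to user-facing error messages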
+ const checkForErrors = (logs) => {
22
+ // Check for rate limiting errors
23
+ const hasRateLimitError = logs.some(
24
+ (log) =>
25
+ log.includes("RATE_LIMIT_EXCEEDED") ||
26
+ log.includes("heavy load") ||
27
+ log.includes("rate limit")
28
+ );
29
+
30
+ if (hasRateLimitError) {
31
+ return {
32
+ hasError: true,
33
+ error:
34
+ "The demo is under heavy load at the moment. Please try again later.",
35
+ };
36
+ }
37
+
38
+ // Check for model availability errors
39
+ const hasModelError = logs.some(
40
+ (log) =>
41
+ log.includes("Required models not available") ||
42
+ log.includes("Some required models are not available")
43
+ );
44
+
45
+ if (hasModelError) {
46
+ return {
47
+ hasError: true,
48
+ error:
49
+ "Some required models are not available at the moment. Please try again later.",
50
+ };
51
+ }
52
+
53
+ // Check for configuration errors
54
+ const hasConfigError = logs.some(
55
+ (log) =>
56
+ log.includes("Error generating configuration") ||
57
+ log.includes("Configuration failed")
58
+ );
59
+
60
+ if (hasConfigError) {
61
+ return {
62
+ hasError: true,
63
+ error:
64
+ "Failed to generate benchmark configuration. Please try again later.",
65
+ };
66
+ }
67
+
68
+ return { hasError: false };
69
+ };
70
+
71
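+ // Derive completed steps from "[SUCCESS] Stage completed: <name>" markers and advance the active step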
+ const updateSteps = (logs) => {
72
+ const newCompletedSteps = [];
73
+
74
+ logs.forEach((log) => {
75
+ const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
76
+ if (match && match[1]) {
77
+ const completedStep = match[1].trim();
78
+ if (
79
+ BENCHMARK_STEPS.includes(completedStep) &&
80
+ !newCompletedSteps.includes(completedStep)
81
+ ) {
82
+ newCompletedSteps.push(completedStep);
83
+ }
84
+ }
85
+ });
86
+
87
+ let newActiveStep = activeStep;
88
+
89
+ if (newCompletedSteps.length > 0) {
90
+ const maxCompletedStepIndex = Math.max(
91
+ ...newCompletedSteps.map((step) => BENCHMARK_STEPS.indexOf(step))
92
+ );
93
+ const calculatedStep = maxCompletedStepIndex + 1;
94
+
95
+ if (calculatedStep > activeStep) {
96
+ newActiveStep = calculatedStep;
97
+ }
98
+
99
+ if (newActiveStep >= BENCHMARK_STEPS.length) {
100
+ newActiveStep = BENCHMARK_STEPS.length;
101
+ }
102
+ } else if (activeStep === 0) {
103
+ newActiveStep = 1;
104
+ }
105
+
106
+ return { newCompletedSteps, newActiveStep };
107
+ };
108
+
109
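+ // Infer the current UI phase from the ten most recent log lines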
+ const updatePhase = (logs) => {
110
+ const recentLogs = logs.slice(-10);
111
+
112
+ const isComplete = recentLogs.some((log) =>
113
+ log.includes("[SUCCESS] Benchmark process completed successfully")
114
+ );
115
+
116
+ if (isComplete) {
117
+ return "complete";
118
+ } else if (
119
+ recentLogs.some((log) => log.includes("Starting ingestion process"))
120
+ ) {
121
+ return "benchmarking";
122
+ } else if (
123
+ recentLogs.some((log) => log.includes("Generating base configuration"))
124
+ ) {
125
+ return "configuring";
126
+ }
127
+
128
+ return currentPhase;
129
+ };
130
+
131
+ useEffect(() => {
132
+ if (generationLogs.length === 0) return;
133
+
134
+ const errorCheck = checkForErrors(generationLogs);
135
+ if (errorCheck.hasError) {
136
+ setError(errorCheck.error);
137
+ setGenerationComplete(true);
138
+ if (onComplete) {
139
+ onComplete({
140
+ success: false,
141
+ error: errorCheck.error,
142
+ sessionId,
143
+ });
144
+ }
145
+ return;
146
+ }
147
+
148
+ const { newCompletedSteps, newActiveStep } = updateSteps(generationLogs);
149
+ const newPhase = updatePhase(generationLogs);
150
+
151
+ if (JSON.stringify(newCompletedSteps) !== JSON.stringify(completedSteps)) {
152
+ setCompletedSteps(newCompletedSteps);
153
+ }
154
+
155
+ if (newActiveStep !== activeStep) {
156
+ setActiveStep(newActiveStep);
157
+ }
158
+
159
+ if (newPhase !== currentPhase) {
160
+ setCurrentPhase(newPhase);
161
+ }
162
+
163
+ // Check whether the benchmark actually finished without errors
164
+ const recentLogs = generationLogs.slice(-10);
165
+ const isComplete = recentLogs.some((log) =>
166
+ log.includes("[SUCCESS] Benchmark process completed successfully")
167
+ );
168
+
169
+ if (isComplete) {
170
+ setGenerationComplete(true);
171
+ if (onComplete) {
172
+ onComplete({
173
+ success: true,
174
+ sessionId,
175
+ logs: generationLogs,
176
+ });
177
+ }
178
+ }
179
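+ // Note: the derived step/phase state is written inside this effect, so only the log inputs are listed as deps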
+ }, [generationLogs, sessionId, onComplete]);
180
+
181
+ return {
182
+ generationLogs,
183
+ setGenerationLogs,
184
+ error,
185
+ setError,
186
+ currentPhase,
187
+ completedSteps,
188
+ activeStep,
189
+ generationComplete,
190
+ setGenerationComplete,
191
+ };
192
+ };
frontend/src/components/Benchmark/hooks/useBenchmarkPolling.js ADDED
@@ -0,0 +1,106 @@
1
+ import { useRef, useEffect } from "react";
2
+ import API_CONFIG from "../../../config/api";
3
+
4
+ export const useBenchmarkPolling = (
5
+ sessionId,
6
+ setGenerationLogs,
7
+ setGenerationComplete,
8
+ onComplete
9
+ ) => {
10
+ const pollingIntervalRef = useRef(null);
11
+
12
+ const startPolling = () => {
13
+ if (pollingIntervalRef.current) {
14
+ clearInterval(pollingIntervalRef.current);
15
+ }
16
+
17
+ pollingIntervalRef.current = setInterval(async () => {
18
+ try {
19
+ const logsResponse = await fetch(
20
+ `${API_CONFIG.BASE_URL}/benchmark-progress/${sessionId}`
21
+ );
22
+
23
+ if (logsResponse.ok) {
24
+ const logsResult = await logsResponse.json();
25
+
26
+ if (logsResult.logs) {
27
+ setGenerationLogs((prevLogs) => {
28
+ if (logsResult.logs.length > prevLogs.length) {
29
+ return logsResult.logs;
30
+ }
31
+ return prevLogs;
32
+ });
33
+ }
34
+
35
+ // Vérifier s'il y a des erreurs dans les logs
36
+ const hasError = logsResult.logs.some(
37
+ (log) =>
38
+ log.includes("Error") ||
39
+ log.includes("ERROR") ||
40
+ log.includes("Failed") ||
41
+ log.includes("RATE_LIMIT_EXCEEDED") ||
42
+ log.includes("heavy load") ||
43
+ log.includes("rate limit")
44
+ );
45
+
46
+ if (hasError) {
47
+ setGenerationComplete(true);
48
+ clearInterval(pollingIntervalRef.current);
49
+ if (onComplete) {
50
+ onComplete({
51
+ success: false,
52
+ error:
53
+ "An error occurred during benchmark generation. Please try again later.",
54
+ sessionId,
55
+ });
56
+ }
57
+ return;
58
+ }
59
+
60
+ if (logsResult.is_completed) {
61
+ setGenerationComplete(true);
62
+ clearInterval(pollingIntervalRef.current);
63
+ if (onComplete) {
64
+ onComplete({
65
+ success: true,
66
+ sessionId,
67
+ logs: logsResult.logs,
68
+ });
69
+ }
70
+ }
71
+ } else {
72
+ const errorData = await logsResponse.json();
73
+ setGenerationComplete(true);
74
+ clearInterval(pollingIntervalRef.current);
75
+ if (onComplete) {
76
+ onComplete({
77
+ success: false,
78
+ error: errorData.error || "Unknown error",
79
+ sessionId,
80
+ });
81
+ }
82
+ }
83
+ } catch (error) {
84
+ setGenerationComplete(true);
85
+ clearInterval(pollingIntervalRef.current);
86
+ if (onComplete) {
87
+ onComplete({
88
+ success: false,
89
+ error: error.message,
90
+ sessionId,
91
+ });
92
+ }
93
+ }
94
+ }, 2000);
95
+ };
96
+
97
+ useEffect(() => {
98
+ return () => {
99
+ if (pollingIntervalRef.current) {
100
+ clearInterval(pollingIntervalRef.current);
101
+ }
102
+ };
103
+ }, []);
104
+
105
+ return { startPolling };
106
+ };
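
For orientation, a minimal sketch of how the Generator component can compose the Benchmark hooks added in this commit (assumed wiring; the actual composition lives in Generator.jsx):

  // Sketch only: hook names and signatures are taken from the files in this commit
  const logsState = useBenchmarkLogs(sessionId, isDefault, handleGenerationComplete);
  const { startPolling } = useBenchmarkPolling(
    sessionId,
    logsState.setGenerationLogs,
    logsState.setGenerationComplete,
    handleGenerationComplete
  );
  const { startSimulation } = useBenchmarkSimulation(
    logsState.setGenerationLogs,
    logsState.setGenerationComplete,
    handleGenerationComplete,
    sessionId
  );
  // Pre-calculated documents replay scripted logs; real sessions poll the backend
  useEffect(() => {
    isDefault ? startSimulation() : startPolling();
  }, [isDefault]);
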
frontend/src/components/Benchmark/hooks/useBenchmarkSimulation.js ADDED
@@ -0,0 +1,66 @@
1
+ import { useRef } from "react";
2
+
3
+ const SIMULATED_LOGS = [
4
+ "[INFO] Initializing benchmark generation...",
5
+ "[INFO] Generating base configuration file...",
6
+ "[SUCCESS] Stage completed: configuration",
7
+ "[INFO] Finding available providers for models...",
8
+ "[SUCCESS] Stage completed: provider_check",
9
+ "[INFO] Starting ingestion process...",
10
+ "[SUCCESS] Stage completed: ingestion",
11
+ "[INFO] Processing document content for upload...",
12
+ "[SUCCESS] Stage completed: upload_ingest_to_hub",
13
+ "[INFO] Generating document summary...",
14
+ "[SUCCESS] Stage completed: summarization",
15
+ "[INFO] Chunking content for better analysis...",
16
+ "[SUCCESS] Stage completed: chunking",
17
+ "[INFO] Generating single-shot questions...",
18
+ "[SUCCESS] Stage completed: single_shot_question_generation",
19
+ "[SUCCESS] Benchmark process completed successfully",
20
+ ];
21
+
22
+ export const useBenchmarkSimulation = (
23
+ setGenerationLogs,
24
+ setGenerationComplete,
25
+ onComplete,
26
+ sessionId
27
+ ) => {
28
+ const simulationIntervalRef = useRef(null);
29
+ const SIMULATION_DURATION = 80000; // 80 seconds
30
+
31
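+ // Replay the scripted logs at a fixed cadence so the UI behaves like a real run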
+ const startSimulation = () => {
32
+ setGenerationLogs([]);
33
+ let currentStep = 0;
34
+
35
+ const addNextLog = () => {
36
+ if (currentStep < SIMULATED_LOGS.length) {
37
+ // Capture the entry before queueing the state update: the functional
+ // updater can run after currentStep has already been incremented below
+ const nextLog = SIMULATED_LOGS[currentStep];
38
+ setGenerationLogs((prevLogs) => [...prevLogs, nextLog]);
41
+ currentStep++;
42
+
43
+ if (currentStep >= SIMULATED_LOGS.length) {
44
+ setTimeout(() => {
45
+ setGenerationComplete(true);
46
+ clearInterval(simulationIntervalRef.current);
47
+ if (onComplete) {
48
+ onComplete({
49
+ success: true,
50
+ sessionId,
51
+ logs: SIMULATED_LOGS,
52
+ });
53
+ }
54
+ }, 1000);
55
+ }
56
+ }
57
+ };
58
+
59
+ const totalSteps = SIMULATED_LOGS.length;
60
+ const intervalPerStep = SIMULATION_DURATION / totalSteps;
61
+
62
+ simulationIntervalRef.current = setInterval(addNextLog, intervalPerStep);
63
+ };
64
+
65
+ return { startSimulation };
66
+ };
frontend/src/components/BenchmarkEvaluation.jsx DELETED
@@ -1,401 +0,0 @@
1
- import React, { useState, useEffect, useRef } from "react";
2
- import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
3
- import { useNavigate, useSearchParams } from "react-router-dom";
4
- import API_CONFIG from "../config/api";
5
-
6
- // Simulation time in milliseconds for pre-calculated documents
7
- const SIMULATION_DURATION = 120000; // 2 minutes
8
-
9
- // Message change interval for standard vs pre-calculated documents
10
- const MESSAGE_CHANGE_INTERVAL = {
11
- DEFAULT: 25000, // 25 seconds for standard documents
12
- PRECALCULATED: 25000, // 25 seconds for pre-calculated documents
13
- };
14
-
15
- // Starting messages with their timing
16
- const STARTING_MESSAGES = [
17
- { message: "Initializing evaluation environment", step: 1, totalSteps: 5 },
18
- { message: "Finding available model providers", step: 2, totalSteps: 5 },
19
- { message: "Starting evaluation process", step: 3, totalSteps: 5 },
20
- { message: "Evaluating models", step: 4, totalSteps: 5 },
21
- { message: "Storing evaluation results", step: 5, totalSteps: 5 },
22
- ];
23
-
24
- const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
25
- const [searchParams] = useSearchParams();
26
- const isDefault =
27
- isDefaultDocument ||
28
- ["the-bitter-lesson", "hurricane-faq", "pokemon-guide"].includes(sessionId);
29
- const [evaluationComplete, setEvaluationComplete] = useState(false);
30
- const [error, setError] = useState(null);
31
- const [elapsedTime, setElapsedTime] = useState(0);
32
- const [startingMessageIndex, setStartingMessageIndex] = useState(0);
33
- const [evaluationStarted, setEvaluationStarted] = useState(false);
34
-
35
- const timerIntervalRef = useRef(null);
36
- const startTimeRef = useRef(null);
37
- const startingMessageIntervalRef = useRef(null);
38
- const pollingIntervalRef = useRef(null);
39
- const simulationTimeoutRef = useRef(null);
40
-
41
- const navigate = useNavigate();
42
-
43
- // Add effect to handle automatic redirection when evaluation is complete
44
- useEffect(() => {
45
- if (evaluationComplete) {
46
- navigate(`/evaluation-display?session=${sessionId}`);
47
- }
48
- }, [evaluationComplete, sessionId, navigate]);
49
-
50
- // Add effect to handle starting messages
51
- useEffect(() => {
52
- // Only set up the automatic interval for default documents
53
- // Real evaluations rely solely on updates from the API
54
- if (isDefault) {
55
- startingMessageIntervalRef.current = setInterval(() => {
56
- setStartingMessageIndex((prev) => {
57
- if (prev < STARTING_MESSAGES.length - 1) {
58
- return prev + 1;
59
- }
60
- return prev;
61
- });
62
- }, MESSAGE_CHANGE_INTERVAL.PRECALCULATED);
63
- }
64
-
65
- return () => {
66
- if (startingMessageIntervalRef.current) {
67
- clearInterval(startingMessageIntervalRef.current);
68
- }
69
- };
70
- }, [isDefault]);
71
-
72
- // Start evaluation when component mounts
73
- useEffect(() => {
74
- // Set start time
75
- startTimeRef.current = Date.now();
76
-
77
- // Start timer
78
- timerIntervalRef.current = setInterval(() => {
79
- const timeElapsed = Math.floor(
80
- (Date.now() - startTimeRef.current) / 1000
81
- );
82
- setElapsedTime(timeElapsed);
83
- }, 1000);
84
-
85
- // Handler to detect when the page becomes visible again
86
- const handleVisibilityChange = () => {
87
- if (
88
- document.visibilityState === "visible" &&
89
- !isDefault &&
90
- !evaluationComplete &&
91
- evaluationStarted // Check whether the evaluation has already started
92
- ) {
93
- console.log("Page became visible, checking evaluation status...");
94
- // Force a fresh request to fetch the evaluation state
95
- const checkEvaluationStatus = async () => {
96
- try {
97
- const logsResponse = await fetch(
98
- `${API_CONFIG.BASE_URL}/evaluation-logs/${sessionId}`
99
- );
100
-
101
- if (logsResponse.ok) {
102
- const logsResult = await logsResponse.json();
103
- if (logsResult.is_completed) {
104
- // End the evaluation if it is finished
105
- setEvaluationComplete(true);
106
-
107
- // Advance to the last message step
108
- setStartingMessageIndex(STARTING_MESSAGES.length - 1);
109
-
110
- // Clean up the intervals
111
- if (pollingIntervalRef.current) {
112
- clearInterval(pollingIntervalRef.current);
113
- }
114
- if (startingMessageIntervalRef.current) {
115
- clearInterval(startingMessageIntervalRef.current);
116
- }
117
- } else {
118
- // If the evaluation is still running, use the backend's current step
119
- if (logsResult.current_step) {
120
- // Use the mapping function to determine the message index
121
- const newIndex = mapStepToMessageIndex(
122
- logsResult.current_step
123
- );
124
- setStartingMessageIndex(newIndex);
125
- } else {
126
- // Time-based fallback when the step is unavailable
127
- const progress = Math.min(
128
- Math.floor(
129
- (Date.now() - startTimeRef.current) /
130
- MESSAGE_CHANGE_INTERVAL.DEFAULT
131
- ),
132
- STARTING_MESSAGES.length - 1
133
- );
134
- setStartingMessageIndex(progress);
135
- }
136
- }
137
- }
138
- } catch (error) {
139
- console.error("Error checking evaluation status:", error);
140
- }
141
- };
142
-
143
- checkEvaluationStatus();
144
- }
145
- };
146
-
147
- // Add the visibility change listener
148
- document.addEventListener("visibilitychange", handleVisibilityChange);
149
-
150
- if (isDefault) {
151
- simulateEvaluation();
152
- } else {
153
- // Start the evaluation only if it has not already been launched
154
- if (!evaluationStarted) {
155
- startEvaluation();
156
- }
157
- }
158
-
159
- // Clean up intervals on unmount
160
- return () => {
161
- if (pollingIntervalRef.current) {
162
- clearInterval(pollingIntervalRef.current);
163
- }
164
- if (timerIntervalRef.current) {
165
- clearInterval(timerIntervalRef.current);
166
- }
167
- if (simulationTimeoutRef.current) {
168
- clearTimeout(simulationTimeoutRef.current);
169
- }
170
- document.removeEventListener("visibilitychange", handleVisibilityChange);
171
- };
172
- }, [isDefault, sessionId, evaluationComplete, evaluationStarted]);
173
-
174
- // Simulate the evaluation process for pre-calculated documents
175
- const simulateEvaluation = () => {
176
- // Complete after the simulation duration (2 minutes)
177
- simulationTimeoutRef.current = setTimeout(() => {
178
- setEvaluationComplete(true);
179
-
180
- if (startingMessageIntervalRef.current) {
181
- clearInterval(startingMessageIntervalRef.current);
182
- }
183
-
184
- setStartingMessageIndex(STARTING_MESSAGES.length - 1); // Set to last message
185
- }, SIMULATION_DURATION);
186
- };
187
-
188
- // Format elapsed time as HH:MM:SS
189
- const formatElapsedTime = () => {
190
- const hours = Math.floor(elapsedTime / 3600);
191
- const minutes = Math.floor((elapsedTime % 3600) / 60);
192
- const seconds = elapsedTime % 60;
193
-
194
- return [
195
- hours.toString().padStart(2, "0"),
196
- minutes.toString().padStart(2, "0"),
197
- seconds.toString().padStart(2, "0"),
198
- ].join(":");
199
- };
200
-
201
- // Map the backend step name to its index in STARTING_MESSAGES
202
- const mapStepToMessageIndex = (currentStep) => {
203
- switch (currentStep) {
204
- case "initializing":
205
- return 0;
206
- case "finding_available_model_providers":
207
- return 1;
208
- case "starting_evaluation_process":
209
- return 2;
210
- case "evaluating_models":
211
- return 3;
212
- case "storing_evaluation_results":
213
- case "completed":
214
- return 4;
215
- default:
216
- // Estimate the step from elapsed time when the step is not recognized
217
- const elapsedSinceStart = Date.now() - startTimeRef.current;
218
- const estimatedTotalTime = 80000; // 80 seconds
219
- const estimatedProgress = Math.min(
220
- elapsedSinceStart / estimatedTotalTime,
221
- 1
222
- );
223
- return Math.min(
224
- Math.floor(estimatedProgress * STARTING_MESSAGES.length),
225
- STARTING_MESSAGES.length - 1
226
- );
227
- }
228
- };
229
-
230
- // Start benchmark evaluation
231
- const startEvaluation = async () => {
232
- if (!sessionId) {
233
- setError("Missing session ID");
234
- return;
235
- }
236
-
237
- // Mark that the evaluation has started
238
- setEvaluationStarted(true);
239
-
240
- try {
241
- // Call API to start evaluation
242
- const response = await fetch(
243
- `${API_CONFIG.BASE_URL}/evaluate-benchmark`,
244
- {
245
- method: "POST",
246
- headers: {
247
- "Content-Type": "application/json",
248
- },
249
- body: JSON.stringify({
250
- session_id: sessionId,
251
- }),
252
- }
253
- );
254
-
255
- const result = await response.json();
256
-
257
- if (response.ok) {
258
- // Set up polling to check completion
259
- pollingIntervalRef.current = setInterval(async () => {
260
- try {
261
- const logsResponse = await fetch(
262
- `${API_CONFIG.BASE_URL}/evaluation-logs/${sessionId}`
263
- );
264
-
265
- if (logsResponse.ok) {
266
- const logsResult = await logsResponse.json();
267
-
268
- // Check whether the evaluation is finished
269
- if (logsResult.is_completed) {
270
- setEvaluationComplete(true);
271
-
272
- // Advance to the last message step
273
- setStartingMessageIndex(STARTING_MESSAGES.length - 1);
274
-
275
- // Stop the intervals
276
- clearInterval(pollingIntervalRef.current);
277
- if (startingMessageIntervalRef.current) {
278
- clearInterval(startingMessageIntervalRef.current);
279
- }
280
- } else {
281
- // Fetch the current step from the API when available
282
- if (logsResult.current_step) {
283
- // Use the mapping function to determine the message index
284
- const newIndex = mapStepToMessageIndex(
285
- logsResult.current_step
286
- );
287
- setStartingMessageIndex(newIndex);
288
- } else {
289
- // Fallback: if the API returns no step, estimate from elapsed time
290
- const elapsedSinceStart = Date.now() - startTimeRef.current;
291
- const estimatedTotalTime = 80000; // 80 seconds
292
- const estimatedProgress = Math.min(
293
- elapsedSinceStart / estimatedTotalTime,
294
- 1
295
- );
296
- const estimatedStepIndex = Math.min(
297
- Math.floor(estimatedProgress * STARTING_MESSAGES.length),
298
- STARTING_MESSAGES.length - 1
299
- );
300
- setStartingMessageIndex(estimatedStepIndex);
301
- }
302
- }
303
- }
304
- } catch (error) {
305
- console.log("Error polling logs:", error);
306
- // Do not stop polling on transient network errors
307
- }
308
- }, 2000);
309
- } else {
310
- setError(result.error || "Benchmark evaluation failed");
311
- }
312
- } catch (error) {
313
- console.error("Error starting evaluation:", error);
314
- setError("Error connecting to server");
315
- }
316
- };
317
-
318
- return (
319
- <Paper
320
- elevation={3}
321
- sx={{
322
- p: 4,
323
- mt: 3,
324
- mb: 3,
325
- display: "flex",
326
- flexDirection: "column",
327
- alignItems: "center",
328
- justifyContent: "center",
329
- minHeight: 200,
330
- position: "relative",
331
- }}
332
- >
333
- {/* Estimated time */}
334
- <Box
335
- sx={{
336
- position: "absolute",
337
- top: 12,
338
- right: 12,
339
- backgroundColor: "rgba(0, 0, 0, 0.04)",
340
- borderRadius: "4px",
341
- px: 1,
342
- py: 0.5,
343
- display: "inline-flex",
344
- alignItems: "center",
345
- }}
346
- >
347
- <Typography
348
- variant="caption"
349
- sx={{
350
- fontSize: "0.675rem",
351
- color: "text.secondary",
352
- fontWeight: 500,
353
- }}
354
- >
355
- Estimated time ~ 1m30s
356
- </Typography>
357
- </Box>
358
-
359
- {error ? (
360
- <Alert severity="error" sx={{ width: "100%" }}>
361
- {error}
362
- </Alert>
363
- ) : (
364
- <>
365
- {evaluationComplete ? (
366
- <Alert severity="success" sx={{ width: "100%", mb: 3 }}>
367
- Evaluation completed successfully!
368
- </Alert>
369
- ) : (
370
- <>
371
- <CircularProgress size={60} sx={{ mb: 2 }} />
372
- <Typography variant="h6" component="div" gutterBottom>
373
- Benchmark evaluation...
374
- </Typography>
375
-
376
- {/* Step progress indicator */}
377
- <Typography variant="body1" color="text.secondary">
378
- {`${STARTING_MESSAGES[startingMessageIndex].message} (${STARTING_MESSAGES[startingMessageIndex].step}/${STARTING_MESSAGES[startingMessageIndex].totalSteps})`}
379
- </Typography>
380
-
381
- {/* Timer display */}
382
- <Box
383
- sx={{
384
- display: "flex",
385
- alignItems: "center",
386
- mt: 1,
387
- color: "text.secondary",
388
- opacity: 0.5,
389
- }}
390
- >
391
- <Typography variant="body2">{formatElapsedTime()}</Typography>
392
- </Box>
393
- </>
394
- )}
395
- </>
396
- )}
397
- </Paper>
398
- );
399
- };
400
-
401
- export default BenchmarkEvaluation;
frontend/src/components/{EvaluationDisplay.jsx → Evaluation/Display.jsx} RENAMED
@@ -18,6 +18,7 @@ import {
18
  } from "@mui/material";
19
  import OpenInNewIcon from "@mui/icons-material/OpenInNew";
20
  import CheckCircleIcon from "@mui/icons-material/CheckCircle";
 
21
 
22
  // Styles for the medals
23
  const MEDAL_STYLES = {
@@ -85,7 +86,7 @@ const getMedalStyle = (rank) => {
85
  };
86
  };
87
 
88
- const EvaluationDisplay = ({ sessionId, results }) => {
89
  // Format accuracy as percentage
90
  const formatAccuracy = (value) => {
91
  return `${(value * 100).toFixed(2)}\u2009%`;
@@ -118,9 +119,10 @@ const EvaluationDisplay = ({ sessionId, results }) => {
118
  results.models_comparison.length === 0
119
  ) {
120
  return (
121
- <Alert severity="info" sx={{ mt: 4, mb: 4 }}>
122
- The demo is currently under heavy load, please try again later.
123
- </Alert>
 
124
  );
125
  }
126
 
@@ -130,9 +132,10 @@ const EvaluationDisplay = ({ sessionId, results }) => {
130
  );
131
  if (successfulModels.length === 0) {
132
  return (
133
- <Alert severity="warning" sx={{ mt: 4, mb: 4 }}>
134
- The demo is currently under heavy load, please try again later.
135
- </Alert>
 
136
  );
137
  }
138
 
@@ -295,4 +298,4 @@ const EvaluationDisplay = ({ sessionId, results }) => {
295
  );
296
  };
297
 
298
- export default EvaluationDisplay;
 
18
  } from "@mui/material";
19
  import OpenInNewIcon from "@mui/icons-material/OpenInNew";
20
  import CheckCircleIcon from "@mui/icons-material/CheckCircle";
21
+ import ErrorDisplay from "../common/ErrorDisplay";
22
 
23
  // Styles pour les médailles
24
  const MEDAL_STYLES = {
 
86
  };
87
  };
88
 
89
+ const Display = ({ sessionId, results }) => {
90
  // Format accuracy as percentage
91
  const formatAccuracy = (value) => {
92
  return `${(value * 100).toFixed(2)}\u2009%`;
 
119
  results.models_comparison.length === 0
120
  ) {
121
  return (
122
+ <ErrorDisplay
123
+ error="The demo is currently under heavy load, please try again later."
124
+ title="Service Unavailable"
125
+ />
126
  );
127
  }
128
 
 
132
  );
133
  if (successfulModels.length === 0) {
134
  return (
135
+ <ErrorDisplay
136
+ error="The demo is currently under heavy load, please try again later."
137
+ title="Service Unavailable"
138
+ />
139
  );
140
  }
141
 
 
298
  );
299
  };
300
 
301
+ export default Display;
frontend/src/components/Evaluation/Evaluation.jsx ADDED
@@ -0,0 +1,150 @@
1
+ import React from "react";
2
+ import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
3
+ import { useNavigate, useSearchParams } from "react-router-dom";
4
+ import ErrorOutlineIcon from "@mui/icons-material/ErrorOutline";
5
+ import { useSimulation } from "./hooks/useSimulation";
6
+ import { useTimer } from "./hooks/useTimer";
7
+ import { useEvaluation } from "./hooks/useEvaluation";
8
+ import ErrorDisplay from "../common/ErrorDisplay";
9
+
10
+ const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
11
+ const [searchParams] = useSearchParams();
12
+ const isDefault =
13
+ isDefaultDocument ||
14
+ ["the-bitter-lesson", "hurricane-faq", "pokemon-guide"].includes(sessionId);
15
+
16
+ const navigate = useNavigate();
17
+
18
+ // Use our custom hooks
19
+ const { formatElapsedTime, stopTimer } = useTimer();
20
+ const {
21
+ startingMessageIndex,
22
+ evaluationComplete: simulationComplete,
23
+ currentMessage,
24
+ } = useSimulation(() => {
25
+ if (onComplete) {
26
+ onComplete();
27
+ }
28
+ });
29
+ const {
30
+ error,
31
+ evaluationComplete: realComplete,
32
+ currentStep,
33
+ evaluationStarted,
34
+ startEvaluation,
35
+ currentStepLabel,
36
+ totalSteps,
37
+ } = useEvaluation(sessionId, () => {
38
+ if (onComplete) {
39
+ onComplete();
40
+ }
41
+ });
42
+
43
+ // Handle automatic redirection when evaluation is complete
44
+ React.useEffect(() => {
45
+ if (realComplete || simulationComplete) {
46
+ navigate(`/evaluation-display?session=${sessionId}`);
47
+ }
48
+ }, [realComplete, simulationComplete, sessionId, navigate]);
49
+
50
+ // Start evaluation if not default and not started
51
+ React.useEffect(() => {
52
+ if (!isDefault && !evaluationStarted) {
53
+ startEvaluation();
54
+ }
55
+ }, [isDefault, evaluationStarted, startEvaluation]);
56
+
57
+ // Stop timer when complete
58
+ React.useEffect(() => {
59
+ if (realComplete || simulationComplete) {
60
+ stopTimer();
61
+ }
62
+ }, [realComplete, simulationComplete, stopTimer]);
63
+
64
+ const isComplete = realComplete || simulationComplete;
65
+ const currentStepInfo = isDefault
66
+ ? `${currentMessage.message} (${currentMessage.step}/${currentMessage.totalSteps})`
67
+ : `${currentStepLabel} (${currentStep + 1}/${totalSteps})`;
68
+
69
+ return (
70
+ <Paper
71
+ elevation={3}
72
+ sx={{
73
+ p: 4,
74
+ mt: 3,
75
+ mb: 3,
76
+ display: "flex",
77
+ flexDirection: "column",
78
+ alignItems: "center",
79
+ justifyContent: "center",
80
+ minHeight: 200,
81
+ position: "relative",
82
+ }}
83
+ >
84
+ {/* Temps estimé */}
85
+ <Box
86
+ sx={{
87
+ position: "absolute",
88
+ top: 12,
89
+ right: 12,
90
+ backgroundColor: "rgba(0, 0, 0, 0.04)",
91
+ borderRadius: "4px",
92
+ px: 1,
93
+ py: 0.5,
94
+ display: "inline-flex",
95
+ alignItems: "center",
96
+ }}
97
+ >
98
+ <Typography
99
+ variant="caption"
100
+ sx={{
101
+ fontSize: "0.675rem",
102
+ color: "text.secondary",
103
+ fontWeight: 500,
104
+ }}
105
+ >
106
+ Estimated time ~ 1m30s
107
+ </Typography>
108
+ </Box>
109
+
110
+ {error ? (
111
+ <ErrorDisplay error={error} />
112
+ ) : (
113
+ <>
114
+ {isComplete ? (
115
+ <Alert severity="success" sx={{ width: "100%", mb: 3 }}>
116
+ Evaluation completed successfully!
117
+ </Alert>
118
+ ) : (
119
+ <>
120
+ <CircularProgress size={60} sx={{ mb: 2 }} />
121
+ <Typography variant="h6" component="div" gutterBottom>
122
+ Benchmark evaluation...
123
+ </Typography>
124
+
125
+ {/* Step progress indicator */}
126
+ <Typography variant="body1" color="text.secondary">
127
+ {currentStepInfo}
128
+ </Typography>
129
+
130
+ {/* Timer display */}
131
+ <Box
132
+ sx={{
133
+ display: "flex",
134
+ alignItems: "center",
135
+ mt: 1,
136
+ color: "text.secondary",
137
+ opacity: 0.5,
138
+ }}
139
+ >
140
+ <Typography variant="body2">{formatElapsedTime()}</Typography>
141
+ </Box>
142
+ </>
143
+ )}
144
+ </>
145
+ )}
146
+ </Paper>
147
+ );
148
+ };
149
+
150
+ export default BenchmarkEvaluation;
frontend/src/components/Evaluation/hooks/useEvaluation.js ADDED
@@ -0,0 +1,148 @@
1
+ import { useState, useRef, useEffect } from "react";
2
+ import API_CONFIG from "../../../config/api";
3
+
4
+ // Define all evaluation steps in sequence
5
+ const EVALUATION_STEPS = [
6
+ "initializing",
7
+ "finding_available_model_providers",
8
+ "starting_evaluation_process",
9
+ "evaluating_models",
10
+ "storing_evaluation_results",
11
+ ];
12
+
13
+ // Step labels for display
14
+ const STEP_LABELS = {
15
+ initializing: "Initializing evaluation environment",
16
+ finding_available_model_providers: "Finding available model providers",
17
+ starting_evaluation_process: "Starting evaluation process",
18
+ evaluating_models: "Evaluating models",
19
+ storing_evaluation_results: "Storing evaluation results",
20
+ };
21
+
22
+ // Error messages that should be treated as errors
23
+ const ERROR_MESSAGES = [
24
+ "heavy load",
25
+ "try again later",
26
+ "rate limit",
27
+ "RATE_LIMIT_EXCEEDED",
28
+ ];
29
+
30
+ export const useEvaluation = (sessionId, onComplete) => {
31
+ const [error, setError] = useState(null);
32
+ const [evaluationComplete, setEvaluationComplete] = useState(false);
33
+ const [currentStep, setCurrentStep] = useState(0);
34
+ const [evaluationStarted, setEvaluationStarted] = useState(false);
35
+ const pollingIntervalRef = useRef(null);
36
+
37
+ const mapStepToIndex = (step) => {
38
+ return EVALUATION_STEPS.indexOf(step);
39
+ };
40
+
41
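+ // Treat overload and rate-limit phrases in the logs as fatal errors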
+ const checkForErrors = (logs) => {
42
+ if (!logs) return false;
43
+
44
+ const hasError = ERROR_MESSAGES.some((errorMessage) =>
45
+ logs.some((log) => log.toLowerCase().includes(errorMessage.toLowerCase()))
46
+ );
47
+
48
+ if (hasError) {
49
+ setError(
50
+ "The demo is currently under heavy load, please try again later."
51
+ );
52
+ setEvaluationComplete(true);
53
+ if (pollingIntervalRef.current) {
54
+ clearInterval(pollingIntervalRef.current);
55
+ }
56
+ return true;
57
+ }
58
+ return false;
59
+ };
60
+
61
+ const startEvaluation = async () => {
62
+ if (!sessionId) {
63
+ setError("Missing session ID");
64
+ return;
65
+ }
66
+
67
+ setEvaluationStarted(true);
68
+
69
+ try {
70
+ const response = await fetch(
71
+ `${API_CONFIG.BASE_URL}/evaluate-benchmark`,
72
+ {
73
+ method: "POST",
74
+ headers: {
75
+ "Content-Type": "application/json",
76
+ },
77
+ body: JSON.stringify({
78
+ session_id: sessionId,
79
+ }),
80
+ }
81
+ );
82
+
83
+ const result = await response.json();
84
+
85
+ if (response.ok) {
86
+ setupPolling();
87
+ } else {
88
+ setError(result.error || "Benchmark evaluation failed");
89
+ }
90
+ } catch (error) {
91
+ console.error("Error starting evaluation:", error);
92
+ setError("Error connecting to server");
93
+ }
94
+ };
95
+
96
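+ // Poll the evaluation logs every 2 seconds until completion or a fatal error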
+ const setupPolling = () => {
97
+ pollingIntervalRef.current = setInterval(async () => {
98
+ try {
99
+ const logsResponse = await fetch(
100
+ `${API_CONFIG.BASE_URL}/evaluation-logs/${sessionId}`
101
+ );
102
+
103
+ if (logsResponse.ok) {
104
+ const logsResult = await logsResponse.json();
105
+
106
+ // Check for error messages in logs
107
+ if (checkForErrors(logsResult.logs)) {
108
+ return;
109
+ }
110
+
111
+ if (logsResult.is_completed) {
112
+ setEvaluationComplete(true);
113
+ clearInterval(pollingIntervalRef.current);
114
+ if (onComplete) {
115
+ onComplete();
116
+ }
117
+ } else if (logsResult.current_step) {
118
+ const newStepIndex = mapStepToIndex(logsResult.current_step);
119
+ if (newStepIndex !== -1) {
120
+ setCurrentStep(newStepIndex);
121
+ }
122
+ }
123
+ }
124
+ } catch (error) {
125
+ console.log("Error polling logs:", error);
126
+ }
127
+ }, 2000);
128
+ };
129
+
130
+ useEffect(() => {
131
+ return () => {
132
+ if (pollingIntervalRef.current) {
133
+ clearInterval(pollingIntervalRef.current);
134
+ }
135
+ };
136
+ }, []);
137
+
138
+ return {
139
+ error,
140
+ evaluationComplete,
141
+ currentStep,
142
+ evaluationStarted,
143
+ startEvaluation,
144
+ currentStepLabel:
145
+ STEP_LABELS[EVALUATION_STEPS[currentStep]] || "Processing",
146
+ totalSteps: EVALUATION_STEPS.length,
147
+ };
148
+ };
frontend/src/components/Evaluation/hooks/useSimulation.js ADDED
@@ -0,0 +1,59 @@
1
+ import { useState, useRef, useEffect } from "react";
2
+
3
+ // Simulation time in milliseconds for pre-calculated documents
4
+ const SIMULATION_DURATION = 120000; // 2 minutes
5
+
6
+ // Starting messages with their timing
7
+ const STARTING_MESSAGES = [
8
+ { message: "Initializing evaluation environment", step: 1, totalSteps: 5 },
9
+ { message: "Finding available model providers", step: 2, totalSteps: 5 },
10
+ { message: "Starting evaluation process", step: 3, totalSteps: 5 },
11
+ { message: "Evaluating models", step: 4, totalSteps: 5 },
12
+ { message: "Storing evaluation results", step: 5, totalSteps: 5 },
13
+ ];
14
+
15
+ export const useSimulation = (onComplete) => {
16
+ const [startingMessageIndex, setStartingMessageIndex] = useState(0);
17
+ const [evaluationComplete, setEvaluationComplete] = useState(false);
18
+ const simulationTimeoutRef = useRef(null);
19
+ const startingMessageIntervalRef = useRef(null);
20
+
21
+ useEffect(() => {
22
+ // Configure automatic interval for message changes
23
+ startingMessageIntervalRef.current = setInterval(() => {
24
+ setStartingMessageIndex((prev) => {
25
+ if (prev < STARTING_MESSAGES.length - 1) {
26
+ return prev + 1;
27
+ }
28
+ return prev;
29
+ });
30
+ }, SIMULATION_DURATION / STARTING_MESSAGES.length);
31
+
32
+ // Complete after simulation duration
33
+ simulationTimeoutRef.current = setTimeout(() => {
34
+ setEvaluationComplete(true);
35
+ if (startingMessageIntervalRef.current) {
36
+ clearInterval(startingMessageIntervalRef.current);
37
+ }
38
+ setStartingMessageIndex(STARTING_MESSAGES.length - 1);
39
+ if (onComplete) {
40
+ onComplete();
41
+ }
42
+ }, SIMULATION_DURATION);
43
+
44
+ return () => {
45
+ if (simulationTimeoutRef.current) {
46
+ clearTimeout(simulationTimeoutRef.current);
47
+ }
48
+ if (startingMessageIntervalRef.current) {
49
+ clearInterval(startingMessageIntervalRef.current);
50
+ }
51
+ };
52
+ }, [onComplete]);
53
+
54
+ return {
55
+ startingMessageIndex,
56
+ evaluationComplete,
57
+ currentMessage: STARTING_MESSAGES[startingMessageIndex],
58
+ };
59
+ };
frontend/src/components/Evaluation/hooks/useTimer.js ADDED
@@ -0,0 +1,48 @@
1
+ import { useState, useRef, useEffect } from "react";
2
+
3
+ export const useTimer = () => {
4
+ const [elapsedTime, setElapsedTime] = useState(0);
5
+ const timerIntervalRef = useRef(null);
6
+ const startTimeRef = useRef(null);
7
+
8
+ const startTimer = () => {
9
+ startTimeRef.current = Date.now();
10
+ timerIntervalRef.current = setInterval(() => {
11
+ const timeElapsed = Math.floor(
12
+ (Date.now() - startTimeRef.current) / 1000
13
+ );
14
+ setElapsedTime(timeElapsed);
15
+ }, 1000);
16
+ };
17
+
18
+ const stopTimer = () => {
19
+ if (timerIntervalRef.current) {
20
+ clearInterval(timerIntervalRef.current);
21
+ }
22
+ };
23
+
24
+ const formatElapsedTime = () => {
25
+ const hours = Math.floor(elapsedTime / 3600);
26
+ const minutes = Math.floor((elapsedTime % 3600) / 60);
27
+ const seconds = elapsedTime % 60;
28
+
29
+ return [
30
+ hours.toString().padStart(2, "0"),
31
+ minutes.toString().padStart(2, "0"),
32
+ seconds.toString().padStart(2, "0"),
33
+ ].join(":");
34
+ };
35
+
36
+ useEffect(() => {
37
+ startTimer();
38
+ return () => {
39
+ stopTimer();
40
+ };
41
+ }, []);
42
+
43
+ return {
44
+ elapsedTime,
45
+ formatElapsedTime,
46
+ stopTimer,
47
+ };
48
+ };
frontend/src/components/Footer/Footer.js CHANGED
@@ -6,21 +6,23 @@ const Footer = () => {
6
  <Box
7
  component="footer"
8
  sx={{
9
- width: "100%",
 
10
  py: 4,
11
  textAlign: "center",
 
12
  }}
13
  >
14
  <Typography variant="body2" color="text.secondary" sx={{ mx: 4 }}>
15
- © 2024 Hugging Face - Open LLM Leaderboard - Made with 🤗 by the HF team
16
- -{" "}
17
  <Link
18
- href="https://huggingface.co"
19
  target="_blank"
20
  rel="noopener noreferrer"
21
  color="inherit"
22
  >
23
- huggingface.co
24
  </Link>
25
  </Typography>
26
  </Box>
 
6
  <Box
7
  component="footer"
8
  sx={{
9
+ width: "70%",
10
+ margin: "0 auto",
11
  py: 4,
12
  textAlign: "center",
13
+ opacity: 0.7,
14
  }}
15
  >
16
  <Typography variant="body2" color="text.secondary" sx={{ mx: 4 }}>
17
+ We keep processed documents for research purposes; by using this space,
18
+ you agree to this. For fully private usage, please duplicate the{" "}
19
  <Link
20
+ href="https://huggingface.co/spaces/yourbench/advanced"
21
  target="_blank"
22
  rel="noopener noreferrer"
23
  color="inherit"
24
  >
25
+ advanced demo space
26
  </Link>
27
  </Typography>
28
  </Box>
frontend/src/components/Intro.jsx CHANGED
@@ -1,5 +1,5 @@
1
  import React from "react";
2
- import { Box, Typography } from "@mui/material";
3
  import HFLogo from "./Logo/HFLogo";
4
 
5
  const Intro = () => {
@@ -42,7 +42,19 @@ const Intro = () => {
42
  YourBench is an <b>open-source framework</b> for generating{" "}
43
  <b>domain-specific benchmarks</b> in a <b>zero-shot</b> manner. It aims
44
  to keep your large language models on their toes—even as new data
45
- sources, domains, and knowledge demands evolve. Currently, this is an extremely minimal demo. To unlock the full capabilities, please visit our GitHub!
46
  </Typography>
47
  </Box>
48
  );
 
1
  import React from "react";
2
+ import { Box, Typography, Link } from "@mui/material";
3
  import HFLogo from "./Logo/HFLogo";
4
 
5
  const Intro = () => {
 
42
  YourBench is an <b>open-source framework</b> for generating{" "}
43
  <b>domain-specific benchmarks</b> in a <b>zero-shot</b> manner. It aims
44
  to keep your large language models on their toes—even as new data
45
+ sources, domains, and knowledge demands evolve.
46
+ <br />
47
+ <br /> Currently, this is an <b>extremely minimal demo</b>. <br />
48
+ To <b>unlock the full capabilities</b>, please visit our{" "}
49
+ <Link
50
+ href="https://github.com/yourbench"
51
+ target="_blank"
52
+ rel="noopener noreferrer"
53
+ color="inherit"
54
+ >
55
+ <b>GitHub</b>
56
+ </Link>
57
+ !
58
  </Typography>
59
  </Box>
60
  );
frontend/src/components/KeyboardShortcuts.jsx DELETED
@@ -1,24 +0,0 @@
1
- import React, { useEffect } from "react";
2
-
3
- function KeyboardShortcuts() {
4
- useEffect(() => {
5
- const handleKeyDown = (e) => {
6
- if (e.key === "p") {
7
- console.log("Debug key pressed: Clearing auth data and refreshing");
8
- localStorage.removeItem("hf_oauth");
9
- localStorage.removeItem("auth_return_to");
10
- alert("Auth data cleared. Page will reload.");
11
- window.location.reload();
12
- }
13
- };
14
-
15
- window.addEventListener("keydown", handleKeyDown);
16
- return () => {
17
- window.removeEventListener("keydown", handleKeyDown);
18
- };
19
- }, []);
20
-
21
- return null;
22
- }
23
-
24
- export default KeyboardShortcuts;
frontend/src/components/{ExternalLinks.jsx → Navigation.jsx} RENAMED
@@ -13,7 +13,7 @@ import OpenInNewIcon from "@mui/icons-material/OpenInNew";
13
  import ShareIcon from "@mui/icons-material/Share";
14
  import MenuIcon from "@mui/icons-material/Menu";
15
 
16
- const ExternalLinks = () => {
17
  const [anchorEl, setAnchorEl] = useState(null);
18
  const theme = useTheme();
19
  const isMobile = useMediaQuery(theme.breakpoints.down("sm"));
@@ -44,7 +44,7 @@ const ExternalLinks = () => {
44
  url: "https://github.com/huggingface/yourbench",
45
  },
46
  {
47
- name: "Full demo",
48
  url: "https://huggingface.co/spaces/yourbench/advanced",
49
  },
50
  ];
@@ -175,4 +175,4 @@ const ExternalLinks = () => {
175
  );
176
  };
177
 
178
- export default ExternalLinks;
 
13
  import ShareIcon from "@mui/icons-material/Share";
14
  import MenuIcon from "@mui/icons-material/Menu";
15
 
16
+ const Navigation = () => {
17
  const [anchorEl, setAnchorEl] = useState(null);
18
  const theme = useTheme();
19
  const isMobile = useMediaQuery(theme.breakpoints.down("sm"));
 
44
  url: "https://github.com/huggingface/yourbench",
45
  },
46
  {
47
+ name: "Advanced demo",
48
  url: "https://huggingface.co/spaces/yourbench/advanced",
49
  },
50
  ];
 
175
  );
176
  };
177
 
178
+ export default Navigation;
frontend/src/components/common/ErrorDisplay.jsx ADDED
@@ -0,0 +1,43 @@
1
+ import React from "react";
2
+ import { Box, Typography } from "@mui/material";
3
+ import SentimentVeryDissatisfiedIcon from "@mui/icons-material/SentimentVeryDissatisfied";
4
+
5
+ /**
6
+ * Generic error display component with centered icon and text
7
+ * @param {Object} props
8
+ * @param {string} props.error - The error message to display
9
+ * @param {string} [props.title="Error"] - Optional custom title
10
+ * @param {Object} [props.sx={}] - Optional additional styles
11
+ */
12
+ const ErrorDisplay = ({ error, title = "Error", sx = {} }) => {
13
+ return (
14
+ <Box
15
+ sx={{
16
+ display: "flex",
17
+ flexDirection: "column",
18
+ alignItems: "center",
19
+ justifyContent: "center",
20
+ p: 4,
21
+ gap: 2,
22
+ ...sx,
23
+ }}
24
+ >
25
+ <SentimentVeryDissatisfiedIcon
26
+ sx={{ fontSize: 60, color: "warning.main" }}
27
+ />
28
+ <Typography variant="h6" color="warning">
29
+ {title}
30
+ </Typography>
31
+ <Typography
32
+ variant="body1"
33
+ align="center"
34
+ color="text.secondary"
35
+ sx={{ maxWidth: "80%", lineHeight: 1.5 }}
36
+ >
37
+ {error}
38
+ </Typography>
39
+ </Box>
40
+ );
41
+ };
42
+
43
+ export default ErrorDisplay;
frontend/src/components/shared/AuthContainer.js DELETED
@@ -1,192 +0,0 @@
1
- import React, { useEffect } from "react";
2
- import {
3
- Box,
4
- Typography,
5
- Button,
6
- Chip,
7
- Stack,
8
- Paper,
9
- CircularProgress,
10
- useTheme,
11
- useMediaQuery,
12
- } from "@mui/material";
13
- import HFLogo from "../Logo/HFLogo";
14
- import { useAuth } from "../../hooks/useAuth";
15
- import LogoutIcon from "@mui/icons-material/Logout";
16
- import { useNavigate } from "react-router-dom";
17
-
18
- function AuthContainer({ actionText = "DO_ACTION", onSuccess }) {
19
- const { isAuthenticated, user, login, logout, loading } = useAuth();
20
- const navigate = useNavigate();
21
- const theme = useTheme();
22
- const isMobile = useMediaQuery(theme.breakpoints.down("sm"));
23
-
24
- // Trigger onSuccess callback when user is authenticated
25
- useEffect(() => {
26
- if (isAuthenticated && onSuccess) {
27
- // Add a small delay to ensure UI is updated properly
28
- setTimeout(() => {
29
- console.log("User is authenticated, calling onSuccess callback");
30
- onSuccess();
31
- }, 100);
32
- }
33
- }, [isAuthenticated, onSuccess]);
34
-
35
- // Check localStorage manually as a fallback
36
- useEffect(() => {
37
- if (!isAuthenticated && !loading && onSuccess) {
38
- const storedAuth = localStorage.getItem("hf_oauth");
39
- if (storedAuth) {
40
- console.log(
41
- "Found auth data in localStorage but isAuthenticated is false, forcing onSuccess"
42
- );
43
- onSuccess();
44
- }
45
- }
46
- }, [isAuthenticated, loading, onSuccess]);
47
-
48
- const handleLogout = () => {
49
- if (isAuthenticated && logout) {
50
- logout();
51
- navigate("/", { replace: true });
52
- window.location.reload();
53
- }
54
- };
55
-
56
- if (loading) {
57
- return (
58
- <Paper
59
- elevation={0}
60
- sx={{
61
- p: 3,
62
- mb: 4,
63
- border: "1px solid",
64
- borderColor: "grey.300",
65
- display: "flex",
66
- flexDirection: "column",
67
- alignItems: "center",
68
- gap: 2,
69
- }}
70
- >
71
- <CircularProgress size={24} />
72
- </Paper>
73
- );
74
- }
75
-
76
- if (!isAuthenticated) {
77
- return (
78
- <Paper
79
- elevation={0}
80
- sx={{
81
- p: 3,
82
- mb: 4,
83
- border: "1px solid",
84
- borderColor: "grey.300",
85
- display: "flex",
86
- flexDirection: "column",
87
- alignItems: "center",
88
- gap: 2,
89
- }}
90
- >
91
- <Typography variant="h6" align="center">
92
- Login to {actionText}
93
- </Typography>
94
- <Typography
95
- variant="body2"
96
- color="text.secondary"
97
- align="center"
98
- sx={{
99
- px: isMobile ? 2 : 0,
100
- }}
101
- >
102
- You need to be logged in with your Hugging Face account to{" "}
103
- {actionText.toLowerCase()}
104
- </Typography>
105
- <Button
106
- variant="contained"
107
- onClick={login}
108
- startIcon={
109
- <Box
110
- sx={{
111
- width: 20,
112
- height: 20,
113
- display: "flex",
114
- alignItems: "center",
115
- }}
116
- >
117
- <HFLogo />
118
- </Box>
119
- }
120
- sx={{
121
- textTransform: "none",
122
- fontWeight: 600,
123
- py: 1,
124
- px: 2,
125
- width: isMobile ? "100%" : "auto",
126
- }}
127
- >
128
- Sign in with Hugging Face
129
- </Button>
130
- </Paper>
131
- );
132
- }
133
-
134
- return (
135
- <Paper
136
- elevation={0}
137
- sx={{ p: 2, border: "1px solid", borderColor: "grey.300", mb: 4 }}
138
- >
139
- <Stack
140
- direction={isMobile ? "column" : "row"}
141
- spacing={2}
142
- alignItems={isMobile ? "stretch" : "center"}
143
- justifyContent="space-between"
144
- >
145
- <Stack
146
- direction={isMobile ? "column" : "row"}
147
- spacing={1}
148
- alignItems={isMobile ? "stretch" : "center"}
149
- sx={{ width: "100%" }}
150
- >
151
- <Typography
152
- variant="body1"
153
- align={isMobile ? "center" : "left"}
154
- sx={{ mb: isMobile ? 1 : 0 }}
155
- >
156
- Connected as <strong>{user?.username}</strong>
157
- </Typography>
158
- <Chip
159
- label={`Ready to ${actionText}`}
160
- color="success"
161
- size="small"
162
- variant="outlined"
163
- sx={{
164
- width: isMobile ? "100%" : "auto",
165
- height: isMobile ? 32 : 24,
166
- "& .MuiChip-label": {
167
- px: isMobile ? 2 : 1,
168
- },
169
- }}
170
- />
171
- </Stack>
172
- <Button
173
- variant="contained"
174
- onClick={handleLogout}
175
- endIcon={<LogoutIcon />}
176
- color="primary"
177
- sx={{
178
- minWidth: 120,
179
- height: 36,
180
- textTransform: "none",
181
- fontSize: "0.9375rem",
182
- width: isMobile ? "100%" : "auto",
183
- }}
184
- >
185
- Logout
186
- </Button>
187
- </Stack>
188
- </Paper>
189
- );
190
- }
191
-
192
- export default AuthContainer;
frontend/src/components/shared/CodeBlock.js DELETED
@@ -1,37 +0,0 @@
1
- import React from 'react';
2
- import { Box, IconButton } from '@mui/material';
3
- import ContentCopyIcon from '@mui/icons-material/ContentCopy';
4
-
5
- const CodeBlock = ({ code }) => (
6
- <Box sx={{ position: 'relative' }}>
7
- <IconButton
8
- onClick={() => navigator.clipboard.writeText(code)}
9
- sx={{
10
- position: 'absolute',
11
- top: 8,
12
- right: 8,
13
- color: 'grey.500',
14
- '&:hover': { color: 'grey.300' },
15
- }}
16
- >
17
- <ContentCopyIcon fontSize="small" />
18
- </IconButton>
19
- <Box
20
- sx={{
21
- backgroundColor: 'grey.900',
22
- color: 'grey.100',
23
- p: 2,
24
- borderRadius: 1,
25
- fontFamily: 'monospace',
26
- fontSize: '0.875rem',
27
- overflowX: 'auto',
28
- textAlign: 'left',
29
- whiteSpace: 'pre',
30
- }}
31
- >
32
- {code}
33
- </Box>
34
- </Box>
35
- );
36
-
37
- export default CodeBlock;
frontend/src/components/shared/FilterTag.js DELETED
@@ -1,139 +0,0 @@
1
- import React from "react";
2
- import { Chip } from "@mui/material";
3
- import { useTheme } from "@mui/material/styles";
4
- import { alpha } from "@mui/material/styles";
5
- import CheckBoxOutlineBlankIcon from "@mui/icons-material/CheckBoxOutlineBlank";
6
- import CheckBoxOutlinedIcon from "@mui/icons-material/CheckBoxOutlined";
7
-
8
- const FilterTag = ({
9
- label,
10
- checked,
11
- onChange,
12
- count,
13
- isHideFilter = false,
14
- totalCount = 0,
15
- variant = "tag",
16
- showCheckbox = false,
17
- stacked = false,
18
- sx = {},
19
- }) => {
20
- const theme = useTheme();
21
-
22
- const formatCount = (count) => {
23
- if (count === undefined) return "";
24
- return `${count}`;
25
- };
26
-
27
- const mainLabel = label;
28
- const countLabel = count !== undefined ? formatCount(count) : "";
29
-
30
- return (
31
- <Chip
32
- icon={
33
- showCheckbox ? (
34
- checked ? (
35
- <CheckBoxOutlinedIcon
36
- sx={{
37
- fontSize: "1.1rem",
38
- ml: 0.8,
39
- color: checked
40
- ? variant === "secondary"
41
- ? "secondary.main"
42
- : "primary.main"
43
- : "text.secondary",
44
- }}
45
- />
46
- ) : (
47
- <CheckBoxOutlineBlankIcon
48
- sx={{
49
- fontSize: "1.1rem",
50
- ml: 0.8,
51
- color: "text.secondary",
52
- }}
53
- />
54
- )
55
- ) : null
56
- }
57
- label={
58
- <span>
59
- {mainLabel}
60
- {countLabel && (
61
- <>
62
- <span
63
- style={{
64
- display: "inline-block",
65
- width: "3px",
66
- height: "3px",
67
- borderRadius: "50%",
68
- backgroundColor: "currentColor",
69
- opacity: 0.2,
70
- margin: "0 4px",
71
- verticalAlign: "middle",
72
- }}
73
- />
74
- <span style={{ opacity: 0.5 }}>{countLabel}</span>
75
- </>
76
- )}
77
- </span>
78
- }
79
- onClick={onChange}
80
- variant="outlined"
81
- color={
82
- checked
83
- ? variant === "secondary"
84
- ? "secondary"
85
- : "primary"
86
- : "default"
87
- }
88
- size="small"
89
- data-checked={checked}
90
- sx={{
91
- height: "32px",
92
- fontWeight: 600,
93
- opacity: checked ? 1 : 0.8,
94
- borderRadius: "5px",
95
- borderWidth: "1px",
96
- borderStyle: "solid",
97
- cursor: "pointer",
98
- pl: showCheckbox ? 0.5 : 0,
99
- mr: 0.5,
100
- mb: 0.5,
101
- transition: "opacity 0.2s ease, border-color 0.2s ease",
102
- "& .MuiChip-label": {
103
- px: 0.75,
104
- pl: showCheckbox ? 0.6 : 0.75,
105
- },
106
- "& .MuiChip-icon": {
107
- mr: 0.5,
108
- pl: 0.2,
109
- },
110
- "&:hover": {
111
- opacity: 1,
112
- backgroundColor: checked
113
- ? alpha(
114
- theme.palette[variant === "secondary" ? "secondary" : "primary"]
115
- .main,
116
- theme.palette.mode === "light" ? 0.08 : 0.16
117
- )
118
- : "action.hover",
119
- borderWidth: "1px",
120
- },
121
- backgroundColor: checked
122
- ? alpha(
123
- theme.palette[variant === "secondary" ? "secondary" : "primary"]
124
- .main,
125
- theme.palette.mode === "light" ? 0.08 : 0.16
126
- )
127
- : "background.paper",
128
- borderColor: checked
129
- ? variant === "secondary"
130
- ? "secondary.main"
131
- : "primary.main"
132
- : "divider",
133
- ...sx,
134
- }}
135
- />
136
- );
137
- };
138
-
139
- export default FilterTag;
frontend/src/components/shared/InfoIconWithTooltip.js DELETED
@@ -1,87 +0,0 @@
1
- import React from "react";
2
- import { Box, Tooltip, Portal, Backdrop } from "@mui/material";
3
- import InfoOutlinedIcon from "@mui/icons-material/InfoOutlined";
4
-
5
- const InfoIconWithTooltip = ({ tooltip, iconProps = {}, sx = {} }) => {
6
- const [open, setOpen] = React.useState(false);
7
-
8
- return (
9
- <>
10
- <Tooltip
11
- title={tooltip}
12
- arrow
13
- placement="top"
14
- open={open}
15
- onOpen={() => setOpen(true)}
16
- onClose={() => setOpen(false)}
17
- componentsProps={{
18
- tooltip: {
19
- sx: {
20
- bgcolor: "rgba(33, 33, 33, 0.95)",
21
- padding: "12px 16px",
22
- maxWidth: "none !important",
23
- width: "auto",
24
- minWidth: "200px",
25
- fontSize: "0.875rem",
26
- lineHeight: 1.5,
27
- position: "relative",
28
- zIndex: 1501,
29
- "& .MuiTooltip-arrow": {
30
- color: "rgba(33, 33, 33, 0.95)",
31
- },
32
- },
33
- },
34
- popper: {
35
- sx: {
36
- zIndex: 1501,
37
- maxWidth: "min(600px, 90vw) !important",
38
- '&[data-popper-placement*="bottom"] .MuiTooltip-tooltip': {
39
- marginTop: "10px",
40
- },
41
- '&[data-popper-placement*="top"] .MuiTooltip-tooltip': {
42
- marginBottom: "10px",
43
- },
44
- },
45
- },
46
- }}
47
- >
48
- <Box
49
- component="span"
50
- sx={{
51
- opacity: 0.5,
52
- display: "flex",
53
- alignItems: "center",
54
- cursor: "help",
55
- "&:hover": { opacity: 0.8 },
56
- position: "relative",
57
- zIndex: open ? 1502 : "auto",
58
- ...sx,
59
- }}
60
- >
61
- <InfoOutlinedIcon
62
- sx={{
63
- fontSize: "1rem",
64
- ...iconProps.sx,
65
- }}
66
- {...iconProps}
67
- />
68
- </Box>
69
- </Tooltip>
70
- {open && (
71
- <Portal>
72
- <Backdrop
73
- open={true}
74
- sx={{
75
- zIndex: 1500,
76
- backgroundColor: "rgba(0, 0, 0, 0.5)",
77
- transition: "opacity 0.2s ease",
78
- pointerEvents: "none",
79
- }}
80
- />
81
- </Portal>
82
- )}
83
- </>
84
- );
85
- };
86
-
87
- export default InfoIconWithTooltip;
frontend/src/components/shared/PageHeader.js DELETED
@@ -1,29 +0,0 @@
1
- import React from "react";
2
- import { Box, Typography } from "@mui/material";
3
-
4
- const PageHeader = ({ title, subtitle }) => {
5
- return (
6
- <Box
7
- sx={{
8
- display: "flex",
9
- flexDirection: "column",
10
- alignItems: "center",
11
- textAlign: "center",
12
- mb: 6,
13
- mt: 6,
14
- gap: 2,
15
- }}
16
- >
17
- <Typography fontWeight="bold" variant="h3" component="h1">
18
- {title}
19
- </Typography>
20
- {subtitle && (
21
- <Typography variant="h6" color="text.secondary">
22
- {subtitle}
23
- </Typography>
24
- )}
25
- </Box>
26
- );
27
- };
28
-
29
- export default PageHeader;
frontend/src/pages/BenchmarkDisplayPage.jsx CHANGED
@@ -2,7 +2,7 @@ import React, { useState, useEffect } from "react";
2
  import { Box, CircularProgress } from "@mui/material";
3
  import { useNavigate, useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
- import BenchmarkDisplay from "../components/BenchmarkDisplay";
6
  import API_CONFIG from "../config/api";
7
  import { useThemeMode } from "../hooks/useThemeMode";
8
  import getTheme from "../config/theme";
@@ -138,7 +138,7 @@ function BenchmarkDisplayPage() {
138
  bgcolor: "background.paper",
139
  }}
140
  >
141
- <BenchmarkDisplay
142
  onStartEvaluation={handleStartEvaluation}
143
  sessionId={sessionId}
144
  datasetUrl={datasetUrl}
 
2
  import { Box, CircularProgress } from "@mui/material";
3
  import { useNavigate, useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
+ import Display from "../components/Benchmark/Display";
6
  import API_CONFIG from "../config/api";
7
  import { useThemeMode } from "../hooks/useThemeMode";
8
  import getTheme from "../config/theme";
 
138
  bgcolor: "background.paper",
139
  }}
140
  >
141
+ <Display
142
  onStartEvaluation={handleStartEvaluation}
143
  sessionId={sessionId}
144
  datasetUrl={datasetUrl}
frontend/src/pages/BenchmarkEvaluationPage.jsx CHANGED
@@ -2,7 +2,7 @@ import React, { useState, useEffect } from "react";
2
  import { Box, CircularProgress } from "@mui/material";
3
  import { useNavigate, useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
- import BenchmarkEvaluation from "../components/BenchmarkEvaluation";
6
  import API_CONFIG from "../config/api";
7
 
8
  function BenchmarkEvaluationPage() {
@@ -75,7 +75,7 @@ function BenchmarkEvaluationPage() {
75
  <CircularProgress size={60} />
76
  </Box>
77
  ) : (
78
- <BenchmarkEvaluation
79
  sessionId={sessionId}
80
  isDefaultDocument={isDefault}
81
  onComplete={handleEvaluationComplete}
 
2
  import { Box, CircularProgress } from "@mui/material";
3
  import { useNavigate, useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
+ import Evaluation from "../components/Evaluation/Evaluation";
6
  import API_CONFIG from "../config/api";
7
 
8
  function BenchmarkEvaluationPage() {
 
75
  <CircularProgress size={60} />
76
  </Box>
77
  ) : (
78
+ <Evaluation
79
  sessionId={sessionId}
80
  isDefaultDocument={isDefault}
81
  onComplete={handleEvaluationComplete}
frontend/src/pages/BenchmarkGenerationPage.jsx CHANGED
@@ -2,7 +2,7 @@ import React, { useState, useEffect, useRef } from "react";
2
  import { Box, CircularProgress } from "@mui/material";
3
  import { useNavigate, useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
- import BenchmarkGenerator from "../components/BenchmarkGenerator";
6
 
7
  function BenchmarkGenerationPage() {
8
  const navigate = useNavigate();
@@ -36,7 +36,7 @@ function BenchmarkGenerationPage() {
36
  return (
37
  <>
38
  <Intro />
39
- <BenchmarkGenerator
40
  sessionId={sessionId}
41
  isDefaultDocument={isDefault}
42
  onComplete={handleGenerationComplete}
 
2
  import { Box, CircularProgress } from "@mui/material";
3
  import { useNavigate, useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
+ import Generator from "../components/Benchmark/Generator";
6
 
7
  function BenchmarkGenerationPage() {
8
  const navigate = useNavigate();
 
36
  return (
37
  <>
38
  <Intro />
39
+ <Generator
40
  sessionId={sessionId}
41
  isDefaultDocument={isDefault}
42
  onComplete={handleGenerationComplete}
frontend/src/pages/EvaluationDisplayPage.jsx CHANGED
@@ -1,11 +1,12 @@
1
  import React, { useState, useEffect } from "react";
2
- import { Box, CircularProgress, Alert } from "@mui/material";
3
  import { useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
- import EvaluationDisplay from "../components/EvaluationDisplay";
6
  import { useThemeMode } from "../hooks/useThemeMode";
7
  import getTheme from "../config/theme";
8
  import API_CONFIG from "../config/api";
9
 
10
  function EvaluationDisplayPage() {
11
  const [searchParams] = useSearchParams();
@@ -132,9 +133,7 @@ function EvaluationDisplayPage() {
132
  <CircularProgress size={60} />
133
  </Box>
134
  ) : error ? (
135
- <Alert severity="error" sx={{ mt: 4, mb: 4 }}>
136
- {error}
137
- </Alert>
138
  ) : (
139
  <Box
140
  sx={{
@@ -144,10 +143,7 @@ function EvaluationDisplayPage() {
144
  bgcolor: "background.paper",
145
  }}
146
  >
147
- <EvaluationDisplay
148
- sessionId={sessionId}
149
- results={evaluationResults}
150
- />
151
  </Box>
152
  )}
153
  </>
 
1
  import React, { useState, useEffect } from "react";
2
+ import { Box, CircularProgress } from "@mui/material";
3
  import { useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
+ import Display from "../components/Evaluation/Display";
6
  import { useThemeMode } from "../hooks/useThemeMode";
7
  import getTheme from "../config/theme";
8
  import API_CONFIG from "../config/api";
9
+ import ErrorDisplay from "../components/common/ErrorDisplay";
10
 
11
  function EvaluationDisplayPage() {
12
  const [searchParams] = useSearchParams();
 
133
  <CircularProgress size={60} />
134
  </Box>
135
  ) : error ? (
136
+ <ErrorDisplay error={error} title="Error" />
137
  ) : (
138
  <Box
139
  sx={{
 
143
  bgcolor: "background.paper",
144
  }}
145
  >
146
+ <Display sessionId={sessionId} results={evaluationResults} />
147
  </Box>
148
  )}
149
  </>
frontend/src/pages/HomePage.jsx CHANGED
@@ -2,7 +2,7 @@ import React from "react";
2
  import { Box } from "@mui/material";
3
  import { useNavigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
- import BenchmarkCreateForm from "../components/BenchmarkCreateForm";
6
  import { useThemeMode } from "../hooks/useThemeMode";
7
  import getTheme from "../config/theme";
8
 
@@ -30,7 +30,7 @@ function HomePage() {
30
  bgcolor: "background.paper",
31
  }}
32
  >
33
- <BenchmarkCreateForm onStartGeneration={handleStartGeneration} />
34
  </Box>
35
  </>
36
  );
 
2
  import { Box } from "@mui/material";
3
  import { useNavigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
+ import CreateForm from "../components/Benchmark/CreateForm";
6
  import { useThemeMode } from "../hooks/useThemeMode";
7
  import getTheme from "../config/theme";
8
 
 
30
  bgcolor: "background.paper",
31
  }}
32
  >
33
+ <CreateForm onStartGeneration={handleStartGeneration} />
34
  </Box>
35
  </>
36
  );