tfrere committed
Commit 95bf1fc · Parent: d2805fc

update yourbench error handling

backend/config/models_config.py CHANGED
@@ -27,11 +27,6 @@ ALTERNATIVE_BENCHMARK_MODELS = [
     "mistralai/Mistral-Small-24B-Instruct-2501",
 ]
 
-# "Qwen/Qwen2.5-72B-Instruct"
-# "meta-llama/Llama-3.1-8B-Instruct"
-# "Qwen/Qwen2.5-32B-Instruct",
-# "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
-
 # Required model for create_bench_config_file.py (only one default model)
 DEFAULT_BENCHMARK_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
 
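For context, DEFAULT_BENCHMARK_MODEL is the single model that create_bench_config_file.py requires, while ALTERNATIVE_BENCHMARK_MODELS lists possible substitutes. How that script chooses between them is not part of this commit; the snippet below is only a hypothetical sketch of such a fallback, and resolve_benchmark_model / is_model_available are invented names.

# Hypothetical sketch (not from the repo): fall back from the default model to
# the alternatives when the default has no available inference provider.
from typing import Callable, List

DEFAULT_BENCHMARK_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
ALTERNATIVE_BENCHMARK_MODELS: List[str] = [
    "mistralai/Mistral-Small-24B-Instruct-2501",
]

def resolve_benchmark_model(is_model_available: Callable[[str], bool]) -> str:
    """Return the first usable model, trying the default before the alternatives."""
    for candidate in (DEFAULT_BENCHMARK_MODEL, *ALTERNATIVE_BENCHMARK_MODELS):
        if is_model_available(candidate):
            return candidate
    raise RuntimeError("No benchmark model is currently available")
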
backend/routes/benchmark.py CHANGED
@@ -218,12 +218,25 @@ class UnifiedBenchmarkTask:
             self.is_completed = True
 
             # Check whether an error was detected in the benchmark logs
-            has_error = any("[ERROR]" in log for log in final_logs)
+            # Specifically ignore JSON parsing errors, which should not block the process
+            has_error = any("[ERROR]" in log and not ("JSONDecodeError" in log or
+                                                      "Error processing QA pair" in log or
+                                                      "'str' object has no attribute 'get'" in log)
+                            for log in final_logs)
             benchmark_terminated_with_error = any("Benchmark process terminated with error code" in log for log in final_logs)
             benchmark_already_marked_success = any("Benchmark process completed successfully" in log for log in final_logs)
 
-            # Only add the success message if no error was detected
-            if not has_error and not benchmark_terminated_with_error and not benchmark_already_marked_success:
+            # Even if there are JSON errors, consider the benchmark successful
+            json_errors_only = any(("JSONDecodeError" in log or
+                                    "Error processing QA pair" in log or
+                                    "'str' object has no attribute 'get'" in log)
+                                   for log in final_logs) and not has_error
+
+            if json_errors_only:
+                self._add_log("[INFO] Benchmark completed with minor JSON parsing warnings, considered successful")
+
+            # Only add the success message if no serious error was detected
+            if (not has_error and not benchmark_terminated_with_error and not benchmark_already_marked_success) or json_errors_only:
                 self._add_log("[SUCCESS] Benchmark process completed successfully")
 
         except Exception as config_error:
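The effect of the new filtering is easiest to see in isolation. Below is a standalone sketch (not the route code itself) that applies the same has_error / json_errors_only conditions to a fabricated final_logs list; the marker tuple is just a compact rewrite of the string checks in the diff above.

# Standalone sketch of the log classification introduced above, run against
# fabricated log lines for illustration.
final_logs = [
    "[INFO] Starting benchmark",
    "[ERROR] Error processing QA pair: JSONDecodeError: Expecting value",
    "[INFO] Benchmark step finished",
]

JSON_ERROR_MARKERS = ("JSONDecodeError",
                      "Error processing QA pair",
                      "'str' object has no attribute 'get'")

# Serious errors: [ERROR] lines that are not one of the tolerated JSON parsing failures.
has_error = any("[ERROR]" in log and not any(m in log for m in JSON_ERROR_MARKERS)
                for log in final_logs)

# JSON parsing failures alone no longer prevent the run from being marked successful.
json_errors_only = (any(any(m in log for m in JSON_ERROR_MARKERS) for log in final_logs)
                    and not has_error)

print(has_error)         # False: the only [ERROR] line is a tolerated JSON failure
print(json_errors_only)  # True: the run would still be reported as successful
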
backend/tasks/create_bench.py CHANGED
@@ -109,6 +109,8 @@ class CreateBenchTask:
 
         # Flag to detect rate limiting errors
         rate_limit_detected = False
+        # Flag for non-critical JSON errors
+        json_errors_detected = False
 
         try:
             while self.is_running() and self.process:
@@ -133,6 +135,15 @@ class CreateBenchTask:
                         rate_limit_detected = True
                         self._add_log("[ERROR] RATE_LIMIT_EXCEEDED: The demo is under heavy load at the moment.")
 
+                    # Detect non-critical JSON errors
+                    if ("JSONDecodeError" in line or
+                        "Error processing QA pair" in line or
+                        "'str' object has no attribute 'get'" in line):
+                        json_errors_detected = True
+                        # Report them as warnings rather than errors
+                        self._add_log(f"[WARN] Non-critical JSON error: {line}")
+                        continue  # Move on to the next line
+
                     # Log raw output for debugging
                     self._add_log(f"[DEBUG] Raw output: {line}")
                     # Filter and format the line as needed
@@ -160,9 +171,12 @@ class CreateBenchTask:
             # Check exit code once the process is finished
             if self.process:
                 exit_code = self.process.poll()
-                if exit_code == 0:
-                    # Only add the success message if the exit code is 0
-                    self._add_log("[SUCCESS] Benchmark process completed successfully")
+                if exit_code == 0 or json_errors_detected:
+                    # Consider the process successful even when JSON errors occurred
+                    if json_errors_detected:
+                        self._add_log("[INFO] Benchmark completed with non-critical JSON errors, considered successful")
+                    else:
+                        self._add_log("[SUCCESS] Benchmark process completed successfully")
                 else:
                     # If a rate limiting error was detected, display a specific message
                     if rate_limit_detected:
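The same classification happens line by line while the yourbench subprocess output is streamed. The function below is only a minimal sketch of that per-line triage, not the task's actual reader loop; the classify name and the sample lines are invented, and the rate-limit condition is a stand-in, since the real check sits above the hunk shown here.

# Hypothetical per-line triage mirroring the loop above: JSON parsing failures
# are downgraded to warnings and skipped, rate limiting stays a hard error.
JSON_ERROR_MARKERS = ("JSONDecodeError",
                      "Error processing QA pair",
                      "'str' object has no attribute 'get'")

def classify(line: str) -> str:
    """Return the log level a given output line would be reported at."""
    if any(marker in line for marker in JSON_ERROR_MARKERS):
        return "WARN"   # non-critical: logged, then the loop continues
    if "429" in line or "rate limit" in line.lower():  # stand-in for the real rate-limit check
        return "ERROR"  # rate limiting is still treated as a real failure
    return "DEBUG"      # everything else is logged as raw output

for line in ("Error processing QA pair: 'str' object has no attribute 'get'",
             "HTTP 429: rate limit exceeded",
             "Generating question 12/50"):
    print(classify(line), "|", line)
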
backend/tasks/get_available_model_provider.py CHANGED
@@ -105,7 +105,7 @@ def get_available_model_provider(model_name, verbose=False):
 
     # Get providers for the model and prioritize them
     try:
-        info = model_info(model_name, expand="inferenceProviderMapping")
+        info = model_info(model_name, token=hf_token, expand="inferenceProviderMapping")
         if not hasattr(info, "inference_provider_mapping"):
             if verbose:
                 logger.info(f"No inference providers found for {model_name}")
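The only change here is that the caller's token is now forwarded to model_info, so the provider-mapping lookup is authenticated. Below is a minimal way to exercise the patched call, assuming a recent huggingface_hub and an HF_TOKEN environment variable; the error handling around it is simplified and not the module's own.

import os
from huggingface_hub import model_info

hf_token = os.environ.get("HF_TOKEN")  # assumption: the token is read from the environment

# Same call shape as the patched line: token passed explicitly, response expanded
# with the inference provider mapping.
info = model_info("mistralai/Mistral-Small-24B-Instruct-2501",
                  token=hf_token,
                  expand="inferenceProviderMapping")

providers = getattr(info, "inference_provider_mapping", None)
print(providers or "No inference providers found")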