tfrere committed
Commit 95bf1fc · Parent: d2805fc

update yourbench error handling

backend/config/models_config.py CHANGED
@@ -27,11 +27,6 @@ ALTERNATIVE_BENCHMARK_MODELS = [
     "mistralai/Mistral-Small-24B-Instruct-2501",
 ]
 
-# "Qwen/Qwen2.5-72B-Instruct"
-# "meta-llama/Llama-3.1-8B-Instruct"
-# "Qwen/Qwen2.5-32B-Instruct",
-# "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
-
 # Required model for create_bench_config_file.py (only one default model)
 DEFAULT_BENCHMARK_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
 
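For context, DEFAULT_BENCHMARK_MODEL is the single model that create_bench_config_file.py requires, while ALTERNATIVE_BENCHMARK_MODELS lists possible substitutes. How that script chooses between them is not part of this commit; the snippet below is only a hypothetical sketch of such a fallback, and resolve_benchmark_model / is_model_available are invented names.

# Hypothetical sketch (not from the repo): fall back from the default model to
# the alternatives when the default has no available inference provider.
from typing import Callable, List

DEFAULT_BENCHMARK_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
ALTERNATIVE_BENCHMARK_MODELS: List[str] = [
    "mistralai/Mistral-Small-24B-Instruct-2501",
]

def resolve_benchmark_model(is_model_available: Callable[[str], bool]) -> str:
    """Return the first usable model, trying the default before the alternatives."""
    for candidate in (DEFAULT_BENCHMARK_MODEL, *ALTERNATIVE_BENCHMARK_MODELS):
        if is_model_available(candidate):
            return candidate
    raise RuntimeError("No benchmark model is currently available")
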
backend/routes/benchmark.py CHANGED
@@ -218,12 +218,25 @@ class UnifiedBenchmarkTask:
             self.is_completed = True
 
             # Check whether an error was detected in the benchmark logs
-            has_error = any("[ERROR]" in log for log in final_logs)
+            # Specifically ignore JSON parsing errors, which should not block the process
+            has_error = any("[ERROR]" in log and not ("JSONDecodeError" in log or
+                                                      "Error processing QA pair" in log or
+                                                      "'str' object has no attribute 'get'" in log)
+                            for log in final_logs)
             benchmark_terminated_with_error = any("Benchmark process terminated with error code" in log for log in final_logs)
             benchmark_already_marked_success = any("Benchmark process completed successfully" in log for log in final_logs)
 
-            # Only add the success message if no error was detected
-            if not has_error and not benchmark_terminated_with_error and not benchmark_already_marked_success:
+            # Even if there are JSON errors, consider the benchmark successful
+            json_errors_only = any(("JSONDecodeError" in log or
+                                    "Error processing QA pair" in log or
+                                    "'str' object has no attribute 'get'" in log)
+                                   for log in final_logs) and not has_error
+
+            if json_errors_only:
+                self._add_log("[INFO] Benchmark completed with minor JSON parsing warnings, considered successful")
+
+            # Only add the success message if no serious error was detected
+            if (not has_error and not benchmark_terminated_with_error and not benchmark_already_marked_success) or json_errors_only:
                 self._add_log("[SUCCESS] Benchmark process completed successfully")
 
         except Exception as config_error:
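The effect of the new filtering is easiest to see in isolation. Below is a standalone sketch (not the route code itself) that applies the same has_error / json_errors_only conditions to a fabricated final_logs list; the marker tuple is just a compact rewrite of the string checks in the diff above.

# Standalone sketch of the log classification introduced above, run against
# fabricated log lines for illustration.
final_logs = [
    "[INFO] Starting benchmark",
    "[ERROR] Error processing QA pair: JSONDecodeError: Expecting value",
    "[INFO] Benchmark step finished",
]

JSON_ERROR_MARKERS = ("JSONDecodeError",
                      "Error processing QA pair",
                      "'str' object has no attribute 'get'")

# Serious errors: [ERROR] lines that are not one of the tolerated JSON parsing failures.
has_error = any("[ERROR]" in log and not any(m in log for m in JSON_ERROR_MARKERS)
                for log in final_logs)

# JSON parsing failures alone no longer prevent the run from being marked successful.
json_errors_only = (any(any(m in log for m in JSON_ERROR_MARKERS) for log in final_logs)
                    and not has_error)

print(has_error)         # False: the only [ERROR] line is a tolerated JSON failure
print(json_errors_only)  # True: the run would still be reported as successful
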
backend/tasks/create_bench.py CHANGED
@@ -109,6 +109,8 @@ class CreateBenchTask:
 
         # Flag to detect rate limiting errors
         rate_limit_detected = False
+        # Flag for non-critical JSON errors
+        json_errors_detected = False
 
         try:
             while self.is_running() and self.process:
@@ -133,6 +135,15 @@ class CreateBenchTask:
                         rate_limit_detected = True
                         self._add_log("[ERROR] RATE_LIMIT_EXCEEDED: The demo is under heavy load at the moment.")
 
+                    # Detect non-critical JSON errors
+                    if ("JSONDecodeError" in line or
+                        "Error processing QA pair" in line or
+                        "'str' object has no attribute 'get'" in line):
+                        json_errors_detected = True
+                        # Report them as warnings rather than errors
+                        self._add_log(f"[WARN] Non-critical JSON error: {line}")
+                        continue  # Move on to the next line
+
                     # Log raw output for debugging
                     self._add_log(f"[DEBUG] Raw output: {line}")
                     # Filter and format the line as needed
@@ -160,9 +171,12 @@ class CreateBenchTask:
             # Check exit code once the process is finished
             if self.process:
                 exit_code = self.process.poll()
-                if exit_code == 0:
-                    # Only add the success message if the exit code is 0
-                    self._add_log("[SUCCESS] Benchmark process completed successfully")
+                if exit_code == 0 or json_errors_detected:
+                    # Consider the process successful even when JSON errors occurred
+                    if json_errors_detected:
+                        self._add_log("[INFO] Benchmark completed with non-critical JSON errors, considered successful")
+                    else:
+                        self._add_log("[SUCCESS] Benchmark process completed successfully")
                 else:
                     # If a rate limiting error was detected, display a specific message
                     if rate_limit_detected:
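The same classification happens line by line while the yourbench subprocess output is streamed. The function below is only a minimal sketch of that per-line triage, not the task's actual reader loop; the classify name and the sample lines are invented, and the rate-limit condition is a stand-in, since the real check sits above the hunk shown here.

# Hypothetical per-line triage mirroring the loop above: JSON parsing failures
# are downgraded to warnings and skipped, rate limiting stays a hard error.
JSON_ERROR_MARKERS = ("JSONDecodeError",
                      "Error processing QA pair",
                      "'str' object has no attribute 'get'")

def classify(line: str) -> str:
    """Return the log level a given output line would be reported at."""
    if any(marker in line for marker in JSON_ERROR_MARKERS):
        return "WARN"   # non-critical: logged, then the loop continues
    if "429" in line or "rate limit" in line.lower():  # stand-in for the real rate-limit check
        return "ERROR"  # rate limiting is still treated as a real failure
    return "DEBUG"      # everything else is logged as raw output

for line in ("Error processing QA pair: 'str' object has no attribute 'get'",
             "HTTP 429: rate limit exceeded",
             "Generating question 12/50"):
    print(classify(line), "|", line)
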
backend/tasks/get_available_model_provider.py CHANGED
@@ -105,7 +105,7 @@ def get_available_model_provider(model_name, verbose=False):
 
     # Get providers for the model and prioritize them
     try:
-        info = model_info(model_name, expand="inferenceProviderMapping")
+        info = model_info(model_name, token=hf_token, expand="inferenceProviderMapping")
         if not hasattr(info, "inference_provider_mapping"):
             if verbose:
                 logger.info(f"No inference providers found for {model_name}")
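The only change here is that the caller's token is now forwarded to model_info, so the provider-mapping lookup is authenticated. Below is a minimal way to exercise the patched call, assuming a recent huggingface_hub and an HF_TOKEN environment variable; the error handling around it is simplified and not the module's own.

import os
from huggingface_hub import model_info

hf_token = os.environ.get("HF_TOKEN")  # assumption: the token is read from the environment

# Same call shape as the patched line: token passed explicitly, response expanded
# with the inference provider mapping.
info = model_info("mistralai/Mistral-Small-24B-Instruct-2501",
                  token=hf_token,
                  expand="inferenceProviderMapping")

providers = getattr(info, "inference_provider_mapping", None)
print(providers or "No inference providers found")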