update evaluation progress
backend/routes/evaluation.py
CHANGED
@@ -92,10 +92,15 @@ async def get_evaluation_logs(session_id: str):
     if is_completed and hasattr(evaluation_task, 'results') and evaluation_task.results:
         results = evaluation_task.results
 
+    # Retrieve the step information
+    progress = evaluation_task.get_progress()
+
     return {
         "logs": logs,
         "is_completed": is_completed,
-        "results": results
+        "results": results,
+        "current_step": progress["current_step"],
+        "completed_steps": progress["completed_steps"]
     }
 
 @router.get("/evaluation-results/{session_id}")
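
The two new fields let the frontend display real backend progress instead of a purely timed animation. As a rough sketch of how a client consumes the enriched response (the route path and base URL here are assumptions; only the handler name get_evaluation_logs is visible in this diff):

    import time
    import requests

    def poll_evaluation_progress(session_id: str, base_url: str = "http://localhost:8000"):
        """Poll the logs endpoint until the evaluation task reports completion."""
        while True:
            # Route path is an assumption inferred from the handler name above
            data = requests.get(f"{base_url}/evaluation-logs/{session_id}").json()
            print(f"step={data.get('current_step')} done={data.get('completed_steps')}")
            if data.get("is_completed"):
                return data.get("results")
            time.sleep(1)
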
backend/tasks/evaluation_task.py
CHANGED
@@ -42,21 +42,35 @@ class EvaluationTask:
         self.timeout = timeout if timeout is not None else DEFAULT_EVALUATION_TIMEOUT
         self.current_step = "initializing"
         self.completed_steps = []
+        self.step_start_time = time.time()  # Record the start time of the current step
 
         # Clean up old results if requested
         if clean_old_results:
             self.clean_old_results()
 
-    def update_step(self, step: str) -> None:
+    async def update_step(self, step: str) -> None:
         """
-        Update the current step and completed steps
+        Update the current step and completed steps with a minimum delay of 1 second
 
         Args:
             step: Name of the step to update
         """
+        # Compute the time elapsed since the previous step started
+        elapsed_since_step_start = time.time() - self.step_start_time
+
+        # If less than one second has elapsed, wait out the rest of the second
+        if elapsed_since_step_start < 1.0:
+            await asyncio.sleep(1.0 - elapsed_since_step_start)
+
+        # Update the current step and record the new timestamp
         self.current_step = step
+        self.step_start_time = time.time()
+
+        # Add to the completed steps if not already present
         if step not in self.completed_steps:
             self.completed_steps.append(step)
+
+        print(f"[{datetime.now().strftime('%H:%M:%S')}] Step changed to: {step}")
 
     def get_progress(self) -> Dict:
         """

@@ -270,7 +284,7 @@ TASKS_TABLE = [yourbench]
         ]
 
         # Step 1: Check available providers for each model
-        self.update_step("finding_available_model_providers")
+        await self.update_step("finding_available_model_providers")
         print(f"[{datetime.now().strftime('%H:%M:%S')}] Checking available providers for models...")
 
         model_providers = {}

@@ -288,11 +302,11 @@ TASKS_TABLE = [yourbench]
         print(f"[{datetime.now().strftime('%H:%M:%S')}] Found providers for {len(model_providers)} models")
 
         # Step 2: Run evaluations in parallel
-        self.update_step("starting_evaluation_process")
+        await self.update_step("starting_evaluation_process")
         print(f"[{datetime.now().strftime('%H:%M:%S')}] Starting evaluation process...")
 
         # Step 3: Evaluate models
-        self.update_step("evaluating_models")
+        await self.update_step("evaluating_models")
         print(f"[{datetime.now().strftime('%H:%M:%S')}] Evaluating models...")
 
         tasks = []

@@ -306,13 +320,13 @@ TASKS_TABLE = [yourbench]
         self.results = [r for r in results if r["status"] == "success"]
 
         # Step 4: Save results
-        self.update_step("storing_evaluation_results")
+        await self.update_step("storing_evaluation_results")
         print(f"[{datetime.now().strftime('%H:%M:%S')}] Storing evaluation results...")
         self._save_results_to_hub()
 
         # Mark task as completed
         self.is_completed = True
-        self.update_step("completed")
+        await self.update_step("completed")
 
         total_time = time.time() - script_start_time
         print(f"[{datetime.now().strftime('%H:%M:%S')}] Evaluation completed in {total_time:.2f}s")
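
update_step becomes a coroutine so it can enforce a minimum display time per step: without the asyncio.sleep, steps that finish in milliseconds would never be visible to a frontend polling once per second. A self-contained sketch of the same pacing pattern (StepTracker is a hypothetical stand-in, not the project's class):

    import asyncio
    import time

    class StepTracker:
        """Minimal sketch of the minimum-duration step pattern used above."""

        def __init__(self):
            self.current_step = "initializing"
            self.step_start_time = time.time()

        async def update_step(self, step: str, min_duration: float = 1.0) -> None:
            # Wait out the remainder of the minimum duration, if any
            elapsed = time.time() - self.step_start_time
            if elapsed < min_duration:
                await asyncio.sleep(min_duration - elapsed)
            self.current_step = step
            self.step_start_time = time.time()

    async def main():
        tracker = StepTracker()
        start = time.time()
        for step in ("a", "b", "c"):  # three instantaneous steps...
            await tracker.update_step(step)
        print(f"elapsed: {time.time() - start:.1f}s")  # ...still take ~3s total

    asyncio.run(main())
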
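The route in backend/routes/evaluation.py reads exactly two keys from get_progress(). Its body is not shown in this diff; given the attributes maintained above, it presumably resembles this hypothetical sketch:

    def get_progress(self) -> Dict:
        # Hypothetical body: the diff shows only the method signature
        # and the two keys the route reads from its return value.
        return {
            "current_step": self.current_step,
            "completed_steps": self.completed_steps,
        }
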
frontend/src/components/BenchmarkEvaluation.jsx
CHANGED
@@ -49,19 +49,18 @@ const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
 
   // Add effect to handle starting messages
   useEffect(() => {
-    startingMessageIntervalRef.current = setInterval(
-      () => {
+    // Only set up the automatic interval for default documents
+    // For real evaluations, rely solely on the API updates
+    if (isDefault) {
+      startingMessageIntervalRef.current = setInterval(() => {
         setStartingMessageIndex((prev) => {
           if (prev < STARTING_MESSAGES.length - 1) {
             return prev + 1;
           }
           return prev;
         });
-      },
-      isDefault
-        ? MESSAGE_CHANGE_INTERVAL.PRECALCULATED
-        : MESSAGE_CHANGE_INTERVAL.DEFAULT
-    );
+      }, MESSAGE_CHANGE_INTERVAL.PRECALCULATED);
+    }
 
     return () => {
       if (startingMessageIntervalRef.current) {

@@ -116,16 +115,24 @@ const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
           clearInterval(startingMessageIntervalRef.current);
         }
       } else {
-        // If the evaluation is still running, update the message
-        // index based on the elapsed time
-        const progress = Math.min(
-          Math.floor(
-            (Date.now() - startTimeRef.current) /
-              MESSAGE_CHANGE_INTERVAL.DEFAULT
-          ),
-          STARTING_MESSAGES.length - 1
-        );
-        setStartingMessageIndex(progress);
+        // If the evaluation is still running, use the current step from the backend
+        if (logsResult.current_step) {
+          // Use the mapping function to determine the message index
+          const newIndex = mapStepToMessageIndex(
+            logsResult.current_step
+          );
+          setStartingMessageIndex(newIndex);
+        } else {
+          // Time-based fallback if the step is not available
+          const progress = Math.min(
+            Math.floor(
+              (Date.now() - startTimeRef.current) /
+                MESSAGE_CHANGE_INTERVAL.DEFAULT
+            ),
+            STARTING_MESSAGES.length - 1
+          );
+          setStartingMessageIndex(progress);
+        }
       }
     }
   } catch (error) {

@@ -191,6 +198,35 @@ const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
     ].join(":");
   };
 
+  // Map the backend step name to its index in STARTING_MESSAGES
+  const mapStepToMessageIndex = (currentStep) => {
+    switch (currentStep) {
+      case "initializing":
+        return 0;
+      case "finding_available_model_providers":
+        return 1;
+      case "starting_evaluation_process":
+        return 2;
+      case "evaluating_models":
+        return 3;
+      case "storing_evaluation_results":
+      case "completed":
+        return 4;
+      default:
+        // Estimate the step from the elapsed time if it is not recognized
+        const elapsedSinceStart = Date.now() - startTimeRef.current;
+        const estimatedTotalTime = 80000; // 80 seconds
+        const estimatedProgress = Math.min(
+          elapsedSinceStart / estimatedTotalTime,
+          1
+        );
+        return Math.min(
+          Math.floor(estimatedProgress * STARTING_MESSAGES.length),
+          STARTING_MESSAGES.length - 1
+        );
+    }
+  };
+
   // Start benchmark evaluation
   const startEvaluation = async () => {
     if (!sessionId) {

@@ -242,25 +278,25 @@ const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
           clearInterval(startingMessageIntervalRef.current);
         }
       } else {
-        // Estimate the current step from the elapsed time
-        const elapsedSinceStart = Date.now() - startTimeRef.current;
-        const estimatedTotalTime = 80000; // 80 seconds
-        const estimatedProgress = Math.min(
-          elapsedSinceStart / estimatedTotalTime,
-          1
-        );
-        const estimatedStepIndex = Math.min(
-          Math.floor(estimatedProgress * STARTING_MESSAGES.length),
-          STARTING_MESSAGES.length - 1
-        );
+        // Retrieve the current step from the API, if available
+        if (logsResult.current_step) {
+          // Use the mapping function to determine the message index
+          const newIndex = mapStepToMessageIndex(
+            logsResult.current_step
+          );
+          setStartingMessageIndex(newIndex);
+        } else {
+          // Fallback: if the API does not return a step, estimate from the elapsed time
+          const elapsedSinceStart = Date.now() - startTimeRef.current;
+          const estimatedTotalTime = 80000; // 80 seconds
+          const estimatedProgress = Math.min(
+            elapsedSinceStart / estimatedTotalTime,
+            1
+          );
+          const estimatedStepIndex = Math.min(
+            Math.floor(estimatedProgress * STARTING_MESSAGES.length),
+            STARTING_MESSAGES.length - 1
+          );
           setStartingMessageIndex(estimatedStepIndex);
+        }
       }
     }
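
Both fallback branches map elapsed time linearly onto message indices: with the assumed 80-second total and five STARTING_MESSAGES (the list itself is not shown, but the switch above tops out at index 4), the index advances every 16 seconds and saturates at the last message. A quick check of that arithmetic:

    # Assumes five starting messages; STARTING_MESSAGES itself is not in this diff.
    NUM_MESSAGES = 5
    ESTIMATED_TOTAL_MS = 80_000

    def estimated_index(elapsed_ms: int) -> int:
        progress = min(elapsed_ms / ESTIMATED_TOTAL_MS, 1)
        return min(int(progress * NUM_MESSAGES), NUM_MESSAGES - 1)

    for t in (0, 15_999, 16_000, 48_000, 80_000, 120_000):
        print(t, estimated_index(t))  # 0, 0, 1, 3, 4, 4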