Commit a86c1f9 · Parent(s): ab8d3f5
tfrere committed

update intro | fix evaluation

backend/routes/benchmark.py CHANGED
@@ -214,7 +214,15 @@ class UnifiedBenchmarkTask:
 
             # Mark as completed
             self.is_completed = True
-            self._add_log("[SUCCESS] Benchmark process completed successfully")
+
+            # Check whether an error was detected in the benchmark logs
+            has_error = any("[ERROR]" in log for log in final_logs)
+            benchmark_terminated_with_error = any("Benchmark process terminated with error code" in log for log in final_logs)
+            benchmark_already_marked_success = any("Benchmark process completed successfully" in log for log in final_logs)
+
+            # Only add the success message if no error was detected
+            if not has_error and not benchmark_terminated_with_error and not benchmark_already_marked_success:
+                self._add_log("[SUCCESS] Benchmark process completed successfully")
 
         except Exception as config_error:
             error_msg = str(config_error)
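
The fix above is a log-scan gate: before appending the final success entry, the task checks the log lines it has already captured. A minimal standalone sketch of that check, assuming final_logs is a plain list of log strings collected during the run (the sample entries are illustrative, not actual app output):

    # Sketch: only report success when no error markers appear in the captured logs.
    final_logs = [
        "[INFO] Executing command: ...",          # illustrative entries
        "[SUCCESS] Stage completed: ingestion",
    ]

    has_error = any("[ERROR]" in log for log in final_logs)
    terminated_with_error = any(
        "Benchmark process terminated with error code" in log for log in final_logs
    )
    already_marked_success = any(
        "Benchmark process completed successfully" in log for log in final_logs
    )

    if not (has_error or terminated_with_error or already_marked_success):
        print("[SUCCESS] Benchmark process completed successfully")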
backend/tasks/create_bench.py CHANGED
@@ -158,15 +158,19 @@ class CreateBenchTask:
             if self.process:
                 exit_code = self.process.poll()
                 if exit_code == 0:
+                    # Only add the success message if the exit code is 0
                     self._add_log("[SUCCESS] Benchmark process completed successfully")
                 else:
                     # If a rate-limiting error was detected, show a specific message
                     if rate_limit_detected:
                         self._add_log("[ERROR] Benchmark process failed due to API rate limiting. The demo is under heavy load at the moment.")
-                    else:
-                        self._add_log(f"[ERROR] Benchmark process terminated with error code: {exit_code}")
+                    # else:
+                    #     self._add_log(f"[ERROR] Benchmark process terminated with error code: {exit_code}")
+                    # Informational message that the process finished with errors
+                    self._add_log("[INFO] Benchmark process completed with errors")
         except Exception as e:
             self._add_log(f"[ERROR] Error during output capture: {str(e)}")
+            # Do not add a success message when an exception occurs
         finally:
             self.is_completed = True
             self.is_running_flag.clear()
@@ -201,32 +205,6 @@ class CreateBenchTask:
         # If no match is found, return the original name
         return stage_name
 
-    def _simulate_ingestion_process(self) -> None:
-        """
-        Simulate the ingestion process for development mode
-        """
-        self._add_log("[INFO] Simulating ingestion process")
-
-        # Simulate the stages using the same names the frontend expects
-        steps = [
-            ("ingestion", 2),
-            ("upload_ingest_to_hub", 3),
-            ("summarization", 2),
-            ("chunking", 3),
-            ("single_shot_question_generation", 4)
-        ]
-
-        for step, delay in steps:
-            # Log the start of the stage
-            self._add_log(f"[INFO] Processing {step}...")
-            time.sleep(delay)  # Simulate a delay
-            # Mark the stage as completed
-            self._add_log(f"[SUCCESS] Stage completed: {step}")
-
-        # Mark the task as completed
-        self.is_completed = True
-        self._add_log("[SUCCESS] Benchmark process completed successfully")
-
     def run(self, token: Optional[str] = None) -> None:
         """
         Run the ingestion task
@@ -278,12 +256,6 @@ class CreateBenchTask:
         env["HF_ORGANIZATION"] = os.getenv("HF_ORGANIZATION", "yourbench")
         self._add_log("[INFO] Environment variables exported")
 
-        # In development mode, only simulate ingestion
-        if os.environ.get("DEVELOPMENT_MODE", "").lower() == "true":
-            self._add_log("[INFO] Development mode enabled, simulating ingestion")
-            self._simulate_ingestion_process()
-            return
-
         # Start the process
         self._add_log(f"[INFO] Executing command: {' '.join(self.command)}")
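
For context, the rewritten branch sits on the standard subprocess pattern: poll() returns None while the child is still running and its exit status once it has finished. A hedged sketch of the same flow outside the task class, with a placeholder command and a hard-coded rate_limit_detected flag (in the real task the flag is set while scanning the process output):

    import subprocess
    import sys

    # Placeholder child process; the real task launches the yourbench pipeline.
    process = subprocess.Popen([sys.executable, "-c", "pass"])
    process.wait()

    rate_limit_detected = False  # set while scanning output in the real task
    exit_code = process.poll()   # exit status, now that the child has finished

    if exit_code == 0:
        # Only log success when the exit code is 0
        print("[SUCCESS] Benchmark process completed successfully")
    else:
        if rate_limit_detected:
            print("[ERROR] Benchmark process failed due to API rate limiting. "
                  "The demo is under heavy load at the moment.")
        # The exact error code is no longer surfaced; an informational line is logged instead.
        print("[INFO] Benchmark process completed with errors")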
 
backend/tasks/create_bench_config_file.py CHANGED
@@ -124,7 +124,7 @@ class CreateBenchConfigTask:
             # "Qwen/Qwen2.5-72B-Instruct"
             # "meta-llama/Llama-3.1-8B-Instruct"
             # "Qwen/Qwen2.5-32B-Instruct",
-            # "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
+            "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
         ]
 
         # Track found models
@@ -167,11 +167,11 @@
             "model_list": model_list,
 
             "model_roles": {
-                "ingestion": ["Qwen/Qwen2.5-32B-Instruct"],
-                "summarization": ["Qwen/Qwen2.5-32B-Instruct"],
+                "ingestion": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
+                "summarization": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
                 "chunking": ["intfloat/multilingual-e5-large-instruct"],
-                "single_shot_question_generation": ["Qwen/Qwen2.5-32B-Instruct"],
-                "multi_hop_question_generation": ["Qwen/Qwen2.5-32B-Instruct"],
+                "single_shot_question_generation": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
+                "multi_hop_question_generation": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
             },
             "pipeline": {
                 "ingestion": {
frontend/src/components/Benchmark/Generator.jsx CHANGED
@@ -513,7 +513,7 @@ const Generator = ({ sessionId, isDefaultDocument, onComplete }) => {
                 fontWeight: 500,
               }}
             >
-              Estimated time: ~ 1m30s
+              Estimated time ~ 1m30s
             </Typography>
           </Box>
 
frontend/src/components/Evaluation/Evaluation.jsx CHANGED
@@ -1,12 +1,43 @@
-import React from "react";
+import React, { useState, useEffect, useRef } from "react";
 import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
 import { useNavigate, useSearchParams } from "react-router-dom";
 import ErrorOutlineIcon from "@mui/icons-material/ErrorOutline";
-import { useSimulation } from "./hooks/useSimulation";
 import { useTimer } from "./hooks/useTimer";
 import { useEvaluation } from "./hooks/useEvaluation";
 import ErrorDisplay from "../common/ErrorDisplay";
 
+// Simulation messages for default documents, with custom timings
+const SIMULATION_MESSAGES = [
+  {
+    message: "Initializing evaluation environment",
+    step: 1,
+    totalSteps: 5,
+    timing: 0,
+  }, // Immediately
+  {
+    message: "Finding available model providers",
+    step: 2,
+    totalSteps: 5,
+    timing: 1500,
+  }, // After 1.5s
+  {
+    message: "Starting evaluation process",
+    step: 3,
+    totalSteps: 5,
+    timing: 3000,
+  }, // After 3s
+  { message: "Evaluating models", step: 4, totalSteps: 5, timing: 5000 }, // After 5s
+  {
+    message: "Storing evaluation results",
+    step: 5,
+    totalSteps: 5,
+    timing: 7000,
+  }, // After 7s
+];
+
+// Total duration before redirecting at the end of the simulation
+const TOTAL_SIMULATION_DURATION = 8000; // 8 seconds
+
 const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
   const [searchParams] = useSearchParams();
   const isDefault =
@@ -14,18 +45,14 @@ const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
     ["the-bitter-lesson", "hurricane-faq", "pokemon-guide"].includes(sessionId);
 
   const navigate = useNavigate();
+  const simulationTimeoutsRef = useRef([]);
+
+  // Simulation state
+  const [simulationStep, setSimulationStep] = useState(0);
+  const [simulationComplete, setSimulationComplete] = useState(false);
 
   // Use our custom hooks
   const { formatElapsedTime, stopTimer } = useTimer();
-  const {
-    startingMessageIndex,
-    evaluationComplete: simulationComplete,
-    currentMessage,
-  } = useSimulation(() => {
-    if (onComplete) {
-      onComplete();
-    }
-  });
   const {
     error,
     evaluationComplete: realComplete,
@@ -40,22 +67,57 @@ const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
     }
   });
 
+  // Drive the simulation for default documents
+  useEffect(() => {
+    // Only run for default documents, and only once
+    if (!isDefault || simulationTimeoutsRef.current.length > 0) return;
+
+    console.log("Starting simulation for default document:", sessionId);
+
+    // Create a timeout for each step with its custom timing
+    for (let i = 1; i < SIMULATION_MESSAGES.length; i++) {
+      const messageData = SIMULATION_MESSAGES[i];
+      const timeout = setTimeout(() => {
+        console.log(`Simulation step ${i + 1}: ${messageData.message}`);
+        setSimulationStep(i);
+      }, messageData.timing);
+
+      simulationTimeoutsRef.current.push(timeout);
+    }
+
+    // Final timeout to end the simulation
+    const finalTimeout = setTimeout(() => {
+      console.log("Simulation complete, redirecting");
+      setSimulationComplete(true);
+      if (onComplete) {
+        onComplete();
+      }
+    }, TOTAL_SIMULATION_DURATION);
+
+    simulationTimeoutsRef.current.push(finalTimeout);
+
+    // Clean up on unmount
+    return () => {
+      simulationTimeoutsRef.current.forEach(clearTimeout);
+    };
+  }, [isDefault, sessionId, onComplete]);
+
   // Handle automatic redirection when evaluation is complete
-  React.useEffect(() => {
+  useEffect(() => {
     if (realComplete || simulationComplete) {
       navigate(`/evaluation-display?session=${sessionId}`);
     }
   }, [realComplete, simulationComplete, sessionId, navigate]);
 
   // Start evaluation if not default and not started
-  React.useEffect(() => {
+  useEffect(() => {
     if (!isDefault && !evaluationStarted) {
       startEvaluation();
     }
   }, [isDefault, evaluationStarted, startEvaluation]);
 
   // Stop timer when complete
-  React.useEffect(() => {
+  useEffect(() => {
     if (realComplete || simulationComplete) {
       stopTimer();
     }
@@ -63,7 +125,7 @@ const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
 
   const isComplete = realComplete || simulationComplete;
   const currentStepInfo = isDefault
-    ? `${currentMessage.message} (${currentMessage.step}/${currentMessage.totalSteps})`
+    ? `${SIMULATION_MESSAGES[simulationStep].message} (${SIMULATION_MESSAGES[simulationStep].step}/${SIMULATION_MESSAGES[simulationStep].totalSteps})`
     : `${currentStepLabel} (${currentStep + 1}/${totalSteps})`;
 
   return (
frontend/src/components/Evaluation/hooks/useSimulation.js CHANGED
@@ -1,7 +1,8 @@
 import { useState, useRef, useEffect } from "react";
 
 // Simulation time in milliseconds for pre-calculated documents
-const SIMULATION_DURATION = 120000; // 2 minutes
+const SIMULATION_DURATION = 8000; // 8 seconds total
+const STEP_DURATION = SIMULATION_DURATION / 5; // Duration of each step
 
 // Starting messages with their timing
 const STARTING_MESSAGES = [
@@ -12,44 +13,46 @@ const STARTING_MESSAGES = [
   { message: "Storing evaluation results", step: 5, totalSteps: 5 },
 ];
 
-export const useSimulation = (onComplete) => {
+export const useSimulation = (onComplete, shouldStart = false) => {
   const [startingMessageIndex, setStartingMessageIndex] = useState(0);
   const [evaluationComplete, setEvaluationComplete] = useState(false);
-  const simulationTimeoutRef = useRef(null);
-  const startingMessageIntervalRef = useRef(null);
+  const timeoutsRef = useRef([]);
+  const hasInitializedRef = useRef(false);
 
+  // Start the simulation when shouldStart is true
   useEffect(() => {
-    // Configure automatic interval for message changes
-    startingMessageIntervalRef.current = setInterval(() => {
-      setStartingMessageIndex((prev) => {
-        if (prev < STARTING_MESSAGES.length - 1) {
-          return prev + 1;
-        }
-        return prev;
-      });
-    }, SIMULATION_DURATION / STARTING_MESSAGES.length);
-
-    // Complete after simulation duration
-    simulationTimeoutRef.current = setTimeout(() => {
+    if (!shouldStart || hasInitializedRef.current) return;
+
+    // Mark as initialized
+    hasInitializedRef.current = true;
+    console.log("Simulation starting with shouldStart =", shouldStart);
+
+    // Schedule sequential timeouts for each step
+    for (let i = 1; i < STARTING_MESSAGES.length; i++) {
+      const timeout = setTimeout(() => {
+        console.log(`Setting message index to ${i}`);
+        setStartingMessageIndex(i);
+      }, i * STEP_DURATION);
+
+      timeoutsRef.current.push(timeout);
+    }
+
+    // Schedule the end of the simulation
+    const completeTimeout = setTimeout(() => {
+      console.log("Completing simulation");
       setEvaluationComplete(true);
-      if (startingMessageIntervalRef.current) {
-        clearInterval(startingMessageIntervalRef.current);
-      }
-      setStartingMessageIndex(STARTING_MESSAGES.length - 1);
       if (onComplete) {
        onComplete();
      }
    }, SIMULATION_DURATION);
 
+    timeoutsRef.current.push(completeTimeout);
+
    return () => {
-      if (simulationTimeoutRef.current) {
-        clearTimeout(simulationTimeoutRef.current);
-      }
-      if (startingMessageIntervalRef.current) {
-        clearInterval(startingMessageIntervalRef.current);
-      }
+      // Clear all timeouts on unmount
+      timeoutsRef.current.forEach(clearTimeout);
    };
-  }, [onComplete]);
+  }, [shouldStart, onComplete]);
 
   return {
     startingMessageIndex,
frontend/src/components/Intro.jsx CHANGED
@@ -43,8 +43,12 @@ const Intro = () => {
          <b>domain-specific benchmarks</b> in a <b>zero-shot</b> manner. It aims
          to keep your large language models on their toes—even as new data
          sources, domains, and knowledge demands evolve.
-          <br />
-          <br /> Currently, this is an <b>extremely minimal demo</b>. <br />
+        </Typography>
+        <Typography
+          variant="body2"
+          sx={{ maxWidth: "800px", mx: "auto", mt: 2, opacity: 0.4 }}
+        >
+          Currently, this is an <b>extremely minimal demo</b>. <br />
          To <b>unlock the full capabilities</b>, please visit our{" "}
          <Link
            href="https://github.com/yourbench"