tfrere committed
Commit 81e0b0c · 1 Parent(s): d88a570

update error handling, improve upload security checks

Files changed (36)
  1. backend/lighteval_task/lighteval_task.py +2 -3
  2. backend/pyproject.toml +1 -0
  3. backend/routes/cleanup.py +1 -1
  4. backend/routes/upload.py +82 -10
  5. backend/tasks/create_bench_config_file.py +7 -6
  6. backend/tasks/evaluation_task.py +35 -36
  7. backend/tasks/get_available_model_provider.py +8 -1
  8. frontend/server.js +0 -2
  9. frontend/src/App.js +4 -21
  10. frontend/src/components/{BenchmarkCreateForm.jsx → Benchmark/CreateForm.jsx} +12 -12
  11. frontend/src/components/{BenchmarkDisplay.jsx → Benchmark/Display.jsx} +6 -26
  12. frontend/src/components/{BenchmarkGenerator.jsx → Benchmark/Generator.jsx} +152 -180
  13. frontend/src/components/Benchmark/hooks/useBenchmarkLogs.js +192 -0
  14. frontend/src/components/Benchmark/hooks/useBenchmarkPolling.js +106 -0
  15. frontend/src/components/Benchmark/hooks/useBenchmarkSimulation.js +66 -0
  16. frontend/src/components/BenchmarkEvaluation.jsx +0 -401
  17. frontend/src/components/{EvaluationDisplay.jsx → Evaluation/Display.jsx} +11 -8
  18. frontend/src/components/Evaluation/Evaluation.jsx +150 -0
  19. frontend/src/components/Evaluation/hooks/useEvaluation.js +148 -0
  20. frontend/src/components/Evaluation/hooks/useSimulation.js +59 -0
  21. frontend/src/components/Evaluation/hooks/useTimer.js +48 -0
  22. frontend/src/components/Footer/Footer.js +7 -5
  23. frontend/src/components/Intro.jsx +14 -2
  24. frontend/src/components/KeyboardShortcuts.jsx +0 -24
  25. frontend/src/components/{ExternalLinks.jsx → Navigation.jsx} +3 -3
  26. frontend/src/components/common/ErrorDisplay.jsx +43 -0
  27. frontend/src/components/shared/AuthContainer.js +0 -192
  28. frontend/src/components/shared/CodeBlock.js +0 -37
  29. frontend/src/components/shared/FilterTag.js +0 -139
  30. frontend/src/components/shared/InfoIconWithTooltip.js +0 -87
  31. frontend/src/components/shared/PageHeader.js +0 -29
  32. frontend/src/pages/BenchmarkDisplayPage.jsx +2 -2
  33. frontend/src/pages/BenchmarkEvaluationPage.jsx +2 -2
  34. frontend/src/pages/BenchmarkGenerationPage.jsx +2 -2
  35. frontend/src/pages/EvaluationDisplayPage.jsx +5 -9
  36. frontend/src/pages/HomePage.jsx +2 -2
backend/lighteval_task/lighteval_task.py CHANGED
@@ -218,11 +218,10 @@ def process_judge_response_yourbench(response):
 class JudgeLLMYourBench(JudgeLLM):
     def __init__(self):
         super().__init__(
-            judge_model_name="Qwen/QwQ-32B",
+            judge_model_name="gpt-4o-2024-08-06",
             template=get_judge_prompt,
             process_judge_response=process_judge_response_yourbench,
-            judge_backend="inference-providers",
-            hf_provider="novita",
+            judge_backend="openai",
             short_judge_name="yourbench_judge",
         )
 
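The judge change above moves answer grading off the Hugging Face inference-provider stack (Qwen/QwQ-32B via novita) onto OpenAI's gpt-4o-2024-08-06. A minimal sketch of the startup guard this implies, assuming the openai backend reads OPENAI_API_KEY from the environment — the variable name and helper are assumptions, not part of this commit:

import os

def check_judge_credentials() -> None:
    # Assumed requirement: judge_backend="openai" needs an OpenAI key in
    # addition to the HF_TOKEN the rest of the backend already checks for.
    if not os.environ.get("OPENAI_API_KEY"):
        raise ValueError("OPENAI_API_KEY not defined in environment")

This mirrors the HF_TOKEN guard pattern used in backend/tasks/get_available_model_provider.py below.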
backend/pyproject.toml CHANGED
@@ -25,6 +25,7 @@ dependencies = [
     "pydantic>=2.6.0",
     "PyPDF2>=3.0.0",
     "beautifulsoup4>=4.12.0",
+    "evaluate>=0.4.0",
 ]
 
 [build-system]
backend/routes/cleanup.py CHANGED
@@ -27,7 +27,7 @@ async def cleanup_session(session_id: str):
     """
     # Check if we are in development mode
     # if os.environ.get("ENVIRONEMENT", "").lower() == "development":
-    if False:
+    if True:
         logging.info(f"[DEV MODE] Cleanup called for session: {session_id} - No action taken in development mode")
         return {
             "success": True,
backend/routes/upload.py CHANGED
@@ -14,12 +14,23 @@ session_files = {}
 UPLOAD_ROOT = "uploaded_files"
 os.makedirs(UPLOAD_ROOT, exist_ok=True)
 
+# Longueur minimale pour tout fichier (en caractères)
+MIN_FILE_LENGTH = 500
+
 def validate_pdf(file_path: str) -> bool:
     """Validate if file is a valid PDF."""
     try:
         reader = PdfReader(file_path)
         # Vérifier que le PDF a au moins une page
-        return len(reader.pages) > 0
+        if len(reader.pages) == 0:
+            return False
+
+        # Extraire le texte pour vérifier la longueur
+        text = ""
+        for page in reader.pages:
+            text += page.extract_text()
+
+        return len(text) >= MIN_FILE_LENGTH
     except:
         return False
 
@@ -28,8 +39,8 @@ def validate_markdown(file_path: str) -> bool:
     try:
         with open(file_path, 'r', encoding='utf-8') as f:
             content = f.read()
-            # Simple check: file should contain some content and at least one markdown element
-            return len(content) > 0 and any(marker in content for marker in ['#', '-', '*', '`', '[', '>'])
+            # Vérifier longueur minimale et présence d'éléments markdown
+            return len(content) >= MIN_FILE_LENGTH and any(marker in content for marker in ['#', '-', '*', '`', '[', '>'])
     except:
         return False
 
@@ -37,7 +48,11 @@ def validate_html(file_path: str) -> bool:
     """Validate if file is a valid HTML file."""
     try:
         with open(file_path, 'r', encoding='utf-8') as f:
-            BeautifulSoup(f.read(), 'html.parser')
+            content = f.read()
+            # Vérifier longueur minimale et structure HTML
+            if len(content) < MIN_FILE_LENGTH:
+                return False
+            BeautifulSoup(content, 'html.parser')
         return True
     except:
         return False
@@ -47,7 +62,7 @@ def validate_txt(file_path: str) -> bool:
     try:
         with open(file_path, 'r', encoding='utf-8') as f:
             content = f.read()
-            return len(content.strip()) > 0
+            return len(content.strip()) >= MIN_FILE_LENGTH
     except:
         return False
 
@@ -112,19 +127,76 @@ async def upload_file(file: UploadFile = File(...)):
 
     # Valider le fichier selon son type
     is_valid = False
+    error_detail = ""
+
     if file_extension == '.pdf':
-        is_valid = validate_pdf(file_path)
+        try:
+            reader = PdfReader(file_path)
+            if len(reader.pages) == 0:
+                error_detail = "PDF must contain at least one page"
+                is_valid = False
+            else:
+                text = ""
+                for page in reader.pages:
+                    text += page.extract_text()
+
+                if len(text) < MIN_FILE_LENGTH:
+                    error_detail = f"PDF contains {len(text)} characters but must contain at least {MIN_FILE_LENGTH}"
+                    is_valid = False
+                else:
+                    is_valid = True
+        except:
+            error_detail = "Invalid PDF format"
+            is_valid = False
     elif file_extension == '.md':
-        is_valid = validate_markdown(file_path)
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                content = f.read()
+
+            if len(content) < MIN_FILE_LENGTH:
+                error_detail = f"Markdown file contains {len(content)} characters but must contain at least {MIN_FILE_LENGTH}"
+                is_valid = False
+            elif not any(marker in content for marker in ['#', '-', '*', '`', '[', '>']):
+                error_detail = "Markdown file does not contain any valid Markdown elements"
+                is_valid = False
+            else:
+                is_valid = True
+        except:
+            error_detail = "Invalid Markdown format"
+            is_valid = False
     elif file_extension == '.html':
-        is_valid = validate_html(file_path)
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                content = f.read()
+
+            if len(content) < MIN_FILE_LENGTH:
+                error_detail = f"HTML file contains {len(content)} characters but must contain at least {MIN_FILE_LENGTH}"
+                is_valid = False
+            else:
+                BeautifulSoup(content, 'html.parser')
+                is_valid = True
+        except:
+            error_detail = "Invalid HTML format"
+            is_valid = False
     elif file_extension == '.txt':
-        is_valid = validate_txt(file_path)
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                content = f.read()
+            content_length = len(content.strip())
+
+            if content_length < MIN_FILE_LENGTH:
+                error_detail = f"Text file contains {content_length} characters but must contain at least {MIN_FILE_LENGTH}"
+                is_valid = False
+            else:
+                is_valid = True
+        except:
+            error_detail = "Invalid text format"
+            is_valid = False
 
     if not is_valid:
         # Supprimer le fichier invalide
         os.remove(file_path)
-        raise HTTPException(status_code=400, detail=f"Invalid {file_extension[1:].upper()} file")
+        raise HTTPException(status_code=400, detail=error_detail or f"Invalid {file_extension[1:].upper()} file")
 
     # Store file path for later use
     session_files[session_id] = file_path
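With this change the route re-implements each check inline so it can attach a precise error_detail to the 400 response, which leaves the validate_* helpers above duplicating the same logic. A possible consolidation, shown as a sketch only — the helper name and the (bool, str) return shape are hypothetical, not part of this commit; MIN_FILE_LENGTH is the threshold the commit introduces:

from typing import Tuple

from PyPDF2 import PdfReader

MIN_FILE_LENGTH = 500  # same minimum the commit adds to upload.py

def validate_pdf_with_detail(file_path: str) -> Tuple[bool, str]:
    """Validate a PDF and return (is_valid, user-facing error detail)."""
    try:
        reader = PdfReader(file_path)
        if len(reader.pages) == 0:
            return False, "PDF must contain at least one page"
        # Enforce the same extracted-text minimum the route checks inline
        text = "".join(page.extract_text() or "" for page in reader.pages)
        if len(text) < MIN_FILE_LENGTH:
            return False, f"PDF contains {len(text)} characters but must contain at least {MIN_FILE_LENGTH}"
        return True, ""
    except Exception:
        return False, "Invalid PDF format"

Each extension branch of the route would then reduce to one call plus the single HTTPException at the end, keeping the detailed messages without the duplication.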
backend/tasks/create_bench_config_file.py CHANGED
@@ -123,7 +123,8 @@ class CreateBenchConfigTask:
         required_models = [
             # "Qwen/Qwen2.5-72B-Instruct"
             # "meta-llama/Llama-3.1-8B-Instruct"
-            "Qwen/Qwen2.5-32B-Instruct"
+            # "Qwen/Qwen2.5-32B-Instruct",
+            "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
         ]
 
         # Track found models
@@ -166,11 +167,11 @@ class CreateBenchConfigTask:
             "model_list": model_list,
 
             "model_roles": {
-                "ingestion": ["Qwen/Qwen2.5-32B-Instruct"],
-                "summarization": ["Qwen/Qwen2.5-32B-Instruct"],
+                "ingestion": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
+                "summarization": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
                 "chunking": ["intfloat/multilingual-e5-large-instruct"],
-                "single_shot_question_generation": ["Qwen/Qwen2.5-32B-Instruct"],
-                "multi_hop_question_generation": ["Qwen/Qwen2.5-32B-Instruct"],
+                "single_shot_question_generation": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
+                "multi_hop_question_generation": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
             },
             "pipeline": {
                 "ingestion": {
@@ -201,7 +202,7 @@ class CreateBenchConfigTask:
                 "additional_instructions": "Generate rich and creative questions to test a curious adult",
                 "chunk_sampling": {
                     "mode": "count",
-                    "value": 10,
+                    "value": 5,
                     "random_seed": 123,
                 },
             },
backend/tasks/evaluation_task.py CHANGED
@@ -15,9 +15,20 @@ from typing import List, Dict
 from tasks.get_available_model_provider import get_available_model_provider
 from huggingface_hub import HfApi
 import asyncio
+from datasets import load_dataset
+# Default timeout value
+DEFAULT_EVALUATION_TIMEOUT = 60.0  # 1 minute by default
 
-# Valeur par défaut du timeout
-DEFAULT_EVALUATION_TIMEOUT = 120.0  # 1 minute par défaut
+# Models to evaluate - only accessible models
+DEFAULT_EVALUATION_MODELS = [
+    "Qwen/QwQ-32B",
+    "Qwen/Qwen2.5-72B-Instruct",
+    "Qwen/Qwen2.5-32B-Instruct",
+    "meta-llama/Llama-3.1-8B-Instruct",
+    "meta-llama/Llama-3.3-70B-Instruct",
+    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
+    "mistralai/Mistral-Small-24B-Instruct-2501",
+]
 
 class EvaluationTask:
     """
@@ -42,9 +53,9 @@ class EvaluationTask:
         self.timeout = timeout if timeout is not None else DEFAULT_EVALUATION_TIMEOUT
         self.current_step = "initializing"
         self.completed_steps = []
-        self.step_start_time = time.time()  # Enregistrer le temps de début de l'étape actuelle
+        self.step_start_time = time.time()  # Record the start time of the current step
 
-        # Nettoyer les anciens résultats si demandé
+        # Clean old results if requested
         if clean_old_results:
             self.clean_old_results()
 
@@ -55,18 +66,18 @@ class EvaluationTask:
         Args:
             step: Name of the step to update
         """
-        # Calculer le temps écoulé depuis le début de l'étape précédente
+        # Calculate the elapsed time since the start of the previous step
         elapsed_since_step_start = time.time() - self.step_start_time
 
-        # Si moins d'une seconde s'est écoulée, attendre pour compléter la seconde
+        # If less than one second has passed, wait to complete the second
         if elapsed_since_step_start < 1.0:
             await asyncio.sleep(1.0 - elapsed_since_step_start)
 
-        # Mettre à jour l'étape courante et enregistrer le nouvel horodatage
+        # Update the current step and record the new timestamp
        self.current_step = step
        self.step_start_time = time.time()
 
-        # Ajouter aux étapes complétées si nécessaire
+        # Add to completed steps if necessary
        if step not in self.completed_steps:
            self.completed_steps.append(step)
 
@@ -114,12 +125,12 @@ class EvaluationTask:
         Save evaluation results directly to the dataset on the Hub without persisting locally
         """
         try:
-            # Trier les résultats par précision (du plus précis au moins précis)
+            # Sort results by accuracy (from most accurate to least accurate)
             sorted_results = sorted(self.results, key=lambda x: x.get('accuracy', 0), reverse=True)
 
-            # Créer un fichier temporaire pour les résultats
+            # Create a temporary file for the results
             with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
-                # Ajouter metadata aux résultats
+                # Add metadata to the results
                 final_results = {
                     "metadata": {
                         "evaluation_date": datetime.now().isoformat(),
@@ -143,7 +154,7 @@ class EvaluationTask:
 
             print(f"[{datetime.now().strftime('%H:%M:%S')}] Results saved to Hub at {self.dataset_name}/lighteval_results.json")
 
-            # Supprimer le fichier temporaire
+            # Delete the temporary file
             os.unlink(temp_file_path)
         except Exception as e:
             print(f"[{datetime.now().strftime('%H:%M:%S')}] Failed to save results to Hub: {str(e)}")
@@ -267,15 +278,15 @@ TASKS_TABLE = [yourbench]
             results = json.load(f)
             print(f"[{datetime.now().strftime('%H:%M:%S')}] Results structure: {json.dumps(list(results.keys()))}")
 
-            # Vérifier que la structure est celle attendue
+            # Verify that the structure is as expected
             if "results" in results and "all" in results["results"] and "accuracy" in results["results"]["all"]:
                 accuracy = results["results"]["all"]["accuracy"]
                 print(f"[{datetime.now().strftime('%H:%M:%S')}] Extracted accuracy: {accuracy}")
             else:
-                print(f"[{datetime.now().strftime('%H:%M:%S')}] Structure de résultats inattendue. Clés disponibles: {list(results.keys())}")
+                print(f"[{datetime.now().strftime('%H:%M:%S')}] Unexpected results structure. Available keys: {list(results.keys())}")
                 if "results" in results:
-                    print(f"[{datetime.now().strftime('%H:%M:%S')}] Clés dans 'results': {list(results['results'].keys()) if isinstance(results['results'], dict) else 'pas un dictionnaire'}")
-                raise ValueError(f"Structure de résultats inattendue pour {model_name}")
+                    print(f"[{datetime.now().strftime('%H:%M:%S')}] Keys in 'results': {list(results['results'].keys()) if isinstance(results['results'], dict) else 'not a dictionary'}")
+                raise ValueError(f"Unexpected results structure for {model_name}")
 
             result_data = {
                 "model": model_name,
@@ -315,38 +326,26 @@ TASKS_TABLE = [yourbench]
         # Load environment variables
         load_dotenv()
 
-        # Models to evaluate - uniquement les modèles accessibles
-        models = [
-            "Qwen/QwQ-32B",
-            "Qwen/Qwen2.5-72B-Instruct",
-            "Qwen/Qwen2.5-32B-Instruct",
-            "meta-llama/Llama-3.1-8B-Instruct",
-            "meta-llama/Llama-3.3-70B-Instruct",
-            "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
-            "mistralai/Mistral-Small-24B-Instruct-2501",
-        ]
-
-        # Log pour voir la structure du dataset
+        # Log to see the structure of the dataset
         try:
-            from datasets import load_dataset
-            print(f"[{datetime.now().strftime('%H:%M:%S')}] Tentative de chargement du dataset {self.dataset_name} pour inspection")
+            print(f"[{datetime.now().strftime('%H:%M:%S')}] Attempting to load dataset {self.dataset_name} for inspection")
             dataset = load_dataset(self.dataset_name, "single_shot_questions", split="train")
 
-            # Vérifier la structure du premier exemple
+            # Verify the structure of the first example
             if len(dataset) > 0:
                 first_example = dataset[0]
-                print(f"[{datetime.now().strftime('%H:%M:%S')}] Structure du premier exemple:")
-                print(f"[{datetime.now().strftime('%H:%M:%S')}] Clés: {first_example.keys()}")
-                print(f"[{datetime.now().strftime('%H:%M:%S')}] Citations: {first_example.get('citations', 'non trouvé')}")
+                print(f"[{datetime.now().strftime('%H:%M:%S')}] Structure of the first example:")
+                print(f"[{datetime.now().strftime('%H:%M:%S')}] Keys: {first_example.keys()}")
+                print(f"[{datetime.now().strftime('%H:%M:%S')}] Citations: {first_example.get('citations', 'not found')}")
         except Exception as e:
-            print(f"[{datetime.now().strftime('%H:%M:%S')}] Erreur lors de l'inspection du dataset: {str(e)}")
+            print(f"[{datetime.now().strftime('%H:%M:%S')}] Error inspecting the dataset: {str(e)}")
 
         # Step 1: Check available providers for each model
         await self.update_step("finding_available_model_providers")
         print(f"[{datetime.now().strftime('%H:%M:%S')}] Checking available providers for models...")
 
         model_providers = {}
-        for model in models:
+        for model in DEFAULT_EVALUATION_MODELS:
             provider = get_available_model_provider(model, verbose=True)
             if provider:
                 model_providers[model] = provider
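For reference, the result-structure guard translated above expects this shape from lighteval's output JSON; as a standalone sketch (the function name is hypothetical, not part of the commit):

def extract_accuracy(results: dict, model_name: str) -> float:
    # Expected shape: {"results": {"all": {"accuracy": <float>, ...}, ...}}
    if "results" in results and "all" in results["results"] and "accuracy" in results["results"]["all"]:
        return results["results"]["all"]["accuracy"]
    raise ValueError(f"Unexpected results structure for {model_name}")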
backend/tasks/get_available_model_provider.py CHANGED
@@ -8,7 +8,7 @@ from dotenv import load_dotenv
 load_dotenv()
 
 # Define preferred providers
-PREFERRED_PROVIDERS = ["fireworks-ai", "sambanova", "novita"]
+PREFERRED_PROVIDERS = ["fireworks-ai","sambanova", "novita"]
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -30,11 +30,17 @@ def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool
     Returns:
         True if the provider is available, False otherwise
     """
+
     try:
         # Get HF token from environment
         hf_token = os.environ.get("HF_TOKEN")
         if not hf_token:
             raise ValueError("HF_TOKEN not defined in environment")
+        # Get HF token from environment
+        hf_organization = os.environ.get("HF_ORGANIZATION")
+        if not hf_organization:
+            raise ValueError("HF_ORGANIZATION not defined in environment")
+
 
         if verbose:
             logger.info(f"Testing provider {provider} for model {model_name}")
@@ -44,6 +50,7 @@ def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool
             model=model_name,
             token=hf_token,
             provider=provider,
+            bill_to=hf_organization,
             timeout=10  # Increased timeout to allow model loading
         )
 
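The bill_to addition is the substantive piece of this file: provider availability probes are now billed to the organization rather than to the individual token holder, and the function fails fast when HF_ORGANIZATION is unset. In isolation the pattern looks like this — a sketch assuming the client being constructed is huggingface_hub.InferenceClient, which the surrounding arguments suggest; the model and provider values are examples taken from this commit:

import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    model="deepseek-ai/DeepSeek-R1-Distill-Llama-70B",  # example from this diff
    token=os.environ["HF_TOKEN"],
    provider="novita",  # one of PREFERRED_PROVIDERS
    bill_to=os.environ["HF_ORGANIZATION"],  # charge the org, not the user token
    timeout=10,
)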
frontend/server.js CHANGED
@@ -34,8 +34,6 @@ app.use(
     "/health",
     "/upload",
     "/generate-benchmark",
-    "/config-logs",
-    "/benchmark-logs",
     "/benchmark-progress",
     "/benchmark-questions",
     "/evaluate-benchmark",
frontend/src/App.js CHANGED
@@ -9,14 +9,13 @@ import {
 import getTheme from "./config/theme";
 import { useThemeMode } from "./hooks/useThemeMode";
 import { ThemeProvider } from "@mui/material/styles";
-import ExternalLinks from "./components/ExternalLinks";
-import KeyboardShortcuts from "./components/KeyboardShortcuts";
+import Navigation from "./components/Navigation";
 import HomePage from "./pages/HomePage";
 import BenchmarkGenerationPage from "./pages/BenchmarkGenerationPage";
 import BenchmarkDisplayPage from "./pages/BenchmarkDisplayPage";
 import BenchmarkEvaluationPage from "./pages/BenchmarkEvaluationPage";
 import EvaluationDisplayPage from "./pages/EvaluationDisplayPage";
-
+import Footer from "./components/Footer/Footer";
 // Function to synchronize URL hash with parent Hugging Face page
 const syncURLWithParent = () => {
   // This function is only necessary in a Hugging Face Spaces environment
@@ -83,9 +82,8 @@ function App() {
       <CssBaseline />
       <Router>
         <Container maxWidth="md">
-          <ExternalLinks />
+          <Navigation />
           <Box sx={{ pt: 12, pb: 4 }}>
-            <KeyboardShortcuts />
             <Routes>
               <Route path="/" element={<HomePage />} />
               <Route
@@ -106,22 +104,7 @@
               />
               <Route path="*" element={<Navigate to="/" replace />} />
             </Routes>
-            <Box
-              component="footer"
-              sx={{
-                mt: 4,
-                textAlign: "center",
-                fontSize: "0.875rem",
-                color: "text.secondary",
-                opacity: 0.7,
-                maxWidth: { xs: "100%", md: "70%" },
-                mx: "auto",
-              }}
-            >
-              We keep processed documents for research purposes, to which you
-              agree by using the space. For a fully private usage, please
-              duplicate the advanced space
-            </Box>
+            <Footer />
           </Box>
         </Container>
       </Router>
frontend/src/components/{BenchmarkCreateForm.jsx → Benchmark/CreateForm.jsx} RENAMED
@@ -25,18 +25,18 @@ import MenuBookIcon from "@mui/icons-material/MenuBook";
 import DownloadIcon from "@mui/icons-material/Download";
 import VisibilityIcon from "@mui/icons-material/Visibility";
 import CloseIcon from "@mui/icons-material/Close";
-import { useThemeMode } from "../hooks/useThemeMode";
-import getTheme from "../config/theme";
-import API_CONFIG from "../config/api";
+import { useThemeMode } from "../../hooks/useThemeMode";
+import getTheme from "../../config/theme";
+import API_CONFIG from "../../config/api";
 
 /**
  * Component for creating a new benchmark, including file upload and generation initiation
  *
  * @param {Object} props - Component props
  * @param {Function} props.onStartGeneration - Callback when generation starts with sessionId
- * @returns {JSX.Element} BenchmarkCreateForm component
+ * @returns {JSX.Element} CreateForm component
  */
-function BenchmarkCreateForm({ onStartGeneration }) {
+function CreateForm({ onStartGeneration }) {
   const { mode } = useThemeMode();
   const theme = getTheme(mode);
   const [isDragging, setIsDragging] = useState(false);
@@ -110,11 +110,11 @@ function BenchmarkCreateForm({ onStartGeneration }) {
       return;
     }
 
-    // Check file size limit (1MB = 1048576 bytes)
-    if (file.size > 1048576) {
+    // Check file size limit (3MB = 3145728 bytes)
+    if (file.size > 1048576 * 2) {
       setUploadStatus({
         success: false,
-        message: "File size exceeds the 1MB limit",
+        message: "File size exceeds the 2MB limit",
       });
       setOpenSnackbar(true);
       return;
@@ -192,11 +192,11 @@ function BenchmarkCreateForm({ onStartGeneration }) {
       return;
     }
 
-    // Check file size limit (10MB = 10485760 bytes)
-    if (file.size > 10485760) {
+    // Check file size limit (3MB = 3145728 bytes)
+    if (file.size > 1048576 * 3) {
      setUploadStatus({
        success: false,
-        message: "File size exceeds the 10MB limit",
+        message: "File size exceeds the 3MB limit",
      });
      setOpenSnackbar(true);
      return;
@@ -580,4 +580,4 @@ function BenchmarkCreateForm({ onStartGeneration }) {
   );
 }
 
-export default BenchmarkCreateForm;
+export default CreateForm;
frontend/src/components/{BenchmarkDisplay.jsx → Benchmark/Display.jsx} RENAMED
@@ -16,9 +16,9 @@ import AssessmentIcon from "@mui/icons-material/Assessment";
 import LinkIcon from "@mui/icons-material/Link";
 import DownloadIcon from "@mui/icons-material/Download";
 import CheckCircleIcon from "@mui/icons-material/CheckCircle";
-import API_CONFIG from "../config/api";
-import { useThemeMode } from "../hooks/useThemeMode";
-import getTheme from "../config/theme";
+import API_CONFIG from "../../config/api";
+import { useThemeMode } from "../../hooks/useThemeMode";
+import getTheme from "../../config/theme";
 
 /**
  * Component to display benchmark information and evaluation button
@@ -30,7 +30,7 @@ import getTheme from "../config/theme";
  * @param {string} props.datasetUrl - URL to the Hugging Face dataset
  * @returns {JSX.Element} Benchmark display component
  */
-const BenchmarkDisplay = ({
+const Display = ({
   sampleQuestions = [],
   onStartEvaluation,
   sessionId,
@@ -40,26 +40,6 @@ const BenchmarkDisplay = ({
   const { mode } = useThemeMode();
   const theme = getTheme(mode);
 
-  // Default questions if none provided
-  const questions =
-    sampleQuestions.length > 0
-      ? sampleQuestions
-      : [
-          {
-            id: 1,
-            question: "What are the key benefits of the described technology?",
-            answer: "No answer available",
-            type: "single_shot",
-          },
-          {
-            id: 2,
-            question:
-              "Based on the context about machine learning frameworks, how does TensorFlow compare to PyTorch in terms of deployment capabilities?",
-            answer: "No answer available",
-            type: "multi_hop",
-          },
-        ];
-
   const handleEvaluationClick = () => {
     if (onStartEvaluation) {
       onStartEvaluation();
@@ -139,7 +119,7 @@ const BenchmarkDisplay = ({
       </Typography>
 
       <Box sx={{ mb: 3 }}>
-        {questions.map((q, index) => (
+        {sampleQuestions.map((q, index) => (
           <Card
             key={q.id || index}
             variant="outlined"
@@ -179,4 +159,4 @@ const BenchmarkDisplay = ({
   );
 };
 
-export default BenchmarkDisplay;
+export default Display;
frontend/src/components/{BenchmarkGenerator.jsx → Benchmark/Generator.jsx} RENAMED
@@ -2,14 +2,15 @@ import React, { useState, useEffect, useRef } from "react";
2
  import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
3
  import PlayArrowIcon from "@mui/icons-material/PlayArrow";
4
  import AccessTimeIcon from "@mui/icons-material/AccessTime";
5
- import LogDisplay from "./LogDisplay";
6
  import { useNavigate, useSearchParams } from "react-router-dom";
7
- import API_CONFIG from "../config/api";
 
8
 
9
- // Simulation time in milliseconds for pre-calculated documents
10
  const SIMULATION_DURATION = 80000; // 20 seconds
11
 
12
- // Define all benchmark steps in sequence
13
  const BENCHMARK_STEPS = [
14
  "configuration",
15
  "provider_check",
@@ -20,7 +21,7 @@ const BENCHMARK_STEPS = [
20
  "single_shot_question_generation",
21
  ];
22
 
23
- // Step labels for display (more user-friendly names)
24
  const STEP_LABELS = {
25
  configuration: "Configuration",
26
  provider_check: "Finding providers",
@@ -34,7 +35,7 @@ const STEP_LABELS = {
34
  evaluation_saving_results: "Saving evaluation results",
35
  };
36
 
37
- // Simulated log messages for pre-calculated documents
38
  const SIMULATED_LOGS = [
39
  "[INFO] Initializing benchmark generation...",
40
  "[INFO] Generating base configuration file...",
@@ -55,18 +56,21 @@ const SIMULATED_LOGS = [
55
  ];
56
 
57
  /**
58
- * Component to handle benchmark generation and display logs
59
  *
60
- * @param {Object} props - Component props
61
- * @param {string} props.sessionId - The session ID for the uploaded file
62
- * @param {boolean} props.isDefaultDocument - Whether this is a pre-calculated document
63
- * @param {Function} props.onComplete - Function to call when generation is complete
64
- * @returns {JSX.Element} Benchmark generator component
65
  */
66
- const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
 
67
  const [searchParams] = useSearchParams();
68
  const isDefault =
69
  searchParams.get("isDefault") === "true" || isDefaultDocument;
 
 
70
  const [generating, setGenerating] = useState(false);
71
  const [generationComplete, setGenerationComplete] = useState(false);
72
  const [generationLogs, setGenerationLogs] = useState([]);
@@ -76,53 +80,68 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
76
  const [activeStep, setActiveStep] = useState(1);
77
  const [elapsedTime, setElapsedTime] = useState(0);
78
 
79
- // Reference to keep track of the polling interval
80
  const pollingIntervalRef = useRef(null);
81
-
82
- // Reference to keep track of the timer interval
83
  const timerIntervalRef = useRef(null);
84
-
85
- // Reference for starting time
86
  const startTimeRef = useRef(null);
87
-
88
- // Simulation interval reference
89
  const simulationIntervalRef = useRef(null);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
- // Start generation on component mount
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  useEffect(() => {
93
- // Set start time
94
  startTimeRef.current = Date.now();
95
 
96
- // Reference for the timeout
97
- let timeoutRef = null;
98
-
99
- // Start timer
100
  timerIntervalRef.current = setInterval(() => {
101
  const timeElapsed = Math.floor(
102
  (Date.now() - startTimeRef.current) / 1000
103
  );
104
  setElapsedTime(timeElapsed);
105
 
106
- // Check if the elapsed time exceeds 8 minutes (480 seconds) and we are not in simulation mode
107
- if (timeElapsed > 480 && !isDefault && !generationComplete) {
108
- // Display an error message in case of timeout
109
  setError(
110
  "The benchmark generation is taking too long. The demo is currently under heavy load, please try again later."
111
  );
112
- setGenerationComplete(true);
113
-
114
- // Clear intervals
115
- if (pollingIntervalRef.current) {
116
- clearInterval(pollingIntervalRef.current);
117
- }
118
-
119
- if (timerIntervalRef.current) {
120
- clearInterval(timerIntervalRef.current);
121
- }
122
  }
123
  }, 1000);
124
 
125
- // Handler to detect when the page becomes visible again
126
  const handleVisibilityChange = () => {
127
  if (
128
  document.visibilityState === "visible" &&
@@ -130,45 +149,22 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
130
  !generationComplete
131
  ) {
132
  console.log("Page became visible, checking for missed steps...");
133
- // Force a new request to retrieve the logs
 
134
  const checkCurrentState = async () => {
135
  try {
136
- // First try to retrieve the benchmark logs
137
- const logsResponse = await fetch(
138
- `${API_CONFIG.BASE_URL}/benchmark-logs/${sessionId}`
139
  );
140
 
141
- if (logsResponse.ok) {
142
- const logsResult = await logsResponse.json();
143
- if (logsResult.logs) {
144
- setGenerationLogs(logsResult.logs);
145
  }
146
 
147
- // If the task is complete, update the state
148
- if (logsResult.is_completed) {
149
- setGenerationComplete(true);
150
- if (pollingIntervalRef.current) {
151
- clearInterval(pollingIntervalRef.current);
152
- }
153
- if (onComplete) {
154
- onComplete({
155
- success: true,
156
- sessionId,
157
- logs: logsResult.logs,
158
- });
159
- }
160
- }
161
- } else {
162
- // If the benchmark task does not exist, try the configuration logs
163
- const configResponse = await fetch(
164
- `${API_CONFIG.BASE_URL}/config-logs/${sessionId}`
165
- );
166
-
167
- if (configResponse.ok) {
168
- const configResult = await configResponse.json();
169
- if (configResult.logs) {
170
- setGenerationLogs(configResult.logs);
171
- }
172
  }
173
  }
174
  } catch (error) {
@@ -180,103 +176,89 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
180
  }
181
  };
182
 
183
- // Add the listener for visibility change
184
  document.addEventListener("visibilitychange", handleVisibilityChange);
185
 
 
186
  if (isDefault) {
187
  simulateGeneration();
188
  } else {
189
  generateBenchmark();
190
  }
191
 
192
- // Clean up the polling interval and timer when the component unmounts
193
  return () => {
194
- if (pollingIntervalRef.current) {
195
- clearInterval(pollingIntervalRef.current);
196
- }
197
- if (timerIntervalRef.current) {
198
- clearInterval(timerIntervalRef.current);
199
- }
200
- if (simulationIntervalRef.current) {
201
- clearInterval(simulationIntervalRef.current);
202
- }
203
  document.removeEventListener("visibilitychange", handleVisibilityChange);
204
  };
205
  }, [isDefault, sessionId, generationComplete, onComplete]);
206
 
207
- // Simulate the benchmark generation for pre-calculated documents
208
  const simulateGeneration = () => {
209
- setGenerating(true);
210
- setGenerationLogs([]);
211
- setError(null);
212
- setCurrentPhase("initializing");
213
- setCompletedSteps([]);
214
- setActiveStep(1);
215
 
216
- // Timing variables for simulation
217
  const totalSteps = SIMULATED_LOGS.length;
218
- const totalDuration = SIMULATION_DURATION; // 20 seconds
219
- const intervalPerStep = totalDuration / totalSteps;
220
  let currentStep = 0;
221
 
222
- // Function to add next log message
223
  const addNextLog = () => {
224
  if (currentStep < SIMULATED_LOGS.length) {
225
  const newLogs = [...generationLogs, SIMULATED_LOGS[currentStep]];
226
  setGenerationLogs(newLogs);
227
  currentStep++;
228
 
229
- // Check if completed
230
  if (currentStep >= SIMULATED_LOGS.length) {
231
- // Simulation complete
232
  setTimeout(() => {
233
  setCurrentPhase("complete");
234
- setGenerationComplete(true);
235
- clearInterval(simulationIntervalRef.current);
236
- if (onComplete) {
237
- onComplete({
238
- success: true,
239
- sessionId,
240
- logs: newLogs,
241
- });
242
- }
243
  }, 1000);
244
  }
245
  }
246
  };
247
 
248
- // Start simulation
249
  simulationIntervalRef.current = setInterval(addNextLog, intervalPerStep);
250
  };
251
 
252
- // Determine the current phase and completed steps based on logs
253
  useEffect(() => {
254
  if (generationLogs.length === 0) return;
255
 
256
- // Recalculate completed steps completely each time
257
- // instead of just adding new steps
258
  const newCompletedSteps = [];
259
 
260
- // Check for rate limiting errors
261
- const hasRateLimitError = generationLogs.some(
262
  (log) =>
263
  log.includes("RATE_LIMIT_EXCEEDED") ||
264
  log.includes("heavy load") ||
265
- log.includes("rate limit")
 
 
 
 
266
  );
267
 
268
- if (hasRateLimitError) {
269
- setError(
270
- "The demo is under heavy load at the moment. Please try again later."
271
- );
272
- setGenerationComplete(true);
273
- if (pollingIntervalRef.current) {
274
- clearInterval(pollingIntervalRef.current);
275
- }
 
 
 
 
276
  return;
277
  }
278
 
279
- // Identify all completed steps in all logs
280
  generationLogs.forEach((log) => {
281
  const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
282
  if (match && match[1]) {
@@ -290,48 +272,48 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
290
  }
291
  });
292
 
293
- // Determine the active step based on completed steps
294
  let newActiveStep = activeStep;
295
 
296
  if (newCompletedSteps.length > 0) {
297
- // Find the most advanced step in the logs
298
  const maxCompletedStepIndex = Math.max(
299
  ...newCompletedSteps.map((step) => BENCHMARK_STEPS.indexOf(step))
300
  );
301
- // Move to the next step
302
  const calculatedStep = maxCompletedStepIndex + 1;
303
 
304
- // Update only if the new step is more advanced than the current step
305
  if (calculatedStep > activeStep) {
306
  newActiveStep = calculatedStep;
307
  }
308
 
309
- // Ensure that activeStep does not exceed the total number of steps
310
  if (newActiveStep >= BENCHMARK_STEPS.length) {
311
  newActiveStep = BENCHMARK_STEPS.length;
312
  }
313
  } else if (activeStep === 0) {
314
- // If no step is found and the active step is 0, move to 1
315
  newActiveStep = 1;
316
  }
317
 
318
- // Update the state if the steps have changed
319
  if (JSON.stringify(newCompletedSteps) !== JSON.stringify(completedSteps)) {
320
  setCompletedSteps(newCompletedSteps);
321
  }
322
 
323
- // Update the active step only if it has changed
324
  if (newActiveStep !== activeStep) {
325
  setActiveStep(newActiveStep);
326
  }
327
 
328
- // Skip the rest of the log processing if we're simulating
329
  if (isDefault) return;
330
 
331
- // Check the latest logs to determine the current phase
332
- const recentLogs = generationLogs.slice(-10); // Check more logs
333
 
334
- // Detect completion conditions
335
  const isComplete =
336
  recentLogs.some((log) =>
337
  log.includes("[SUCCESS] Benchmark process completed successfully")
@@ -344,20 +326,7 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
344
 
345
  if (isComplete) {
346
  setCurrentPhase("complete");
347
- setGenerationComplete(true);
348
- // Stop polling when benchmark is complete
349
- if (pollingIntervalRef.current) {
350
- clearInterval(pollingIntervalRef.current);
351
- }
352
- // Notify parent component that generation is complete
353
- if (onComplete) {
354
- console.log("Notifying parent that generation is complete");
355
- onComplete({
356
- success: true,
357
- sessionId,
358
- logs: generationLogs,
359
- });
360
- }
361
  } else if (
362
  recentLogs.some((log) => log.includes("Starting ingestion process"))
363
  ) {
@@ -376,31 +345,23 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
376
  isDefault,
377
  ]);
378
 
 
379
  const generateBenchmark = async () => {
380
  if (!sessionId) {
381
  setError("Missing session ID");
382
  return;
383
  }
384
 
385
- setGenerating(true);
386
- setGenerationLogs([]);
387
- setError(null);
388
- setCurrentPhase("initializing");
389
- setCompletedSteps([]);
390
- setActiveStep(1);
391
 
392
  try {
393
- // Call the API to generate the benchmark
394
  const response = await fetch(
395
  `${API_CONFIG.BASE_URL}/generate-benchmark`,
396
  {
397
  method: "POST",
398
- headers: {
399
- "Content-Type": "application/json",
400
- },
401
- body: JSON.stringify({
402
- session_id: sessionId,
403
- }),
404
  }
405
  );
406
 
@@ -409,16 +370,16 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
409
  if (response.ok) {
410
  setGenerationLogs(result.logs || []);
411
 
412
- // Set up polling to track progress
413
  pollingIntervalRef.current = setInterval(async () => {
414
- // Check if we have already completed
415
  if (generationComplete) {
416
  clearInterval(pollingIntervalRef.current);
417
  return;
418
  }
419
 
420
  try {
421
- // Call the API to get the latest logs
422
  const logsResponse = await fetch(
423
  `${API_CONFIG.BASE_URL}/benchmark-progress/${sessionId}`
424
  );
@@ -426,7 +387,7 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
426
  if (logsResponse.ok) {
427
  const logsResult = await logsResponse.json();
428
 
429
- // Update logs if there are new ones
430
  if (
431
  logsResult.logs &&
432
  logsResult.logs.length > generationLogs.length
@@ -434,20 +395,19 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
434
  setGenerationLogs(logsResult.logs);
435
  }
436
 
437
- // Check if the task is complete
438
  if (logsResult.is_completed) {
439
  setGenerationComplete(true);
440
  clearInterval(pollingIntervalRef.current);
441
- // Notification is now handled in the useEffect above
442
  }
443
  }
444
  } catch (error) {
445
  console.log("Error polling for logs:", error);
446
- // Do not stop polling in case of network errors
447
  }
448
- }, 2000); // Poll every 2 seconds
449
  } else {
450
- // Handle error
451
  setGenerationLogs([`Error: ${result.error || "Unknown error"}`]);
452
  setError(result.error || "Benchmark generation failed");
453
  }
@@ -460,29 +420,29 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
460
  }
461
  };
462
 
463
- // Get the current step information for display
464
  const getCurrentStepInfo = () => {
465
  const totalSteps = BENCHMARK_STEPS.length;
466
  const currentStepIndex = activeStep;
467
 
468
- // If there's no active step yet
469
  if (currentStepIndex <= 1 && completedSteps.length === 0) {
470
  return `Starting (1/${totalSteps})`;
471
  }
472
 
473
- // If all steps are completed
474
  if (currentStepIndex >= totalSteps) {
475
  return `Complete (${totalSteps}/${totalSteps})`;
476
  }
477
 
478
- // Get current step name
479
  const currentStepName =
480
  STEP_LABELS[BENCHMARK_STEPS[currentStepIndex]] || "Processing";
481
 
482
  return `${currentStepName} (${currentStepIndex}/${totalSteps})`;
483
  };
484
 
485
- // Format elapsed time in HH:MM:SS
486
  const formatElapsedTime = () => {
487
  const hours = Math.floor(elapsedTime / 3600);
488
  const minutes = Math.floor((elapsedTime % 3600) / 60);
@@ -495,13 +455,27 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
495
  ].join(":");
496
  };
497
 
498
- // If complete, stop the timer
499
  useEffect(() => {
500
  if (generationComplete && timerIntervalRef.current) {
501
  clearInterval(timerIntervalRef.current);
502
  }
503
  }, [generationComplete]);
504
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
  return (
506
  <Paper
507
  elevation={3}
@@ -544,9 +518,7 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
544
  </Box>
545
 
546
  {error ? (
547
- <Alert severity="error" sx={{ width: "100%" }}>
548
- {error}
549
- </Alert>
550
  ) : (
551
  <>
552
  <CircularProgress size={60} sx={{ mb: 2 }} />
@@ -581,4 +553,4 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
581
  );
582
  };
583
 
584
- export default BenchmarkGenerator;
 
2
  import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
3
  import PlayArrowIcon from "@mui/icons-material/PlayArrow";
4
  import AccessTimeIcon from "@mui/icons-material/AccessTime";
5
+ import LogDisplay from "../LogDisplay";
6
  import { useNavigate, useSearchParams } from "react-router-dom";
7
+ import API_CONFIG from "../../config/api";
8
+ import ErrorDisplay from "../common/ErrorDisplay";
9
 
10
+ // Durée de simulation en millisecondes pour les documents précalculés
11
  const SIMULATION_DURATION = 80000; // 20 seconds
12
 
13
+ // Définir toutes les étapes du benchmark en séquence
14
  const BENCHMARK_STEPS = [
15
  "configuration",
16
  "provider_check",
 
21
  "single_shot_question_generation",
22
  ];
23
 
24
+ // Étiquettes des étapes pour l'affichage (noms plus conviviaux)
25
  const STEP_LABELS = {
26
  configuration: "Configuration",
27
  provider_check: "Finding providers",
 
35
  evaluation_saving_results: "Saving evaluation results",
36
  };
37
 
38
+ // Messages de log simulés pour les documents précalculés
39
  const SIMULATED_LOGS = [
40
  "[INFO] Initializing benchmark generation...",
41
  "[INFO] Generating base configuration file...",
 
56
  ];
57
 
58
  /**
59
+ * Composant pour gérer la génération de benchmark et afficher les logs
60
  *
61
+ * @param {Object} props - Propriétés du composant
62
+ * @param {string} props.sessionId - ID de session pour le fichier uploadé
63
+ * @param {boolean} props.isDefaultDocument - S'il s'agit d'un document précalculé
64
+ * @param {Function} props.onComplete - Fonction à appeler lorsque la génération est terminée
65
+ * @returns {JSX.Element} Composant de génération de benchmark
66
  */
67
+ const Generator = ({ sessionId, isDefaultDocument, onComplete }) => {
68
+ const navigate = useNavigate();
69
  const [searchParams] = useSearchParams();
70
  const isDefault =
71
  searchParams.get("isDefault") === "true" || isDefaultDocument;
72
+
73
+ // États du composant
74
  const [generating, setGenerating] = useState(false);
75
  const [generationComplete, setGenerationComplete] = useState(false);
76
  const [generationLogs, setGenerationLogs] = useState([]);
 
80
  const [activeStep, setActiveStep] = useState(1);
81
  const [elapsedTime, setElapsedTime] = useState(0);
82
 
83
+ // Références pour les intervalles et timers
84
  const pollingIntervalRef = useRef(null);
 
 
85
  const timerIntervalRef = useRef(null);
 
 
86
  const startTimeRef = useRef(null);
 
 
87
  const simulationIntervalRef = useRef(null);
88
+ const hasRedirectedRef = useRef(false);
89
+
90
+ // Fonction pour réinitialiser les états de génération
91
+ const resetGenerationStates = () => {
92
+ setGenerating(true);
93
+ setGenerationLogs([]);
94
+ setError(null);
95
+ setCurrentPhase("initializing");
96
+ setCompletedSteps([]);
97
+ setActiveStep(1);
98
+ };
99
+
100
+ // Fonction pour arrêter les intervalles
101
+ const clearAllIntervals = () => {
102
+ if (pollingIntervalRef.current) clearInterval(pollingIntervalRef.current);
103
+ if (timerIntervalRef.current) clearInterval(timerIntervalRef.current);
104
+ if (simulationIntervalRef.current)
105
+ clearInterval(simulationIntervalRef.current);
106
+ };
107
 
108
+ // Fonction pour notifier la fin de la génération
109
+ const notifyGenerationComplete = (success, logs, errorMsg = null) => {
110
+ setGenerationComplete(true);
111
+ clearAllIntervals();
112
+
113
+ if (onComplete) {
114
+ onComplete({
115
+ success,
116
+ sessionId,
117
+ logs: logs || generationLogs,
118
+ error: errorMsg,
119
+ });
120
+ }
121
+ };
122
+
123
+ // Démarrer la génération au montage du composant
124
  useEffect(() => {
125
+ // Configurer l'heure de départ
126
  startTimeRef.current = Date.now();
127
 
128
+ // Démarrer le timer
 
 
 
129
  timerIntervalRef.current = setInterval(() => {
130
  const timeElapsed = Math.floor(
131
  (Date.now() - startTimeRef.current) / 1000
132
  );
133
  setElapsedTime(timeElapsed);
134
 
135
+ // Vérifier si le temps écoulé dépasse 5 minutes et que nous ne sommes pas en mode simulation
136
+ if (timeElapsed > 300 && !isDefault && !generationComplete) {
 
137
  setError(
138
  "The benchmark generation is taking too long. The demo is currently under heavy load, please try again later."
139
  );
140
+ notifyGenerationComplete(false, null, "Timeout error");
 
 
 
 
 
 
 
 
 
141
  }
142
  }, 1000);
143
 
144
+ // Gestionnaire pour détecter quand la page redevient visible
145
  const handleVisibilityChange = () => {
146
  if (
147
  document.visibilityState === "visible" &&
 
149
  !generationComplete
150
  ) {
151
  console.log("Page became visible, checking for missed steps...");
152
+
153
+ // Forcer une nouvelle requête pour récupérer les logs
154
  const checkCurrentState = async () => {
155
  try {
156
+ const progressResponse = await fetch(
157
+ `${API_CONFIG.BASE_URL}/benchmark-progress/${sessionId}`
 
158
  );
159
 
160
+ if (progressResponse.ok) {
161
+ const progressResult = await progressResponse.json();
162
+ if (progressResult.logs) {
163
+ setGenerationLogs(progressResult.logs);
164
  }
165
 
166
+ if (progressResult.is_completed) {
167
+ notifyGenerationComplete(true, progressResult.logs);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  }
169
  }
170
  } catch (error) {
 
176
  }
177
  };
178
 
179
+ // Ajouter l'écouteur pour le changement de visibilité
180
  document.addEventListener("visibilitychange", handleVisibilityChange);
181
 
182
+ // Lancer la simulation ou la génération
183
  if (isDefault) {
184
  simulateGeneration();
185
  } else {
186
  generateBenchmark();
187
  }
188
 
189
+ // Nettoyer les intervalles et écouteurs lors du démontage
190
  return () => {
191
+ clearAllIntervals();
 
 
 
 
 
 
 
 
192
  document.removeEventListener("visibilitychange", handleVisibilityChange);
193
  };
194
  }, [isDefault, sessionId, generationComplete, onComplete]);
195
 
196
+ // Simuler la génération de benchmark pour les documents précalculés
197
  const simulateGeneration = () => {
198
+ resetGenerationStates();
 
 
 
 
 
199
 
200
+ // Variables de timing pour la simulation
201
  const totalSteps = SIMULATED_LOGS.length;
202
+ const intervalPerStep = SIMULATION_DURATION / totalSteps;
 
203
  let currentStep = 0;
204
 
205
+ // Fonction pour ajouter le prochain message de log
206
  const addNextLog = () => {
207
  if (currentStep < SIMULATED_LOGS.length) {
208
  const newLogs = [...generationLogs, SIMULATED_LOGS[currentStep]];
209
  setGenerationLogs(newLogs);
210
  currentStep++;
211
 
212
+ // Vérifier si terminé
213
  if (currentStep >= SIMULATED_LOGS.length) {
214
+ // Simulation terminée
215
  setTimeout(() => {
216
  setCurrentPhase("complete");
217
+ notifyGenerationComplete(true, newLogs);
 
 
 
 
 
 
 
 
218
  }, 1000);
219
  }
220
  }
221
  };
222
 
223
+ // Démarrer la simulation
224
  simulationIntervalRef.current = setInterval(addNextLog, intervalPerStep);
225
  };
226
 
227
+ // Déterminer la phase actuelle et les étapes terminées en fonction des logs
228
  useEffect(() => {
229
  if (generationLogs.length === 0) return;
230
 
231
+ // Recalculer les étapes terminées à chaque fois
 
232
  const newCompletedSteps = [];
233
 
234
+ // Vérifier les erreurs de limitation de débit et de disponibilité du modèle
235
+ const hasError = generationLogs.some(
236
  (log) =>
237
  log.includes("RATE_LIMIT_EXCEEDED") ||
238
  log.includes("heavy load") ||
239
+ log.includes("rate limit") ||
240
+ log.includes("Required models not available") ||
241
+ log.includes("Configuration failed") ||
242
+ log.includes("Error") ||
243
+ log.includes("ERROR")
244
  );
245
 
246
+ if (hasError) {
247
+ const errorMessage =
248
+ generationLogs.find(
249
+ (log) =>
250
+ log.includes("Required models not available") ||
251
+ log.includes("Configuration failed") ||
252
+ log.includes("Error generating configuration")
253
+ ) ||
254
+ "The demo is under heavy load at the moment. Please try again later.";
255
+
256
+ setError(errorMessage);
257
+ notifyGenerationComplete(false, null, errorMessage);
258
  return;
259
  }
260
 
261
+ // Identify every completed step across all logs
262
  generationLogs.forEach((log) => {
263
  const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
264
  if (match && match[1]) {
 
272
  }
273
  });
274
 
275
+ // Determine the active step from the completed steps
276
  let newActiveStep = activeStep;
277
 
278
  if (newCompletedSteps.length > 0) {
279
+ // Find the furthest step reached in the logs
280
  const maxCompletedStepIndex = Math.max(
281
  ...newCompletedSteps.map((step) => BENCHMARK_STEPS.indexOf(step))
282
  );
283
+ // Advance to the next step
284
  const calculatedStep = maxCompletedStepIndex + 1;
285
 
286
+ // Update only if the new step is further along than the current one
287
  if (calculatedStep > activeStep) {
288
  newActiveStep = calculatedStep;
289
  }
290
 
291
+ // Make sure activeStep does not exceed the total number of steps
292
  if (newActiveStep >= BENCHMARK_STEPS.length) {
293
  newActiveStep = BENCHMARK_STEPS.length;
294
  }
295
  } else if (activeStep === 0) {
296
+ // If no step was found and the active step is 0, move to 1
297
  newActiveStep = 1;
298
  }
299
 
300
+ // Update state if the completed steps changed
301
  if (JSON.stringify(newCompletedSteps) !== JSON.stringify(completedSteps)) {
302
  setCompletedSteps(newCompletedSteps);
303
  }
304
 
305
+ // Update the active step only if it changed
306
  if (newActiveStep !== activeStep) {
307
  setActiveStep(newActiveStep);
308
  }
309
 
310
+ // Skip the rest of the log processing while simulating
311
  if (isDefault) return;
312
 
313
+ // Inspect the latest logs to determine the current phase
314
+ const recentLogs = generationLogs.slice(-10);
315
 
316
+ // Detect completion conditions
317
  const isComplete =
318
  recentLogs.some((log) =>
319
  log.includes("[SUCCESS] Benchmark process completed successfully")
 
326
 
327
  if (isComplete) {
328
  setCurrentPhase("complete");
329
+ notifyGenerationComplete(true, generationLogs);
330
  } else if (
331
  recentLogs.some((log) => log.includes("Starting ingestion process"))
332
  ) {
 
345
  isDefault,
346
  ]);
347
 
348
+ // Generate the benchmark
349
  const generateBenchmark = async () => {
350
  if (!sessionId) {
351
  setError("Missing session ID");
352
  return;
353
  }
354
 
355
+ resetGenerationStates();
356
 
357
  try {
358
+ // Call the API to generate the benchmark
359
  const response = await fetch(
360
  `${API_CONFIG.BASE_URL}/generate-benchmark`,
361
  {
362
  method: "POST",
363
+ headers: { "Content-Type": "application/json" },
364
+ body: JSON.stringify({ session_id: sessionId }),
365
  }
366
  );
367
368
  const result = await response.json();
370
  if (response.ok) {
371
  setGenerationLogs(result.logs || []);
372
 
373
+ // Set up polling to track progress
374
  pollingIntervalRef.current = setInterval(async () => {
375
+ // Bail out if we are already done
376
  if (generationComplete) {
377
  clearInterval(pollingIntervalRef.current);
378
  return;
379
  }
380
 
381
  try {
382
+ // Call the API to fetch the latest logs
383
  const logsResponse = await fetch(
384
  `${API_CONFIG.BASE_URL}/benchmark-progress/${sessionId}`
385
  );
 
387
  if (logsResponse.ok) {
388
  const logsResult = await logsResponse.json();
389
 
390
+ // Update the logs when new ones arrive
391
  if (
392
  logsResult.logs &&
393
  logsResult.logs.length > generationLogs.length
 
395
  setGenerationLogs(logsResult.logs);
396
  }
397
 
398
+ // Check whether the task is finished
399
  if (logsResult.is_completed) {
400
  setGenerationComplete(true);
401
  clearInterval(pollingIntervalRef.current);
 
402
  }
403
  }
404
  } catch (error) {
405
  console.log("Error polling for logs:", error);
406
+ // Do not stop polling on network errors
407
  }
408
+ }, 2000); // Poll every 2 seconds
409
  } else {
410
+ // Handle the error
411
  setGenerationLogs([`Error: ${result.error || "Unknown error"}`]);
412
  setError(result.error || "Benchmark generation failed");
413
  }
 
420
  }
421
  };
422
 
423
+ // Get current step info for display
424
  const getCurrentStepInfo = () => {
425
  const totalSteps = BENCHMARK_STEPS.length;
426
  const currentStepIndex = activeStep;
427
 
428
+ // If there is no active step yet
429
  if (currentStepIndex <= 1 && completedSteps.length === 0) {
430
  return `Starting (1/${totalSteps})`;
431
  }
432
 
433
+ // If every step is complete
434
  if (currentStepIndex >= totalSteps) {
435
  return `Complete (${totalSteps}/${totalSteps})`;
436
  }
437
 
438
+ // Get the name of the current step
439
  const currentStepName =
440
  STEP_LABELS[BENCHMARK_STEPS[currentStepIndex]] || "Processing";
441
 
442
  return `${currentStepName} (${currentStepIndex}/${totalSteps})`;
443
  };
444
 
445
+ // Format elapsed time as HH:MM:SS
446
  const formatElapsedTime = () => {
447
  const hours = Math.floor(elapsedTime / 3600);
448
  const minutes = Math.floor((elapsedTime % 3600) / 60);
 
455
  ].join(":");
456
  };
457
 
458
+ // Stop the timer once generation is complete
459
  useEffect(() => {
460
  if (generationComplete && timerIntervalRef.current) {
461
  clearInterval(timerIntervalRef.current);
462
  }
463
  }, [generationComplete]);
464
 
465
+ const handleGenerationComplete = (result) => {
466
+ console.log("Benchmark generation completed:", result);
467
+ if (result && result.success && !hasRedirectedRef.current) {
468
+ hasRedirectedRef.current = true; // Mark that the redirect has happened
469
+ // Short pause before navigating to avoid timing issues
470
+ setTimeout(() => {
471
+ navigate(`/benchmark-display?session=${sessionId}`);
472
+ }, 500);
473
+ } else if (result && !result.success) {
474
+ // Show the error instead of redirecting
475
+ setError(result.error || "An error occurred during benchmark generation");
476
+ }
477
+ };
478
+
479
  return (
480
  <Paper
481
  elevation={3}
 
518
  </Box>
519
 
520
  {error ? (
521
+ <ErrorDisplay error={error} />
 
 
522
  ) : (
523
  <>
524
  <CircularProgress size={60} sx={{ mb: 2 }} />
 
553
  );
554
  };
555
 
556
+ export default Generator;
frontend/src/components/Benchmark/hooks/useBenchmarkLogs.js ADDED
@@ -0,0 +1,192 @@
1
+ import { useState, useEffect } from "react";
2
+
3
+ const BENCHMARK_STEPS = [
4
+ "configuration",
5
+ "provider_check",
6
+ "ingestion",
7
+ "upload_ingest_to_hub",
8
+ "summarization",
9
+ "chunking",
10
+ "single_shot_question_generation",
11
+ ];
12
+
13
+ export const useBenchmarkLogs = (sessionId, isDefault, onComplete) => {
14
+ const [generationLogs, setGenerationLogs] = useState([]);
15
+ const [error, setError] = useState(null);
16
+ const [currentPhase, setCurrentPhase] = useState("initializing");
17
+ const [completedSteps, setCompletedSteps] = useState([]);
18
+ const [activeStep, setActiveStep] = useState(1);
19
+ const [generationComplete, setGenerationComplete] = useState(false);
20
+
21
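+ // Scan the logs for known failure signatures and map them to user-facing error messages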
+ const checkForErrors = (logs) => {
22
+ // Check for rate limiting errors
23
+ const hasRateLimitError = logs.some(
24
+ (log) =>
25
+ log.includes("RATE_LIMIT_EXCEEDED") ||
26
+ log.includes("heavy load") ||
27
+ log.includes("rate limit")
28
+ );
29
+
30
+ if (hasRateLimitError) {
31
+ return {
32
+ hasError: true,
33
+ error:
34
+ "The demo is under heavy load at the moment. Please try again later.",
35
+ };
36
+ }
37
+
38
+ // Check for model availability errors
39
+ const hasModelError = logs.some(
40
+ (log) =>
41
+ log.includes("Required models not available") ||
42
+ log.includes("Some required models are not available")
43
+ );
44
+
45
+ if (hasModelError) {
46
+ return {
47
+ hasError: true,
48
+ error:
49
+ "Some required models are not available at the moment. Please try again later.",
50
+ };
51
+ }
52
+
53
+ // Check for configuration errors
54
+ const hasConfigError = logs.some(
55
+ (log) =>
56
+ log.includes("Error generating configuration") ||
57
+ log.includes("Configuration failed")
58
+ );
59
+
60
+ if (hasConfigError) {
61
+ return {
62
+ hasError: true,
63
+ error:
64
+ "Failed to generate benchmark configuration. Please try again later.",
65
+ };
66
+ }
67
+
68
+ return { hasError: false };
69
+ };
70
+
71
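+ // Derive completed steps from "[SUCCESS] Stage completed: <name>" markers and advance the active step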
+ const updateSteps = (logs) => {
72
+ const newCompletedSteps = [];
73
+
74
+ logs.forEach((log) => {
75
+ const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
76
+ if (match && match[1]) {
77
+ const completedStep = match[1].trim();
78
+ if (
79
+ BENCHMARK_STEPS.includes(completedStep) &&
80
+ !newCompletedSteps.includes(completedStep)
81
+ ) {
82
+ newCompletedSteps.push(completedStep);
83
+ }
84
+ }
85
+ });
86
+
87
+ let newActiveStep = activeStep;
88
+
89
+ if (newCompletedSteps.length > 0) {
90
+ const maxCompletedStepIndex = Math.max(
91
+ ...newCompletedSteps.map((step) => BENCHMARK_STEPS.indexOf(step))
92
+ );
93
+ const calculatedStep = maxCompletedStepIndex + 1;
94
+
95
+ if (calculatedStep > activeStep) {
96
+ newActiveStep = calculatedStep;
97
+ }
98
+
99
+ if (newActiveStep >= BENCHMARK_STEPS.length) {
100
+ newActiveStep = BENCHMARK_STEPS.length;
101
+ }
102
+ } else if (activeStep === 0) {
103
+ newActiveStep = 1;
104
+ }
105
+
106
+ return { newCompletedSteps, newActiveStep };
107
+ };
108
+
109
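+ // Infer the current UI phase from the ten most recent log lines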
+ const updatePhase = (logs) => {
110
+ const recentLogs = logs.slice(-10);
111
+
112
+ const isComplete = recentLogs.some((log) =>
113
+ log.includes("[SUCCESS] Benchmark process completed successfully")
114
+ );
115
+
116
+ if (isComplete) {
117
+ return "complete";
118
+ } else if (
119
+ recentLogs.some((log) => log.includes("Starting ingestion process"))
120
+ ) {
121
+ return "benchmarking";
122
+ } else if (
123
+ recentLogs.some((log) => log.includes("Generating base configuration"))
124
+ ) {
125
+ return "configuring";
126
+ }
127
+
128
+ return currentPhase;
129
+ };
130
+
131
+ useEffect(() => {
132
+ if (generationLogs.length === 0) return;
133
+
134
+ const errorCheck = checkForErrors(generationLogs);
135
+ if (errorCheck.hasError) {
136
+ setError(errorCheck.error);
137
+ setGenerationComplete(true);
138
+ if (onComplete) {
139
+ onComplete({
140
+ success: false,
141
+ error: errorCheck.error,
142
+ sessionId,
143
+ });
144
+ }
145
+ return;
146
+ }
147
+
148
+ const { newCompletedSteps, newActiveStep } = updateSteps(generationLogs);
149
+ const newPhase = updatePhase(generationLogs);
150
+
151
+ if (JSON.stringify(newCompletedSteps) !== JSON.stringify(completedSteps)) {
152
+ setCompletedSteps(newCompletedSteps);
153
+ }
154
+
155
+ if (newActiveStep !== activeStep) {
156
+ setActiveStep(newActiveStep);
157
+ }
158
+
159
+ if (newPhase !== currentPhase) {
160
+ setCurrentPhase(newPhase);
161
+ }
162
+
163
+ // Check whether the benchmark actually finished without errors
164
+ const recentLogs = generationLogs.slice(-10);
165
+ const isComplete = recentLogs.some((log) =>
166
+ log.includes("[SUCCESS] Benchmark process completed successfully")
167
+ );
168
+
169
+ if (isComplete) {
170
+ setGenerationComplete(true);
171
+ if (onComplete) {
172
+ onComplete({
173
+ success: true,
174
+ sessionId,
175
+ logs: generationLogs,
176
+ });
177
+ }
178
+ }
179
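+ // Note: the derived step/phase state is written inside this effect, so only the log inputs are listed as deps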
+ }, [generationLogs, sessionId, onComplete]);
180
+
181
+ return {
182
+ generationLogs,
183
+ setGenerationLogs,
184
+ error,
185
+ setError,
186
+ currentPhase,
187
+ completedSteps,
188
+ activeStep,
189
+ generationComplete,
190
+ setGenerationComplete,
191
+ };
192
+ };
frontend/src/components/Benchmark/hooks/useBenchmarkPolling.js ADDED
@@ -0,0 +1,106 @@
1
+ import { useRef, useEffect } from "react";
2
+ import API_CONFIG from "../../../config/api";
3
+
4
+ export const useBenchmarkPolling = (
5
+ sessionId,
6
+ setGenerationLogs,
7
+ setGenerationComplete,
8
+ onComplete
9
+ ) => {
10
+ const pollingIntervalRef = useRef(null);
11
+
12
+ const startPolling = () => {
13
+ if (pollingIntervalRef.current) {
14
+ clearInterval(pollingIntervalRef.current);
15
+ }
16
+
17
+ pollingIntervalRef.current = setInterval(async () => {
18
+ try {
19
+ const logsResponse = await fetch(
20
+ `${API_CONFIG.BASE_URL}/benchmark-progress/${sessionId}`
21
+ );
22
+
23
+ if (logsResponse.ok) {
24
+ const logsResult = await logsResponse.json();
25
+
26
+ if (logsResult.logs) {
27
+ setGenerationLogs((prevLogs) => {
28
+ if (logsResult.logs.length > prevLogs.length) {
29
+ return logsResult.logs;
30
+ }
31
+ return prevLogs;
32
+ });
33
+ }
34
+
35
+ // Vérifier s'il y a des erreurs dans les logs
36
+ const hasError = logsResult.logs.some(
37
+ (log) =>
38
+ log.includes("Error") ||
39
+ log.includes("ERROR") ||
40
+ log.includes("Failed") ||
41
+ log.includes("RATE_LIMIT_EXCEEDED") ||
42
+ log.includes("heavy load") ||
43
+ log.includes("rate limit")
44
+ );
45
+
46
+ if (hasError) {
47
+ setGenerationComplete(true);
48
+ clearInterval(pollingIntervalRef.current);
49
+ if (onComplete) {
50
+ onComplete({
51
+ success: false,
52
+ error:
53
+ "An error occurred during benchmark generation. Please try again later.",
54
+ sessionId,
55
+ });
56
+ }
57
+ return;
58
+ }
59
+
60
+ if (logsResult.is_completed) {
61
+ setGenerationComplete(true);
62
+ clearInterval(pollingIntervalRef.current);
63
+ if (onComplete) {
64
+ onComplete({
65
+ success: true,
66
+ sessionId,
67
+ logs: logsResult.logs,
68
+ });
69
+ }
70
+ }
71
+ } else {
72
+ const errorData = await logsResponse.json();
73
+ setGenerationComplete(true);
74
+ clearInterval(pollingIntervalRef.current);
75
+ if (onComplete) {
76
+ onComplete({
77
+ success: false,
78
+ error: errorData.error || "Unknown error",
79
+ sessionId,
80
+ });
81
+ }
82
+ }
83
+ } catch (error) {
84
+ setGenerationComplete(true);
85
+ clearInterval(pollingIntervalRef.current);
86
+ if (onComplete) {
87
+ onComplete({
88
+ success: false,
89
+ error: error.message,
90
+ sessionId,
91
+ });
92
+ }
93
+ }
94
+ }, 2000);
95
+ };
96
+
97
+ useEffect(() => {
98
+ return () => {
99
+ if (pollingIntervalRef.current) {
100
+ clearInterval(pollingIntervalRef.current);
101
+ }
102
+ };
103
+ }, []);
104
+
105
+ return { startPolling };
106
+ };
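
For orientation, a minimal sketch of how the Generator component can compose the Benchmark hooks added in this commit (assumed wiring; the actual composition lives in Generator.jsx):

  // Sketch only: hook names and signatures are taken from the files in this commit
  const logsState = useBenchmarkLogs(sessionId, isDefault, handleGenerationComplete);
  const { startPolling } = useBenchmarkPolling(
    sessionId,
    logsState.setGenerationLogs,
    logsState.setGenerationComplete,
    handleGenerationComplete
  );
  const { startSimulation } = useBenchmarkSimulation(
    logsState.setGenerationLogs,
    logsState.setGenerationComplete,
    handleGenerationComplete,
    sessionId
  );
  // Pre-calculated documents replay scripted logs; real sessions poll the backend
  useEffect(() => {
    isDefault ? startSimulation() : startPolling();
  }, [isDefault]);
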
frontend/src/components/Benchmark/hooks/useBenchmarkSimulation.js ADDED
@@ -0,0 +1,66 @@
1
+ import { useRef } from "react";
2
+
3
+ const SIMULATED_LOGS = [
4
+ "[INFO] Initializing benchmark generation...",
5
+ "[INFO] Generating base configuration file...",
6
+ "[SUCCESS] Stage completed: configuration",
7
+ "[INFO] Finding available providers for models...",
8
+ "[SUCCESS] Stage completed: provider_check",
9
+ "[INFO] Starting ingestion process...",
10
+ "[SUCCESS] Stage completed: ingestion",
11
+ "[INFO] Processing document content for upload...",
12
+ "[SUCCESS] Stage completed: upload_ingest_to_hub",
13
+ "[INFO] Generating document summary...",
14
+ "[SUCCESS] Stage completed: summarization",
15
+ "[INFO] Chunking content for better analysis...",
16
+ "[SUCCESS] Stage completed: chunking",
17
+ "[INFO] Generating single-shot questions...",
18
+ "[SUCCESS] Stage completed: single_shot_question_generation",
19
+ "[SUCCESS] Benchmark process completed successfully",
20
+ ];
21
+
22
+ export const useBenchmarkSimulation = (
23
+ setGenerationLogs,
24
+ setGenerationComplete,
25
+ onComplete,
26
+ sessionId
27
+ ) => {
28
+ const simulationIntervalRef = useRef(null);
29
+ const SIMULATION_DURATION = 80000; // 80 seconds
30
+
31
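+ // Replay the scripted logs at a fixed cadence so the UI behaves like a real run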
+ const startSimulation = () => {
32
+ setGenerationLogs([]);
33
+ let currentStep = 0;
34
+
35
+ const addNextLog = () => {
36
+ if (currentStep < SIMULATED_LOGS.length) {
37
+ // Capture the entry before queueing the state update: the functional
+ // updater can run after currentStep has already been incremented below
+ const nextLog = SIMULATED_LOGS[currentStep];
38
+ setGenerationLogs((prevLogs) => [...prevLogs, nextLog]);
41
+ currentStep++;
42
+
43
+ if (currentStep >= SIMULATED_LOGS.length) {
44
+ setTimeout(() => {
45
+ setGenerationComplete(true);
46
+ clearInterval(simulationIntervalRef.current);
47
+ if (onComplete) {
48
+ onComplete({
49
+ success: true,
50
+ sessionId,
51
+ logs: SIMULATED_LOGS,
52
+ });
53
+ }
54
+ }, 1000);
55
+ }
56
+ }
57
+ };
58
+
59
+ const totalSteps = SIMULATED_LOGS.length;
60
+ const intervalPerStep = SIMULATION_DURATION / totalSteps;
61
+
62
+ simulationIntervalRef.current = setInterval(addNextLog, intervalPerStep);
63
+ };
64
+
65
+ return { startSimulation };
66
+ };
frontend/src/components/BenchmarkEvaluation.jsx DELETED
@@ -1,401 +0,0 @@
1
- import React, { useState, useEffect, useRef } from "react";
2
- import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
3
- import { useNavigate, useSearchParams } from "react-router-dom";
4
- import API_CONFIG from "../config/api";
5
-
6
- // Simulation time in milliseconds for pre-calculated documents
7
- const SIMULATION_DURATION = 120000; // 2 minutes
8
-
9
- // Message change interval for standard vs pre-calculated documents
10
- const MESSAGE_CHANGE_INTERVAL = {
11
- DEFAULT: 25000, // 25 seconds for standard documents
12
- PRECALCULATED: 25000, // 25 seconds for pre-calculated documents
13
- };
14
-
15
- // Starting messages with their timing
16
- const STARTING_MESSAGES = [
17
- { message: "Initializing evaluation environment", step: 1, totalSteps: 5 },
18
- { message: "Finding available model providers", step: 2, totalSteps: 5 },
19
- { message: "Starting evaluation process", step: 3, totalSteps: 5 },
20
- { message: "Evaluating models", step: 4, totalSteps: 5 },
21
- { message: "Storing evaluation results", step: 5, totalSteps: 5 },
22
- ];
23
-
24
- const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
25
- const [searchParams] = useSearchParams();
26
- const isDefault =
27
- isDefaultDocument ||
28
- ["the-bitter-lesson", "hurricane-faq", "pokemon-guide"].includes(sessionId);
29
- const [evaluationComplete, setEvaluationComplete] = useState(false);
30
- const [error, setError] = useState(null);
31
- const [elapsedTime, setElapsedTime] = useState(0);
32
- const [startingMessageIndex, setStartingMessageIndex] = useState(0);
33
- const [evaluationStarted, setEvaluationStarted] = useState(false);
34
-
35
- const timerIntervalRef = useRef(null);
36
- const startTimeRef = useRef(null);
37
- const startingMessageIntervalRef = useRef(null);
38
- const pollingIntervalRef = useRef(null);
39
- const simulationTimeoutRef = useRef(null);
40
-
41
- const navigate = useNavigate();
42
-
43
- // Add effect to handle automatic redirection when evaluation is complete
44
- useEffect(() => {
45
- if (evaluationComplete) {
46
- navigate(`/evaluation-display?session=${sessionId}`);
47
- }
48
- }, [evaluationComplete, sessionId, navigate]);
49
-
50
- // Add effect to handle starting messages
51
- useEffect(() => {
52
- // Only set up the automatic interval for default documents
53
- // Real evaluations rely solely on updates from the API
54
- if (isDefault) {
55
- startingMessageIntervalRef.current = setInterval(() => {
56
- setStartingMessageIndex((prev) => {
57
- if (prev < STARTING_MESSAGES.length - 1) {
58
- return prev + 1;
59
- }
60
- return prev;
61
- });
62
- }, MESSAGE_CHANGE_INTERVAL.PRECALCULATED);
63
- }
64
-
65
- return () => {
66
- if (startingMessageIntervalRef.current) {
67
- clearInterval(startingMessageIntervalRef.current);
68
- }
69
- };
70
- }, [isDefault]);
71
-
72
- // Start evaluation when component mounts
73
- useEffect(() => {
74
- // Set start time
75
- startTimeRef.current = Date.now();
76
-
77
- // Start timer
78
- timerIntervalRef.current = setInterval(() => {
79
- const timeElapsed = Math.floor(
80
- (Date.now() - startTimeRef.current) / 1000
81
- );
82
- setElapsedTime(timeElapsed);
83
- }, 1000);
84
-
85
- // Handler to detect when the page becomes visible again
86
- const handleVisibilityChange = () => {
87
- if (
88
- document.visibilityState === "visible" &&
89
- !isDefault &&
90
- !evaluationComplete &&
91
- evaluationStarted // Check whether the evaluation has already started
92
- ) {
93
- console.log("Page became visible, checking evaluation status...");
94
- // Force a fresh request to fetch the evaluation state
95
- const checkEvaluationStatus = async () => {
96
- try {
97
- const logsResponse = await fetch(
98
- `${API_CONFIG.BASE_URL}/evaluation-logs/${sessionId}`
99
- );
100
-
101
- if (logsResponse.ok) {
102
- const logsResult = await logsResponse.json();
103
- if (logsResult.is_completed) {
104
- // End the evaluation if it is finished
105
- setEvaluationComplete(true);
106
-
107
- // Advance to the last message step
108
- setStartingMessageIndex(STARTING_MESSAGES.length - 1);
109
-
110
- // Clean up the intervals
111
- if (pollingIntervalRef.current) {
112
- clearInterval(pollingIntervalRef.current);
113
- }
114
- if (startingMessageIntervalRef.current) {
115
- clearInterval(startingMessageIntervalRef.current);
116
- }
117
- } else {
118
- // If the evaluation is still running, use the backend's current step
119
- if (logsResult.current_step) {
120
- // Use the mapping function to determine the message index
121
- const newIndex = mapStepToMessageIndex(
122
- logsResult.current_step
123
- );
124
- setStartingMessageIndex(newIndex);
125
- } else {
126
- // Time-based fallback when the step is unavailable
127
- const progress = Math.min(
128
- Math.floor(
129
- (Date.now() - startTimeRef.current) /
130
- MESSAGE_CHANGE_INTERVAL.DEFAULT
131
- ),
132
- STARTING_MESSAGES.length - 1
133
- );
134
- setStartingMessageIndex(progress);
135
- }
136
- }
137
- }
138
- } catch (error) {
139
- console.error("Error checking evaluation status:", error);
140
- }
141
- };
142
-
143
- checkEvaluationStatus();
144
- }
145
- };
146
-
147
- // Add the visibility change listener
148
- document.addEventListener("visibilitychange", handleVisibilityChange);
149
-
150
- if (isDefault) {
151
- simulateEvaluation();
152
- } else {
153
- // Start the evaluation only if it has not already been launched
154
- if (!evaluationStarted) {
155
- startEvaluation();
156
- }
157
- }
158
-
159
- // Clean up intervals on unmount
160
- return () => {
161
- if (pollingIntervalRef.current) {
162
- clearInterval(pollingIntervalRef.current);
163
- }
164
- if (timerIntervalRef.current) {
165
- clearInterval(timerIntervalRef.current);
166
- }
167
- if (simulationTimeoutRef.current) {
168
- clearTimeout(simulationTimeoutRef.current);
169
- }
170
- document.removeEventListener("visibilitychange", handleVisibilityChange);
171
- };
172
- }, [isDefault, sessionId, evaluationComplete, evaluationStarted]);
173
-
174
- // Simulate the evaluation process for pre-calculated documents
175
- const simulateEvaluation = () => {
176
- // Complete after the simulation duration (2 minutes)
177
- simulationTimeoutRef.current = setTimeout(() => {
178
- setEvaluationComplete(true);
179
-
180
- if (startingMessageIntervalRef.current) {
181
- clearInterval(startingMessageIntervalRef.current);
182
- }
183
-
184
- setStartingMessageIndex(STARTING_MESSAGES.length - 1); // Set to last message
185
- }, SIMULATION_DURATION);
186
- };
187
-
188
- // Format elapsed time as HH:MM:SS
189
- const formatElapsedTime = () => {
190
- const hours = Math.floor(elapsedTime / 3600);
191
- const minutes = Math.floor((elapsedTime % 3600) / 60);
192
- const seconds = elapsedTime % 60;
193
-
194
- return [
195
- hours.toString().padStart(2, "0"),
196
- minutes.toString().padStart(2, "0"),
197
- seconds.toString().padStart(2, "0"),
198
- ].join(":");
199
- };
200
-
201
- // Map the backend step name to its index in STARTING_MESSAGES
202
- const mapStepToMessageIndex = (currentStep) => {
203
- switch (currentStep) {
204
- case "initializing":
205
- return 0;
206
- case "finding_available_model_providers":
207
- return 1;
208
- case "starting_evaluation_process":
209
- return 2;
210
- case "evaluating_models":
211
- return 3;
212
- case "storing_evaluation_results":
213
- case "completed":
214
- return 4;
215
- default:
216
- // Estimate the step from elapsed time when the step is not recognized
217
- const elapsedSinceStart = Date.now() - startTimeRef.current;
218
- const estimatedTotalTime = 80000; // 80 seconds
219
- const estimatedProgress = Math.min(
220
- elapsedSinceStart / estimatedTotalTime,
221
- 1
222
- );
223
- return Math.min(
224
- Math.floor(estimatedProgress * STARTING_MESSAGES.length),
225
- STARTING_MESSAGES.length - 1
226
- );
227
- }
228
- };
229
-
230
- // Start benchmark evaluation
231
- const startEvaluation = async () => {
232
- if (!sessionId) {
233
- setError("Missing session ID");
234
- return;
235
- }
236
-
237
- // Mark that the evaluation has started
238
- setEvaluationStarted(true);
239
-
240
- try {
241
- // Call API to start evaluation
242
- const response = await fetch(
243
- `${API_CONFIG.BASE_URL}/evaluate-benchmark`,
244
- {
245
- method: "POST",
246
- headers: {
247
- "Content-Type": "application/json",
248
- },
249
- body: JSON.stringify({
250
- session_id: sessionId,
251
- }),
252
- }
253
- );
254
-
255
- const result = await response.json();
256
-
257
- if (response.ok) {
258
- // Set up polling to check completion
259
- pollingIntervalRef.current = setInterval(async () => {
260
- try {
261
- const logsResponse = await fetch(
262
- `${API_CONFIG.BASE_URL}/evaluation-logs/${sessionId}`
263
- );
264
-
265
- if (logsResponse.ok) {
266
- const logsResult = await logsResponse.json();
267
-
268
- // Check whether the evaluation is finished
269
- if (logsResult.is_completed) {
270
- setEvaluationComplete(true);
271
-
272
- // Advance to the last message step
273
- setStartingMessageIndex(STARTING_MESSAGES.length - 1);
274
-
275
- // Stop the intervals
276
- clearInterval(pollingIntervalRef.current);
277
- if (startingMessageIntervalRef.current) {
278
- clearInterval(startingMessageIntervalRef.current);
279
- }
280
- } else {
281
- // Fetch the current step from the API when available
282
- if (logsResult.current_step) {
283
- // Use the mapping function to determine the message index
284
- const newIndex = mapStepToMessageIndex(
285
- logsResult.current_step
286
- );
287
- setStartingMessageIndex(newIndex);
288
- } else {
289
- // Fallback: if the API returns no step, estimate from elapsed time
290
- const elapsedSinceStart = Date.now() - startTimeRef.current;
291
- const estimatedTotalTime = 80000; // 80 seconds
292
- const estimatedProgress = Math.min(
293
- elapsedSinceStart / estimatedTotalTime,
294
- 1
295
- );
296
- const estimatedStepIndex = Math.min(
297
- Math.floor(estimatedProgress * STARTING_MESSAGES.length),
298
- STARTING_MESSAGES.length - 1
299
- );
300
- setStartingMessageIndex(estimatedStepIndex);
301
- }
302
- }
303
- }
304
- } catch (error) {
305
- console.log("Error polling logs:", error);
306
- // Do not stop polling on transient network errors
307
- }
308
- }, 2000);
309
- } else {
310
- setError(result.error || "Benchmark evaluation failed");
311
- }
312
- } catch (error) {
313
- console.error("Error starting evaluation:", error);
314
- setError("Error connecting to server");
315
- }
316
- };
317
-
318
- return (
319
- <Paper
320
- elevation={3}
321
- sx={{
322
- p: 4,
323
- mt: 3,
324
- mb: 3,
325
- display: "flex",
326
- flexDirection: "column",
327
- alignItems: "center",
328
- justifyContent: "center",
329
- minHeight: 200,
330
- position: "relative",
331
- }}
332
- >
333
- {/* Estimated time */}
334
- <Box
335
- sx={{
336
- position: "absolute",
337
- top: 12,
338
- right: 12,
339
- backgroundColor: "rgba(0, 0, 0, 0.04)",
340
- borderRadius: "4px",
341
- px: 1,
342
- py: 0.5,
343
- display: "inline-flex",
344
- alignItems: "center",
345
- }}
346
- >
347
- <Typography
348
- variant="caption"
349
- sx={{
350
- fontSize: "0.675rem",
351
- color: "text.secondary",
352
- fontWeight: 500,
353
- }}
354
- >
355
- Estimated time ~ 1m30s
356
- </Typography>
357
- </Box>
358
-
359
- {error ? (
360
- <Alert severity="error" sx={{ width: "100%" }}>
361
- {error}
362
- </Alert>
363
- ) : (
364
- <>
365
- {evaluationComplete ? (
366
- <Alert severity="success" sx={{ width: "100%", mb: 3 }}>
367
- Evaluation completed successfully!
368
- </Alert>
369
- ) : (
370
- <>
371
- <CircularProgress size={60} sx={{ mb: 2 }} />
372
- <Typography variant="h6" component="div" gutterBottom>
373
- Benchmark evaluation...
374
- </Typography>
375
-
376
- {/* Step progress indicator */}
377
- <Typography variant="body1" color="text.secondary">
378
- {`${STARTING_MESSAGES[startingMessageIndex].message} (${STARTING_MESSAGES[startingMessageIndex].step}/${STARTING_MESSAGES[startingMessageIndex].totalSteps})`}
379
- </Typography>
380
-
381
- {/* Timer display */}
382
- <Box
383
- sx={{
384
- display: "flex",
385
- alignItems: "center",
386
- mt: 1,
387
- color: "text.secondary",
388
- opacity: 0.5,
389
- }}
390
- >
391
- <Typography variant="body2">{formatElapsedTime()}</Typography>
392
- </Box>
393
- </>
394
- )}
395
- </>
396
- )}
397
- </Paper>
398
- );
399
- };
400
-
401
- export default BenchmarkEvaluation;
frontend/src/components/{EvaluationDisplay.jsx → Evaluation/Display.jsx} RENAMED
@@ -18,6 +18,7 @@ import {
18
  } from "@mui/material";
19
  import OpenInNewIcon from "@mui/icons-material/OpenInNew";
20
  import CheckCircleIcon from "@mui/icons-material/CheckCircle";
 
21
 
22
  // Styles for the medals
23
  const MEDAL_STYLES = {
@@ -85,7 +86,7 @@ const getMedalStyle = (rank) => {
85
  };
86
  };
87
 
88
- const EvaluationDisplay = ({ sessionId, results }) => {
89
  // Format accuracy as percentage
90
  const formatAccuracy = (value) => {
91
  return `${(value * 100).toFixed(2)}\u2009%`;
@@ -118,9 +119,10 @@ const EvaluationDisplay = ({ sessionId, results }) => {
118
  results.models_comparison.length === 0
119
  ) {
120
  return (
121
- <Alert severity="info" sx={{ mt: 4, mb: 4 }}>
122
- The demo is currently under heavy load, please try again later.
123
- </Alert>
 
124
  );
125
  }
126
 
@@ -130,9 +132,10 @@ const EvaluationDisplay = ({ sessionId, results }) => {
130
  );
131
  if (successfulModels.length === 0) {
132
  return (
133
- <Alert severity="warning" sx={{ mt: 4, mb: 4 }}>
134
- The demo is currently under heavy load, please try again later.
135
- </Alert>
 
136
  );
137
  }
138
 
@@ -295,4 +298,4 @@ const EvaluationDisplay = ({ sessionId, results }) => {
295
  );
296
  };
297
 
298
- export default EvaluationDisplay;
 
18
  } from "@mui/material";
19
  import OpenInNewIcon from "@mui/icons-material/OpenInNew";
20
  import CheckCircleIcon from "@mui/icons-material/CheckCircle";
21
+ import ErrorDisplay from "../common/ErrorDisplay";
22
 
23
  // Styles pour les médailles
24
  const MEDAL_STYLES = {
 
86
  };
87
  };
88
 
89
+ const Display = ({ sessionId, results }) => {
90
  // Format accuracy as percentage
91
  const formatAccuracy = (value) => {
92
  return `${(value * 100).toFixed(2)}\u2009%`;
 
119
  results.models_comparison.length === 0
120
  ) {
121
  return (
122
+ <ErrorDisplay
123
+ error="The demo is currently under heavy load, please try again later."
124
+ title="Service Unavailable"
125
+ />
126
  );
127
  }
128
 
 
132
  );
133
  if (successfulModels.length === 0) {
134
  return (
135
+ <ErrorDisplay
136
+ error="The demo is currently under heavy load, please try again later."
137
+ title="Service Unavailable"
138
+ />
139
  );
140
  }
141
 
 
298
  );
299
  };
300
 
301
+ export default Display;
frontend/src/components/Evaluation/Evaluation.jsx ADDED
@@ -0,0 +1,150 @@
1
+ import React from "react";
2
+ import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
3
+ import { useNavigate, useSearchParams } from "react-router-dom";
4
+ import ErrorOutlineIcon from "@mui/icons-material/ErrorOutline";
5
+ import { useSimulation } from "./hooks/useSimulation";
6
+ import { useTimer } from "./hooks/useTimer";
7
+ import { useEvaluation } from "./hooks/useEvaluation";
8
+ import ErrorDisplay from "../common/ErrorDisplay";
9
+
10
+ const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
11
+ const [searchParams] = useSearchParams();
12
+ const isDefault =
13
+ isDefaultDocument ||
14
+ ["the-bitter-lesson", "hurricane-faq", "pokemon-guide"].includes(sessionId);
15
+
16
+ const navigate = useNavigate();
17
+
18
+ // Use our custom hooks
19
+ const { formatElapsedTime, stopTimer } = useTimer();
20
+ const {
21
+ startingMessageIndex,
22
+ evaluationComplete: simulationComplete,
23
+ currentMessage,
24
+ } = useSimulation(() => {
25
+ if (onComplete) {
26
+ onComplete();
27
+ }
28
+ });
29
+ const {
30
+ error,
31
+ evaluationComplete: realComplete,
32
+ currentStep,
33
+ evaluationStarted,
34
+ startEvaluation,
35
+ currentStepLabel,
36
+ totalSteps,
37
+ } = useEvaluation(sessionId, () => {
38
+ if (onComplete) {
39
+ onComplete();
40
+ }
41
+ });
42
+
43
+ // Handle automatic redirection when evaluation is complete
44
+ React.useEffect(() => {
45
+ if (realComplete || simulationComplete) {
46
+ navigate(`/evaluation-display?session=${sessionId}`);
47
+ }
48
+ }, [realComplete, simulationComplete, sessionId, navigate]);
49
+
50
+ // Start evaluation if not default and not started
51
+ React.useEffect(() => {
52
+ if (!isDefault && !evaluationStarted) {
53
+ startEvaluation();
54
+ }
55
+ }, [isDefault, evaluationStarted, startEvaluation]);
56
+
57
+ // Stop timer when complete
58
+ React.useEffect(() => {
59
+ if (realComplete || simulationComplete) {
60
+ stopTimer();
61
+ }
62
+ }, [realComplete, simulationComplete, stopTimer]);
63
+
64
+ const isComplete = realComplete || simulationComplete;
65
+ const currentStepInfo = isDefault
66
+ ? `${currentMessage.message} (${currentMessage.step}/${currentMessage.totalSteps})`
67
+ : `${currentStepLabel} (${currentStep + 1}/${totalSteps})`;
68
+
69
+ return (
70
+ <Paper
71
+ elevation={3}
72
+ sx={{
73
+ p: 4,
74
+ mt: 3,
75
+ mb: 3,
76
+ display: "flex",
77
+ flexDirection: "column",
78
+ alignItems: "center",
79
+ justifyContent: "center",
80
+ minHeight: 200,
81
+ position: "relative",
82
+ }}
83
+ >
84
+ {/* Temps estimé */}
85
+ <Box
86
+ sx={{
87
+ position: "absolute",
88
+ top: 12,
89
+ right: 12,
90
+ backgroundColor: "rgba(0, 0, 0, 0.04)",
91
+ borderRadius: "4px",
92
+ px: 1,
93
+ py: 0.5,
94
+ display: "inline-flex",
95
+ alignItems: "center",
96
+ }}
97
+ >
98
+ <Typography
99
+ variant="caption"
100
+ sx={{
101
+ fontSize: "0.675rem",
102
+ color: "text.secondary",
103
+ fontWeight: 500,
104
+ }}
105
+ >
106
+ Estimated time ~ 1m30s
107
+ </Typography>
108
+ </Box>
109
+
110
+ {error ? (
111
+ <ErrorDisplay error={error} />
112
+ ) : (
113
+ <>
114
+ {isComplete ? (
115
+ <Alert severity="success" sx={{ width: "100%", mb: 3 }}>
116
+ Evaluation completed successfully!
117
+ </Alert>
118
+ ) : (
119
+ <>
120
+ <CircularProgress size={60} sx={{ mb: 2 }} />
121
+ <Typography variant="h6" component="div" gutterBottom>
122
+ Benchmark evaluation...
123
+ </Typography>
124
+
125
+ {/* Step progress indicator */}
126
+ <Typography variant="body1" color="text.secondary">
127
+ {currentStepInfo}
128
+ </Typography>
129
+
130
+ {/* Timer display */}
131
+ <Box
132
+ sx={{
133
+ display: "flex",
134
+ alignItems: "center",
135
+ mt: 1,
136
+ color: "text.secondary",
137
+ opacity: 0.5,
138
+ }}
139
+ >
140
+ <Typography variant="body2">{formatElapsedTime()}</Typography>
141
+ </Box>
142
+ </>
143
+ )}
144
+ </>
145
+ )}
146
+ </Paper>
147
+ );
148
+ };
149
+
150
+ export default BenchmarkEvaluation;
frontend/src/components/Evaluation/hooks/useEvaluation.js ADDED
@@ -0,0 +1,148 @@
1
+ import { useState, useRef, useEffect } from "react";
2
+ import API_CONFIG from "../../../config/api";
3
+
4
+ // Define all evaluation steps in sequence
5
+ const EVALUATION_STEPS = [
6
+ "initializing",
7
+ "finding_available_model_providers",
8
+ "starting_evaluation_process",
9
+ "evaluating_models",
10
+ "storing_evaluation_results",
11
+ ];
12
+
13
+ // Step labels for display
14
+ const STEP_LABELS = {
15
+ initializing: "Initializing evaluation environment",
16
+ finding_available_model_providers: "Finding available model providers",
17
+ starting_evaluation_process: "Starting evaluation process",
18
+ evaluating_models: "Evaluating models",
19
+ storing_evaluation_results: "Storing evaluation results",
20
+ };
21
+
22
+ // Error messages that should be treated as errors
23
+ const ERROR_MESSAGES = [
24
+ "heavy load",
25
+ "try again later",
26
+ "rate limit",
27
+ "RATE_LIMIT_EXCEEDED",
28
+ ];
29
+
30
+ export const useEvaluation = (sessionId, onComplete) => {
31
+ const [error, setError] = useState(null);
32
+ const [evaluationComplete, setEvaluationComplete] = useState(false);
33
+ const [currentStep, setCurrentStep] = useState(0);
34
+ const [evaluationStarted, setEvaluationStarted] = useState(false);
35
+ const pollingIntervalRef = useRef(null);
36
+
37
+ const mapStepToIndex = (step) => {
38
+ return EVALUATION_STEPS.indexOf(step);
39
+ };
40
+
41
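+ // Treat overload and rate-limit phrases in the logs as fatal errors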
+ const checkForErrors = (logs) => {
42
+ if (!logs) return false;
43
+
44
+ const hasError = ERROR_MESSAGES.some((errorMessage) =>
45
+ logs.some((log) => log.toLowerCase().includes(errorMessage.toLowerCase()))
46
+ );
47
+
48
+ if (hasError) {
49
+ setError(
50
+ "The demo is currently under heavy load, please try again later."
51
+ );
52
+ setEvaluationComplete(true);
53
+ if (pollingIntervalRef.current) {
54
+ clearInterval(pollingIntervalRef.current);
55
+ }
56
+ return true;
57
+ }
58
+ return false;
59
+ };
60
+
61
+ const startEvaluation = async () => {
62
+ if (!sessionId) {
63
+ setError("Missing session ID");
64
+ return;
65
+ }
66
+
67
+ setEvaluationStarted(true);
68
+
69
+ try {
70
+ const response = await fetch(
71
+ `${API_CONFIG.BASE_URL}/evaluate-benchmark`,
72
+ {
73
+ method: "POST",
74
+ headers: {
75
+ "Content-Type": "application/json",
76
+ },
77
+ body: JSON.stringify({
78
+ session_id: sessionId,
79
+ }),
80
+ }
81
+ );
82
+
83
+ const result = await response.json();
84
+
85
+ if (response.ok) {
86
+ setupPolling();
87
+ } else {
88
+ setError(result.error || "Benchmark evaluation failed");
89
+ }
90
+ } catch (error) {
91
+ console.error("Error starting evaluation:", error);
92
+ setError("Error connecting to server");
93
+ }
94
+ };
95
+
96
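+ // Poll the evaluation logs every 2 seconds until completion or a fatal error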
+ const setupPolling = () => {
97
+ pollingIntervalRef.current = setInterval(async () => {
98
+ try {
99
+ const logsResponse = await fetch(
100
+ `${API_CONFIG.BASE_URL}/evaluation-logs/${sessionId}`
101
+ );
102
+
103
+ if (logsResponse.ok) {
104
+ const logsResult = await logsResponse.json();
105
+
106
+ // Check for error messages in logs
107
+ if (checkForErrors(logsResult.logs)) {
108
+ return;
109
+ }
110
+
111
+ if (logsResult.is_completed) {
112
+ setEvaluationComplete(true);
113
+ clearInterval(pollingIntervalRef.current);
114
+ if (onComplete) {
115
+ onComplete();
116
+ }
117
+ } else if (logsResult.current_step) {
118
+ const newStepIndex = mapStepToIndex(logsResult.current_step);
119
+ if (newStepIndex !== -1) {
120
+ setCurrentStep(newStepIndex);
121
+ }
122
+ }
123
+ }
124
+ } catch (error) {
125
+ console.log("Error polling logs:", error);
126
+ }
127
+ }, 2000);
128
+ };
129
+
130
+ useEffect(() => {
131
+ return () => {
132
+ if (pollingIntervalRef.current) {
133
+ clearInterval(pollingIntervalRef.current);
134
+ }
135
+ };
136
+ }, []);
137
+
138
+ return {
139
+ error,
140
+ evaluationComplete,
141
+ currentStep,
142
+ evaluationStarted,
143
+ startEvaluation,
144
+ currentStepLabel:
145
+ STEP_LABELS[EVALUATION_STEPS[currentStep]] || "Processing",
146
+ totalSteps: EVALUATION_STEPS.length,
147
+ };
148
+ };
frontend/src/components/Evaluation/hooks/useSimulation.js ADDED
@@ -0,0 +1,59 @@
1
+ import { useState, useRef, useEffect } from "react";
2
+
3
+ // Simulation time in milliseconds for pre-calculated documents
4
+ const SIMULATION_DURATION = 120000; // 2 minutes
5
+
6
+ // Starting messages with their timing
7
+ const STARTING_MESSAGES = [
8
+ { message: "Initializing evaluation environment", step: 1, totalSteps: 5 },
9
+ { message: "Finding available model providers", step: 2, totalSteps: 5 },
10
+ { message: "Starting evaluation process", step: 3, totalSteps: 5 },
11
+ { message: "Evaluating models", step: 4, totalSteps: 5 },
12
+ { message: "Storing evaluation results", step: 5, totalSteps: 5 },
13
+ ];
14
+
15
+ export const useSimulation = (onComplete) => {
16
+ const [startingMessageIndex, setStartingMessageIndex] = useState(0);
17
+ const [evaluationComplete, setEvaluationComplete] = useState(false);
18
+ const simulationTimeoutRef = useRef(null);
19
+ const startingMessageIntervalRef = useRef(null);
20
+
21
+ useEffect(() => {
22
+ // Configure automatic interval for message changes
23
+ startingMessageIntervalRef.current = setInterval(() => {
24
+ setStartingMessageIndex((prev) => {
25
+ if (prev < STARTING_MESSAGES.length - 1) {
26
+ return prev + 1;
27
+ }
28
+ return prev;
29
+ });
30
+ }, SIMULATION_DURATION / STARTING_MESSAGES.length);
31
+
32
+ // Complete after simulation duration
33
+ simulationTimeoutRef.current = setTimeout(() => {
34
+ setEvaluationComplete(true);
35
+ if (startingMessageIntervalRef.current) {
36
+ clearInterval(startingMessageIntervalRef.current);
37
+ }
38
+ setStartingMessageIndex(STARTING_MESSAGES.length - 1);
39
+ if (onComplete) {
40
+ onComplete();
41
+ }
42
+ }, SIMULATION_DURATION);
43
+
44
+ return () => {
45
+ if (simulationTimeoutRef.current) {
46
+ clearTimeout(simulationTimeoutRef.current);
47
+ }
48
+ if (startingMessageIntervalRef.current) {
49
+ clearInterval(startingMessageIntervalRef.current);
50
+ }
51
+ };
52
+ }, [onComplete]);
53
+
54
+ return {
55
+ startingMessageIndex,
56
+ evaluationComplete,
57
+ currentMessage: STARTING_MESSAGES[startingMessageIndex],
58
+ };
59
+ };
frontend/src/components/Evaluation/hooks/useTimer.js ADDED
@@ -0,0 +1,48 @@
1
+ import { useState, useRef, useEffect } from "react";
2
+
3
+ export const useTimer = () => {
4
+ const [elapsedTime, setElapsedTime] = useState(0);
5
+ const timerIntervalRef = useRef(null);
6
+ const startTimeRef = useRef(null);
7
+
8
+ const startTimer = () => {
9
+ startTimeRef.current = Date.now();
10
+ timerIntervalRef.current = setInterval(() => {
11
+ const timeElapsed = Math.floor(
12
+ (Date.now() - startTimeRef.current) / 1000
13
+ );
14
+ setElapsedTime(timeElapsed);
15
+ }, 1000);
16
+ };
17
+
18
+ const stopTimer = () => {
19
+ if (timerIntervalRef.current) {
20
+ clearInterval(timerIntervalRef.current);
21
+ }
22
+ };
23
+
24
+ const formatElapsedTime = () => {
25
+ const hours = Math.floor(elapsedTime / 3600);
26
+ const minutes = Math.floor((elapsedTime % 3600) / 60);
27
+ const seconds = elapsedTime % 60;
28
+
29
+ return [
30
+ hours.toString().padStart(2, "0"),
31
+ minutes.toString().padStart(2, "0"),
32
+ seconds.toString().padStart(2, "0"),
33
+ ].join(":");
34
+ };
35
+
36
+ useEffect(() => {
37
+ startTimer();
38
+ return () => {
39
+ stopTimer();
40
+ };
41
+ }, []);
42
+
43
+ return {
44
+ elapsedTime,
45
+ formatElapsedTime,
46
+ stopTimer,
47
+ };
48
+ };
frontend/src/components/Footer/Footer.js CHANGED
@@ -6,21 +6,23 @@ const Footer = () => {
6
  <Box
7
  component="footer"
8
  sx={{
9
- width: "100%",
 
10
  py: 4,
11
  textAlign: "center",
 
12
  }}
13
  >
14
  <Typography variant="body2" color="text.secondary" sx={{ mx: 4 }}>
15
- © 2024 Hugging Face - Open LLM Leaderboard - Made with 🤗 by the HF team
16
- -{" "}
17
  <Link
18
- href="https://huggingface.co"
19
  target="_blank"
20
  rel="noopener noreferrer"
21
  color="inherit"
22
  >
23
- huggingface.co
24
  </Link>
25
  </Typography>
26
  </Box>
 
6
  <Box
7
  component="footer"
8
  sx={{
9
+ width: "70%",
10
+ margin: "0 auto",
11
  py: 4,
12
  textAlign: "center",
13
+ opacity: 0.7,
14
  }}
15
  >
16
  <Typography variant="body2" color="text.secondary" sx={{ mx: 4 }}>
17
+ We keep processed documents for research purposes; by using this space,
18
+ you agree to this. For fully private usage, please duplicate the{" "}
19
  <Link
20
+ href="https://huggingface.co/spaces/yourbench/advanced"
21
  target="_blank"
22
  rel="noopener noreferrer"
23
  color="inherit"
24
  >
25
+ advanced demo space
26
  </Link>
27
  </Typography>
28
  </Box>
frontend/src/components/Intro.jsx CHANGED
@@ -1,5 +1,5 @@
1
  import React from "react";
2
- import { Box, Typography } from "@mui/material";
3
  import HFLogo from "./Logo/HFLogo";
4
 
5
  const Intro = () => {
@@ -42,7 +42,19 @@ const Intro = () => {
42
  YourBench is an <b>open-source framework</b> for generating{" "}
43
  <b>domain-specific benchmarks</b> in a <b>zero-shot</b> manner. It aims
44
  to keep your large language models on their toes—even as new data
45
- sources, domains, and knowledge demands evolve. Currently, this is an extremely minimal demo. To unlock the full capabilities, please visit our GitHub!
46
  </Typography>
47
  </Box>
48
  );
 
1
  import React from "react";
2
+ import { Box, Typography, Link } from "@mui/material";
3
  import HFLogo from "./Logo/HFLogo";
4
 
5
  const Intro = () => {
 
42
  YourBench is an <b>open-source framework</b> for generating{" "}
43
  <b>domain-specific benchmarks</b> in a <b>zero-shot</b> manner. It aims
44
  to keep your large language models on their toes—even as new data
45
+ sources, domains, and knowledge demands evolve.
46
+ <br />
47
+ <br /> Currently, this is an <b>extremely minimal demo</b>. <br />
48
+ To <b>unlock the full capabilities</b>, please visit our{" "}
49
+ <Link
50
+ href="https://github.com/yourbench"
51
+ target="_blank"
52
+ rel="noopener noreferrer"
53
+ color="inherit"
54
+ >
55
+ <b>GitHub</b>
56
+ </Link>
57
+ !
58
  </Typography>
59
  </Box>
60
  );
frontend/src/components/KeyboardShortcuts.jsx DELETED
@@ -1,24 +0,0 @@
1
- import React, { useEffect } from "react";
2
-
3
- function KeyboardShortcuts() {
4
- useEffect(() => {
5
- const handleKeyDown = (e) => {
6
- if (e.key === "p") {
7
- console.log("Debug key pressed: Clearing auth data and refreshing");
8
- localStorage.removeItem("hf_oauth");
9
- localStorage.removeItem("auth_return_to");
10
- alert("Auth data cleared. Page will reload.");
11
- window.location.reload();
12
- }
13
- };
14
-
15
- window.addEventListener("keydown", handleKeyDown);
16
- return () => {
17
- window.removeEventListener("keydown", handleKeyDown);
18
- };
19
- }, []);
20
-
21
- return null;
22
- }
23
-
24
- export default KeyboardShortcuts;
frontend/src/components/{ExternalLinks.jsx → Navigation.jsx} RENAMED
@@ -13,7 +13,7 @@ import OpenInNewIcon from "@mui/icons-material/OpenInNew";
13
  import ShareIcon from "@mui/icons-material/Share";
14
  import MenuIcon from "@mui/icons-material/Menu";
15
 
16
- const ExternalLinks = () => {
17
  const [anchorEl, setAnchorEl] = useState(null);
18
  const theme = useTheme();
19
  const isMobile = useMediaQuery(theme.breakpoints.down("sm"));
@@ -44,7 +44,7 @@ const ExternalLinks = () => {
44
  url: "https://github.com/huggingface/yourbench",
45
  },
46
  {
47
- name: "Full demo",
48
  url: "https://huggingface.co/spaces/yourbench/advanced",
49
  },
50
  ];
@@ -175,4 +175,4 @@ const ExternalLinks = () => {
175
  );
176
  };
177
 
178
- export default ExternalLinks;
 
13
  import ShareIcon from "@mui/icons-material/Share";
14
  import MenuIcon from "@mui/icons-material/Menu";
15
 
16
+ const Navigation = () => {
17
  const [anchorEl, setAnchorEl] = useState(null);
18
  const theme = useTheme();
19
  const isMobile = useMediaQuery(theme.breakpoints.down("sm"));
 
44
  url: "https://github.com/huggingface/yourbench",
45
  },
46
  {
47
+ name: "Advanced demo",
48
  url: "https://huggingface.co/spaces/yourbench/advanced",
49
  },
50
  ];
 
175
  );
176
  };
177
 
178
+ export default Navigation;
frontend/src/components/common/ErrorDisplay.jsx ADDED
@@ -0,0 +1,43 @@
1
+ import React from "react";
2
+ import { Box, Typography } from "@mui/material";
3
+ import SentimentVeryDissatisfiedIcon from "@mui/icons-material/SentimentVeryDissatisfied";
4
+
5
+ /**
6
+ * Generic error display component with centered icon and text
7
+ * @param {Object} props
8
+ * @param {string} props.error - The error message to display
9
+ * @param {string} [props.title="Error"] - Optional custom title
10
+ * @param {Object} [props.sx={}] - Optional additional styles
11
+ */
12
+ const ErrorDisplay = ({ error, title = "Error", sx = {} }) => {
13
+ return (
14
+ <Box
15
+ sx={{
16
+ display: "flex",
17
+ flexDirection: "column",
18
+ alignItems: "center",
19
+ justifyContent: "center",
20
+ p: 4,
21
+ gap: 2,
22
+ ...sx,
23
+ }}
24
+ >
25
+ <SentimentVeryDissatisfiedIcon
26
+ sx={{ fontSize: 60, color: "warning.main" }}
27
+ />
28
+ <Typography variant="h6" color="warning">
29
+ {title}
30
+ </Typography>
31
+ <Typography
32
+ variant="body1"
33
+ align="center"
34
+ color="text.secondary"
35
+ sx={{ maxWidth: "80%", lineHeight: 1.5 }}
36
+ >
37
+ {error}
38
+ </Typography>
39
+ </Box>
40
+ );
41
+ };
42
+
43
+ export default ErrorDisplay;
frontend/src/components/shared/AuthContainer.js DELETED
@@ -1,192 +0,0 @@
1
- import React, { useEffect } from "react";
2
- import {
3
- Box,
4
- Typography,
5
- Button,
6
- Chip,
7
- Stack,
8
- Paper,
9
- CircularProgress,
10
- useTheme,
11
- useMediaQuery,
12
- } from "@mui/material";
13
- import HFLogo from "../Logo/HFLogo";
14
- import { useAuth } from "../../hooks/useAuth";
15
- import LogoutIcon from "@mui/icons-material/Logout";
16
- import { useNavigate } from "react-router-dom";
17
-
18
- function AuthContainer({ actionText = "DO_ACTION", onSuccess }) {
19
- const { isAuthenticated, user, login, logout, loading } = useAuth();
20
- const navigate = useNavigate();
21
- const theme = useTheme();
22
- const isMobile = useMediaQuery(theme.breakpoints.down("sm"));
23
-
24
- // Trigger onSuccess callback when user is authenticated
25
- useEffect(() => {
26
- if (isAuthenticated && onSuccess) {
27
- // Add a small delay to ensure UI is updated properly
28
- setTimeout(() => {
29
- console.log("User is authenticated, calling onSuccess callback");
30
- onSuccess();
31
- }, 100);
32
- }
33
- }, [isAuthenticated, onSuccess]);
34
-
35
- // Check localStorage manually as a fallback
36
- useEffect(() => {
37
- if (!isAuthenticated && !loading && onSuccess) {
38
- const storedAuth = localStorage.getItem("hf_oauth");
39
- if (storedAuth) {
40
- console.log(
41
- "Found auth data in localStorage but isAuthenticated is false, forcing onSuccess"
42
- );
43
- onSuccess();
44
- }
45
- }
46
- }, [isAuthenticated, loading, onSuccess]);
47
-
48
- const handleLogout = () => {
49
- if (isAuthenticated && logout) {
50
- logout();
51
- navigate("/", { replace: true });
52
- window.location.reload();
53
- }
54
- };
55
-
56
- if (loading) {
57
- return (
58
- <Paper
59
- elevation={0}
60
- sx={{
61
- p: 3,
62
- mb: 4,
63
- border: "1px solid",
64
- borderColor: "grey.300",
65
- display: "flex",
66
- flexDirection: "column",
67
- alignItems: "center",
68
- gap: 2,
69
- }}
70
- >
71
- <CircularProgress size={24} />
72
- </Paper>
73
- );
74
- }
75
-
76
- if (!isAuthenticated) {
77
- return (
78
- <Paper
79
- elevation={0}
80
- sx={{
81
- p: 3,
82
- mb: 4,
83
- border: "1px solid",
84
- borderColor: "grey.300",
85
- display: "flex",
86
- flexDirection: "column",
87
- alignItems: "center",
88
- gap: 2,
89
- }}
90
- >
91
- <Typography variant="h6" align="center">
92
- Login to {actionText}
93
- </Typography>
94
- <Typography
95
- variant="body2"
96
- color="text.secondary"
97
- align="center"
98
- sx={{
99
- px: isMobile ? 2 : 0,
100
- }}
101
- >
102
- You need to be logged in with your Hugging Face account to{" "}
103
- {actionText.toLowerCase()}
104
- </Typography>
105
- <Button
106
- variant="contained"
107
- onClick={login}
108
- startIcon={
109
- <Box
110
- sx={{
111
- width: 20,
112
- height: 20,
113
- display: "flex",
114
- alignItems: "center",
115
- }}
116
- >
117
- <HFLogo />
118
- </Box>
119
- }
120
- sx={{
121
- textTransform: "none",
122
- fontWeight: 600,
123
- py: 1,
124
- px: 2,
125
- width: isMobile ? "100%" : "auto",
126
- }}
127
- >
128
- Sign in with Hugging Face
129
- </Button>
130
- </Paper>
131
- );
132
- }
133
-
134
- return (
135
- <Paper
136
- elevation={0}
137
- sx={{ p: 2, border: "1px solid", borderColor: "grey.300", mb: 4 }}
138
- >
139
- <Stack
140
- direction={isMobile ? "column" : "row"}
141
- spacing={2}
142
- alignItems={isMobile ? "stretch" : "center"}
143
- justifyContent="space-between"
144
- >
145
- <Stack
146
- direction={isMobile ? "column" : "row"}
147
- spacing={1}
148
- alignItems={isMobile ? "stretch" : "center"}
149
- sx={{ width: "100%" }}
150
- >
151
- <Typography
152
- variant="body1"
153
- align={isMobile ? "center" : "left"}
154
- sx={{ mb: isMobile ? 1 : 0 }}
155
- >
156
- Connected as <strong>{user?.username}</strong>
157
- </Typography>
158
- <Chip
159
- label={`Ready to ${actionText}`}
160
- color="success"
161
- size="small"
162
- variant="outlined"
163
- sx={{
164
- width: isMobile ? "100%" : "auto",
165
- height: isMobile ? 32 : 24,
166
- "& .MuiChip-label": {
167
- px: isMobile ? 2 : 1,
168
- },
169
- }}
170
- />
171
- </Stack>
172
- <Button
173
- variant="contained"
174
- onClick={handleLogout}
175
- endIcon={<LogoutIcon />}
176
- color="primary"
177
- sx={{
178
- minWidth: 120,
179
- height: 36,
180
- textTransform: "none",
181
- fontSize: "0.9375rem",
182
- width: isMobile ? "100%" : "auto",
183
- }}
184
- >
185
- Logout
186
- </Button>
187
- </Stack>
188
- </Paper>
189
- );
190
- }
191
-
192
- export default AuthContainer;
frontend/src/components/shared/CodeBlock.js DELETED
@@ -1,37 +0,0 @@
1
- import React from 'react';
2
- import { Box, IconButton } from '@mui/material';
3
- import ContentCopyIcon from '@mui/icons-material/ContentCopy';
4
-
5
- const CodeBlock = ({ code }) => (
6
- <Box sx={{ position: 'relative' }}>
7
- <IconButton
8
- onClick={() => navigator.clipboard.writeText(code)}
9
- sx={{
10
- position: 'absolute',
11
- top: 8,
12
- right: 8,
13
- color: 'grey.500',
14
- '&:hover': { color: 'grey.300' },
15
- }}
16
- >
17
- <ContentCopyIcon fontSize="small" />
18
- </IconButton>
19
- <Box
20
- sx={{
21
- backgroundColor: 'grey.900',
22
- color: 'grey.100',
23
- p: 2,
24
- borderRadius: 1,
25
- fontFamily: 'monospace',
26
- fontSize: '0.875rem',
27
- overflowX: 'auto',
28
- textAlign: 'left',
29
- whiteSpace: 'pre',
30
- }}
31
- >
32
- {code}
33
- </Box>
34
- </Box>
35
- );
36
-
37
- export default CodeBlock;
frontend/src/components/shared/FilterTag.js DELETED
@@ -1,139 +0,0 @@
1
- import React from "react";
2
- import { Chip } from "@mui/material";
3
- import { useTheme } from "@mui/material/styles";
4
- import { alpha } from "@mui/material/styles";
5
- import CheckBoxOutlineBlankIcon from "@mui/icons-material/CheckBoxOutlineBlank";
6
- import CheckBoxOutlinedIcon from "@mui/icons-material/CheckBoxOutlined";
7
-
8
- const FilterTag = ({
9
- label,
10
- checked,
11
- onChange,
12
- count,
13
- isHideFilter = false,
14
- totalCount = 0,
15
- variant = "tag",
16
- showCheckbox = false,
17
- stacked = false,
18
- sx = {},
19
- }) => {
20
- const theme = useTheme();
21
-
22
- const formatCount = (count) => {
23
- if (count === undefined) return "";
24
- return `${count}`;
25
- };
26
-
27
- const mainLabel = label;
28
- const countLabel = count !== undefined ? formatCount(count) : "";
29
-
30
- return (
31
- <Chip
32
- icon={
33
- showCheckbox ? (
34
- checked ? (
35
- <CheckBoxOutlinedIcon
36
- sx={{
37
- fontSize: "1.1rem",
38
- ml: 0.8,
39
- color: checked
40
- ? variant === "secondary"
41
- ? "secondary.main"
42
- : "primary.main"
43
- : "text.secondary",
44
- }}
45
- />
46
- ) : (
47
- <CheckBoxOutlineBlankIcon
48
- sx={{
49
- fontSize: "1.1rem",
50
- ml: 0.8,
51
- color: "text.secondary",
52
- }}
53
- />
54
- )
55
- ) : null
56
- }
57
- label={
58
- <span>
59
- {mainLabel}
60
- {countLabel && (
61
- <>
62
- <span
63
- style={{
64
- display: "inline-block",
65
- width: "3px",
66
- height: "3px",
67
- borderRadius: "50%",
68
- backgroundColor: "currentColor",
69
- opacity: 0.2,
70
- margin: "0 4px",
71
- verticalAlign: "middle",
72
- }}
73
- />
74
- <span style={{ opacity: 0.5 }}>{countLabel}</span>
75
- </>
76
- )}
77
- </span>
78
- }
79
- onClick={onChange}
80
- variant="outlined"
81
- color={
82
- checked
83
- ? variant === "secondary"
84
- ? "secondary"
85
- : "primary"
86
- : "default"
87
- }
88
- size="small"
89
- data-checked={checked}
90
- sx={{
91
- height: "32px",
92
- fontWeight: 600,
93
- opacity: checked ? 1 : 0.8,
94
- borderRadius: "5px",
95
- borderWidth: "1px",
96
- borderStyle: "solid",
97
- cursor: "pointer",
98
- pl: showCheckbox ? 0.5 : 0,
99
- mr: 0.5,
100
- mb: 0.5,
101
- transition: "opacity 0.2s ease, border-color 0.2s ease",
102
- "& .MuiChip-label": {
103
- px: 0.75,
104
- pl: showCheckbox ? 0.6 : 0.75,
105
- },
106
- "& .MuiChip-icon": {
107
- mr: 0.5,
108
- pl: 0.2,
109
- },
110
- "&:hover": {
111
- opacity: 1,
112
- backgroundColor: checked
113
- ? alpha(
114
- theme.palette[variant === "secondary" ? "secondary" : "primary"]
115
- .main,
116
- theme.palette.mode === "light" ? 0.08 : 0.16
117
- )
118
- : "action.hover",
119
- borderWidth: "1px",
120
- },
121
- backgroundColor: checked
122
- ? alpha(
123
- theme.palette[variant === "secondary" ? "secondary" : "primary"]
124
- .main,
125
- theme.palette.mode === "light" ? 0.08 : 0.16
126
- )
127
- : "background.paper",
128
- borderColor: checked
129
- ? variant === "secondary"
130
- ? "secondary.main"
131
- : "primary.main"
132
- : "divider",
133
- ...sx,
134
- }}
135
- />
136
- );
137
- };
138
-
139
- export default FilterTag;
frontend/src/components/shared/InfoIconWithTooltip.js DELETED
@@ -1,87 +0,0 @@
1
- import React from "react";
2
- import { Box, Tooltip, Portal, Backdrop } from "@mui/material";
3
- import InfoOutlinedIcon from "@mui/icons-material/InfoOutlined";
4
-
5
- const InfoIconWithTooltip = ({ tooltip, iconProps = {}, sx = {} }) => {
6
- const [open, setOpen] = React.useState(false);
7
-
8
- return (
9
- <>
10
- <Tooltip
11
- title={tooltip}
12
- arrow
13
- placement="top"
14
- open={open}
15
- onOpen={() => setOpen(true)}
16
- onClose={() => setOpen(false)}
17
- componentsProps={{
18
- tooltip: {
19
- sx: {
20
- bgcolor: "rgba(33, 33, 33, 0.95)",
21
- padding: "12px 16px",
22
- maxWidth: "none !important",
23
- width: "auto",
24
- minWidth: "200px",
25
- fontSize: "0.875rem",
26
- lineHeight: 1.5,
27
- position: "relative",
28
- zIndex: 1501,
29
- "& .MuiTooltip-arrow": {
30
- color: "rgba(33, 33, 33, 0.95)",
31
- },
32
- },
33
- },
34
- popper: {
35
- sx: {
36
- zIndex: 1501,
37
- maxWidth: "min(600px, 90vw) !important",
38
- '&[data-popper-placement*="bottom"] .MuiTooltip-tooltip': {
39
- marginTop: "10px",
40
- },
41
- '&[data-popper-placement*="top"] .MuiTooltip-tooltip': {
42
- marginBottom: "10px",
43
- },
44
- },
45
- },
46
- }}
47
- >
48
- <Box
49
- component="span"
50
- sx={{
51
- opacity: 0.5,
52
- display: "flex",
53
- alignItems: "center",
54
- cursor: "help",
55
- "&:hover": { opacity: 0.8 },
56
- position: "relative",
57
- zIndex: open ? 1502 : "auto",
58
- ...sx,
59
- }}
60
- >
61
- <InfoOutlinedIcon
62
- sx={{
63
- fontSize: "1rem",
64
- ...iconProps.sx,
65
- }}
66
- {...iconProps}
67
- />
68
- </Box>
69
- </Tooltip>
70
- {open && (
71
- <Portal>
72
- <Backdrop
73
- open={true}
74
- sx={{
75
- zIndex: 1500,
76
- backgroundColor: "rgba(0, 0, 0, 0.5)",
77
- transition: "opacity 0.2s ease",
78
- pointerEvents: "none",
79
- }}
80
- />
81
- </Portal>
82
- )}
83
- </>
84
- );
85
- };
86
-
87
- export default InfoIconWithTooltip;
frontend/src/components/shared/PageHeader.js DELETED
@@ -1,29 +0,0 @@
1
- import React from "react";
2
- import { Box, Typography } from "@mui/material";
3
-
4
- const PageHeader = ({ title, subtitle }) => {
5
- return (
6
- <Box
7
- sx={{
8
- display: "flex",
9
- flexDirection: "column",
10
- alignItems: "center",
11
- textAlign: "center",
12
- mb: 6,
13
- mt: 6,
14
- gap: 2,
15
- }}
16
- >
17
- <Typography fontWeight="bold" variant="h3" component="h1">
18
- {title}
19
- </Typography>
20
- {subtitle && (
21
- <Typography variant="h6" color="text.secondary">
22
- {subtitle}
23
- </Typography>
24
- )}
25
- </Box>
26
- );
27
- };
28
-
29
- export default PageHeader;
frontend/src/pages/BenchmarkDisplayPage.jsx CHANGED
@@ -2,7 +2,7 @@ import React, { useState, useEffect } from "react";
2
  import { Box, CircularProgress } from "@mui/material";
3
  import { useNavigate, useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
- import BenchmarkDisplay from "../components/BenchmarkDisplay";
6
  import API_CONFIG from "../config/api";
7
  import { useThemeMode } from "../hooks/useThemeMode";
8
  import getTheme from "../config/theme";
@@ -138,7 +138,7 @@ function BenchmarkDisplayPage() {
138
  bgcolor: "background.paper",
139
  }}
140
  >
141
- <BenchmarkDisplay
142
  onStartEvaluation={handleStartEvaluation}
143
  sessionId={sessionId}
144
  datasetUrl={datasetUrl}
 
2
  import { Box, CircularProgress } from "@mui/material";
3
  import { useNavigate, useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
+ import Display from "../components/Benchmark/Display";
6
  import API_CONFIG from "../config/api";
7
  import { useThemeMode } from "../hooks/useThemeMode";
8
  import getTheme from "../config/theme";
 
138
  bgcolor: "background.paper",
139
  }}
140
  >
141
+ <Display
142
  onStartEvaluation={handleStartEvaluation}
143
  sessionId={sessionId}
144
  datasetUrl={datasetUrl}
frontend/src/pages/BenchmarkEvaluationPage.jsx CHANGED
@@ -2,7 +2,7 @@ import React, { useState, useEffect } from "react";
2
  import { Box, CircularProgress } from "@mui/material";
3
  import { useNavigate, useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
- import BenchmarkEvaluation from "../components/BenchmarkEvaluation";
6
  import API_CONFIG from "../config/api";
7
 
8
  function BenchmarkEvaluationPage() {
@@ -75,7 +75,7 @@ function BenchmarkEvaluationPage() {
75
  <CircularProgress size={60} />
76
  </Box>
77
  ) : (
78
- <BenchmarkEvaluation
79
  sessionId={sessionId}
80
  isDefaultDocument={isDefault}
81
  onComplete={handleEvaluationComplete}
 
2
  import { Box, CircularProgress } from "@mui/material";
3
  import { useNavigate, useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
+ import Evaluation from "../components/Evaluation/Evaluation";
6
  import API_CONFIG from "../config/api";
7
 
8
  function BenchmarkEvaluationPage() {
 
75
  <CircularProgress size={60} />
76
  </Box>
77
  ) : (
78
+ <Evaluation
79
  sessionId={sessionId}
80
  isDefaultDocument={isDefault}
81
  onComplete={handleEvaluationComplete}
frontend/src/pages/BenchmarkGenerationPage.jsx CHANGED
@@ -2,7 +2,7 @@ import React, { useState, useEffect, useRef } from "react";
2
  import { Box, CircularProgress } from "@mui/material";
3
  import { useNavigate, useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
- import BenchmarkGenerator from "../components/BenchmarkGenerator";
6
 
7
  function BenchmarkGenerationPage() {
8
  const navigate = useNavigate();
@@ -36,7 +36,7 @@ function BenchmarkGenerationPage() {
36
  return (
37
  <>
38
  <Intro />
39
- <BenchmarkGenerator
40
  sessionId={sessionId}
41
  isDefaultDocument={isDefault}
42
  onComplete={handleGenerationComplete}
 
2
  import { Box, CircularProgress } from "@mui/material";
3
  import { useNavigate, useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
+ import Generator from "../components/Benchmark/Generator";
6
 
7
  function BenchmarkGenerationPage() {
8
  const navigate = useNavigate();
 
36
  return (
37
  <>
38
  <Intro />
39
+ <Generator
40
  sessionId={sessionId}
41
  isDefaultDocument={isDefault}
42
  onComplete={handleGenerationComplete}
frontend/src/pages/EvaluationDisplayPage.jsx CHANGED
@@ -1,11 +1,12 @@
1
  import React, { useState, useEffect } from "react";
2
- import { Box, CircularProgress, Alert } from "@mui/material";
3
  import { useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
- import EvaluationDisplay from "../components/EvaluationDisplay";
6
  import { useThemeMode } from "../hooks/useThemeMode";
7
  import getTheme from "../config/theme";
8
  import API_CONFIG from "../config/api";
9
 
10
  function EvaluationDisplayPage() {
11
  const [searchParams] = useSearchParams();
@@ -132,9 +133,7 @@ function EvaluationDisplayPage() {
132
  <CircularProgress size={60} />
133
  </Box>
134
  ) : error ? (
135
- <Alert severity="error" sx={{ mt: 4, mb: 4 }}>
136
- {error}
137
- </Alert>
138
  ) : (
139
  <Box
140
  sx={{
@@ -144,10 +143,7 @@ function EvaluationDisplayPage() {
144
  bgcolor: "background.paper",
145
  }}
146
  >
147
- <EvaluationDisplay
148
- sessionId={sessionId}
149
- results={evaluationResults}
150
- />
151
  </Box>
152
  )}
153
  </>
 
1
  import React, { useState, useEffect } from "react";
2
+ import { Box, CircularProgress } from "@mui/material";
3
  import { useSearchParams, Navigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
+ import Display from "../components/Evaluation/Display";
6
  import { useThemeMode } from "../hooks/useThemeMode";
7
  import getTheme from "../config/theme";
8
  import API_CONFIG from "../config/api";
9
+ import ErrorDisplay from "../components/common/ErrorDisplay";
10
 
11
  function EvaluationDisplayPage() {
12
  const [searchParams] = useSearchParams();
 
133
  <CircularProgress size={60} />
134
  </Box>
135
  ) : error ? (
136
+ <ErrorDisplay error={error} title="Error" />
137
  ) : (
138
  <Box
139
  sx={{
 
143
  bgcolor: "background.paper",
144
  }}
145
  >
146
+ <Display sessionId={sessionId} results={evaluationResults} />
147
  </Box>
148
  )}
149
  </>
frontend/src/pages/HomePage.jsx CHANGED
@@ -2,7 +2,7 @@ import React from "react";
2
  import { Box } from "@mui/material";
3
  import { useNavigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
- import BenchmarkCreateForm from "../components/BenchmarkCreateForm";
6
  import { useThemeMode } from "../hooks/useThemeMode";
7
  import getTheme from "../config/theme";
8
 
@@ -30,7 +30,7 @@ function HomePage() {
30
  bgcolor: "background.paper",
31
  }}
32
  >
33
- <BenchmarkCreateForm onStartGeneration={handleStartGeneration} />
34
  </Box>
35
  </>
36
  );
 
2
  import { Box } from "@mui/material";
3
  import { useNavigate } from "react-router-dom";
4
  import Intro from "../components/Intro";
5
+ import CreateForm from "../components/Benchmark/CreateForm";
6
  import { useThemeMode } from "../hooks/useThemeMode";
7
  import getTheme from "../config/theme";
8
 
 
30
  bgcolor: "background.paper",
31
  }}
32
  >
33
+ <CreateForm onStartGeneration={handleStartGeneration} />
34
  </Box>
35
  </>
36
  );