Spaces:

yourbench
/

demo

Running on CPU Upgrade

App Files Community

tfrere committed on Apr 7

Commit

e64aebd

1 Parent(s): 048a732

update question download format

Browse files

Files changed (3) hide show

backend/routes/download.py +25 -28
backend/tasks/get_available_model_provider.py +8 -6
frontend/src/components/Benchmark/CreateForm.jsx +10 -8

backend/routes/download.py CHANGED Viewed

@@ -15,23 +15,23 @@ router = APIRouter(tags=["download"])
 @router.get("/download-dataset/{session_id}")
 async def download_dataset(session_id: str):
     """
-    Télécharge le dataset HuggingFace associé à une session et le renvoie au client
     Args:
-        session_id: Identifiant de la session
     Returns:
-        Fichier ZIP contenant le dataset
     """
     try:
-        # Créer un répertoire temporaire pour stocker les fichiers du dataset
         with tempfile.TemporaryDirectory() as temp_dir:
-            # Identifiant du repo HuggingFace
             repo_id = f"yourbench/yourbench_{session_id}"
             try:
-                # Télécharger le snapshot du dataset depuis HuggingFace
-                logging.info(f"Téléchargement du dataset {repo_id}")
                 snapshot_path = snapshot_download(
                     repo_id=repo_id,
                     repo_type="dataset",
@@ -39,22 +39,22 @@ async def download_dataset(session_id: str):
                     token=os.environ.get("HF_TOKEN")
                 )
-                logging.info(f"Dataset téléchargé dans {snapshot_path}")
-                # Créer un fichier ZIP en mémoire
                 zip_io = io.BytesIO()
                 with zipfile.ZipFile(zip_io, 'w', zipfile.ZIP_DEFLATED) as zip_file:
-                    # Parcourir tous les fichiers du dataset et les ajouter au ZIP
                     for root, _, files in os.walk(snapshot_path):
                         for file in files:
                             file_path = os.path.join(root, file)
                             arc_name = os.path.relpath(file_path, snapshot_path)
                             zip_file.write(file_path, arcname=arc_name)
-                # Remettre le curseur au début du stream
                 zip_io.seek(0)
-                # Renvoyer le ZIP au client
                 filename = f"yourbench_{session_id}_dataset.zip"
                 return StreamingResponse(
                     zip_io,
@@ -63,31 +63,31 @@ async def download_dataset(session_id: str):
                 )
             except Exception as e:
-                logging.error(f"Erreur lors du téléchargement du dataset: {str(e)}")
                 raise HTTPException(
                     status_code=500,
-                    detail=f"Erreur lors du téléchargement du dataset: {str(e)}"
                 )
     except Exception as e:
-        logging.error(f"Erreur générale: {str(e)}")
         raise HTTPException(
             status_code=500,
-            detail=f"Erreur lors du téléchargement: {str(e)}"
         )
 @router.get("/download-questions/{session_id}")
 async def download_questions(session_id: str):
     """
-    Télécharge les questions générées pour une session au format JSON
     Args:
-        session_id: Identifiant de la session
     Returns:
-        Fichier JSON contenant les questions générées
     """
     try:
-        # Identifiant du repo HuggingFace
         dataset_repo_id = f"yourbench/yourbench_{session_id}"
         # Initialize questions list
@@ -126,13 +126,10 @@ async def download_questions(session_id: str):
         # If we couldn't load any questions, the dataset might not exist
         if len(all_questions) == 0:
-            raise HTTPException(status_code=404, detail="Aucune question trouvée pour cette session")
-        # Convert questions to JSON
-        questions_json = json.dumps({
-            "session_id": session_id,
-            "questions": all_questions
-        }, ensure_ascii=False, indent=2)
         # Create a BytesIO object with the JSON data
         json_bytes = io.BytesIO(questions_json.encode('utf-8'))
@@ -150,8 +147,8 @@ async def download_questions(session_id: str):
         # Re-raise HTTP exceptions
         raise
     except Exception as e:
-        logging.error(f"Erreur lors de la récupération des questions: {str(e)}")
         raise HTTPException(
             status_code=500,
-            detail=f"Erreur lors du téléchargement des questions: {str(e)}"
         )

 @router.get("/download-dataset/{session_id}")
 async def download_dataset(session_id: str):
     """
+    Downloads the HuggingFace dataset associated with a session and returns it to the client
     Args:
+        session_id: Session identifier
     Returns:
+        ZIP file containing the dataset
     """
     try:
+        # Create a temporary directory to store the dataset files
         with tempfile.TemporaryDirectory() as temp_dir:
+            # HuggingFace repo identifier
             repo_id = f"yourbench/yourbench_{session_id}"
             try:
+                # Download the dataset snapshot from HuggingFace
+                logging.info(f"Downloading dataset {repo_id}")
                 snapshot_path = snapshot_download(
                     repo_id=repo_id,
                     repo_type="dataset",
                     token=os.environ.get("HF_TOKEN")
                 )
+                logging.info(f"Dataset downloaded to {snapshot_path}")
+                # Create a ZIP file in memory
                 zip_io = io.BytesIO()
                 with zipfile.ZipFile(zip_io, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+                    # Loop through all files in the dataset and add them to the ZIP
                     for root, _, files in os.walk(snapshot_path):
                         for file in files:
                             file_path = os.path.join(root, file)
                             arc_name = os.path.relpath(file_path, snapshot_path)
                             zip_file.write(file_path, arcname=arc_name)
+                # Reset the cursor to the beginning of the stream
                 zip_io.seek(0)
+                # Return the ZIP to the client
                 filename = f"yourbench_{session_id}_dataset.zip"
                 return StreamingResponse(
                     zip_io,
                 )
             except Exception as e:
+                logging.error(f"Error while downloading the dataset: {str(e)}")
                 raise HTTPException(
                     status_code=500,
+                    detail=f"Error while downloading the dataset: {str(e)}"
                 )
     except Exception as e:
+        logging.error(f"General error: {str(e)}")
         raise HTTPException(
             status_code=500,
+            detail=f"Error during download: {str(e)}"
         )
 @router.get("/download-questions/{session_id}")
 async def download_questions(session_id: str):
     """
+    Downloads the questions generated for a session in JSON format
     Args:
+        session_id: Session identifier
     Returns:
+        JSON file containing only the list of generated questions
     """
     try:
+        # HuggingFace repo identifier
         dataset_repo_id = f"yourbench/yourbench_{session_id}"
         # Initialize questions list
         # If we couldn't load any questions, the dataset might not exist
         if len(all_questions) == 0:
+            raise HTTPException(status_code=404, detail="No questions found for this session")
+        # Convert only the list of questions to JSON (without session_id and without wrapping object)
+        questions_json = json.dumps(all_questions, ensure_ascii=False, indent=2)
         # Create a BytesIO object with the JSON data
         json_bytes = io.BytesIO(questions_json.encode('utf-8'))
         # Re-raise HTTP exceptions
         raise
     except Exception as e:
+        logging.error(f"Error retrieving questions: {str(e)}")
         raise HTTPException(
             status_code=500,
+            detail=f"Error downloading questions: {str(e)}"
         )

backend/tasks/get_available_model_provider.py CHANGED Viewed

@@ -102,7 +102,7 @@ def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool
                 if verbose:
                     logger.warning(f"Error creating client for {provider}: {str(auth_error)}")
                 return False
     except Exception as e:
         if verbose:
             logger.warning(f"Error in test_provider: {str(e)}")
@@ -172,6 +172,7 @@ def get_available_model_provider(model_name, verbose=False):
             raise ValueError("HF_TOKEN not defined in environment")
         # Get providers for the model and prioritize them
         try:
             # Essayer avec le token
             try:
@@ -198,18 +199,19 @@ def get_available_model_provider(model_name, verbose=False):
                     # Autre erreur, la relancer
                     raise auth_error
-            if not hasattr(info, "inference_provider_mapping"):
                 if verbose:
                     logger.info(f"No inference providers found for {model_name}")
                 # Essayer avec la liste de providers par défaut
                 return _test_fallback_providers(model_name, verbose)
             providers = list(info.inference_provider_mapping.keys())
             if not providers:
                 if verbose:
                     logger.info(f"Empty list of providers for {model_name}")
                 # Essayer avec la liste de providers par défaut
                 return _test_fallback_providers(model_name, verbose)
         except Exception as e:
             if verbose:
                 logger.error(f"Error retrieving model info for {model_name}: {str(e)}")
@@ -271,7 +273,7 @@ def get_available_model_provider(model_name, verbose=False):
         if verbose:
             logger.error(f"Error in get_available_model_provider: {str(e)}")
         return None
 def _test_fallback_providers(model_name, verbose=False):
     """
     Fonction de secours qui teste une liste de providers communs sans passer par l'API
@@ -459,10 +461,10 @@ def test_models(verbose=True):
         "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
         "mistralai/Mistral-Small-24B-Instruct-2501",
     ]
     if verbose:
         print("\n===== Testing all available models =====")
     for model in models:
         provider = get_available_model_provider(model, verbose)
         results["all_models"][model] = provider

                 if verbose:
                     logger.warning(f"Error creating client for {provider}: {str(auth_error)}")
                 return False
     except Exception as e:
         if verbose:
             logger.warning(f"Error in test_provider: {str(e)}")
             raise ValueError("HF_TOKEN not defined in environment")
         # Get providers for the model and prioritize them
+        info = None
         try:
             # Essayer avec le token
             try:
                     # Autre erreur, la relancer
                     raise auth_error
+            if not info or not hasattr(info, "inference_provider_mapping"):
                 if verbose:
                     logger.info(f"No inference providers found for {model_name}")
                 # Essayer avec la liste de providers par défaut
                 return _test_fallback_providers(model_name, verbose)
             providers = list(info.inference_provider_mapping.keys())
             if not providers:
                 if verbose:
                     logger.info(f"Empty list of providers for {model_name}")
                 # Essayer avec la liste de providers par défaut
                 return _test_fallback_providers(model_name, verbose)
         except Exception as e:
             if verbose:
                 logger.error(f"Error retrieving model info for {model_name}: {str(e)}")
         if verbose:
             logger.error(f"Error in get_available_model_provider: {str(e)}")
         return None
 def _test_fallback_providers(model_name, verbose=False):
     """
     Fonction de secours qui teste une liste de providers communs sans passer par l'API
         "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
         "mistralai/Mistral-Small-24B-Instruct-2501",
     ]
     if verbose:
         print("\n===== Testing all available models =====")
     for model in models:
         provider = get_available_model_provider(model, verbose)
         results["all_models"][model] = provider

frontend/src/components/Benchmark/CreateForm.jsx CHANGED Viewed

@@ -83,6 +83,12 @@ function CreateForm({ onStartGeneration }) {
   // Liste des documents par défaut
   const defaultDocuments = [
     {
       id: "the-bitter-lesson",
       name: "The Bitter Lesson",
@@ -95,12 +101,6 @@ function CreateForm({ onStartGeneration }) {
       icon: <DescriptionIcon sx={{ fontSize: 40 }} />,
       description: "Frequently asked questions about hurricanes",
     },
-    {
-      id: "pokemon-guide",
-      name: "Pokemon Guide",
-      icon: <MenuBookIcon sx={{ fontSize: 40 }} />,
-      description: "A comprehensive guide for Pokemon enthusiasts",
-    },
   ];
   const handleCloseSnackbar = () => {
@@ -181,8 +181,10 @@ function CreateForm({ onStartGeneration }) {
         align="center"
         sx={{ mb: 2, color: "text.secondary" }}
       >
-        To create a benchmark, choose a sample document or upload your own
-        file/URL
       </Typography>
       <Grid container spacing={2} sx={{ mb: 0 }}>

   // Liste des documents par défaut
   const defaultDocuments = [
+    {
+      id: "pokemon-guide",
+      name: "Pokemon Guide",
+      icon: <MenuBookIcon sx={{ fontSize: 40 }} />,
+      description: "A comprehensive guide for Pokemon enthusiasts",
+    },
     {
       id: "the-bitter-lesson",
       name: "The Bitter Lesson",
       icon: <DescriptionIcon sx={{ fontSize: 40 }} />,
       description: "Frequently asked questions about hurricanes",
     },
   ];
   const handleCloseSnackbar = () => {
         align="center"
         sx={{ mb: 2, color: "text.secondary" }}
       >
+        To create a benchmark, <b>choose</b> a <b>sample document</b> or{" "}
+        <b>upload</b> your <b>own file/URL</b>.
+        <br />
+        (ideally a knowledge base, a FAQ, a news article, etc.)
       </Typography>
       <Grid container spacing={2} sx={{ mb: 0 }}>