tfrere committed on
Commit
e64aebd
·
1 Parent(s): 048a732

update question download format

Browse files
backend/routes/download.py CHANGED
@@ -15,23 +15,23 @@ router = APIRouter(tags=["download"])
15
  @router.get("/download-dataset/{session_id}")
16
  async def download_dataset(session_id: str):
17
  """
18
- Télécharge le dataset HuggingFace associé à une session et le renvoie au client
19
 
20
  Args:
21
- session_id: Identifiant de la session
22
 
23
  Returns:
24
- Fichier ZIP contenant le dataset
25
  """
26
  try:
27
- # Créer un répertoire temporaire pour stocker les fichiers du dataset
28
  with tempfile.TemporaryDirectory() as temp_dir:
29
- # Identifiant du repo HuggingFace
30
  repo_id = f"yourbench/yourbench_{session_id}"
31
 
32
  try:
33
- # Télécharger le snapshot du dataset depuis HuggingFace
34
- logging.info(f"Téléchargement du dataset {repo_id}")
35
  snapshot_path = snapshot_download(
36
  repo_id=repo_id,
37
  repo_type="dataset",
@@ -39,22 +39,22 @@ async def download_dataset(session_id: str):
39
  token=os.environ.get("HF_TOKEN")
40
  )
41
 
42
- logging.info(f"Dataset téléchargé dans {snapshot_path}")
43
 
44
- # Créer un fichier ZIP en mémoire
45
  zip_io = io.BytesIO()
46
  with zipfile.ZipFile(zip_io, 'w', zipfile.ZIP_DEFLATED) as zip_file:
47
- # Parcourir tous les fichiers du dataset et les ajouter au ZIP
48
  for root, _, files in os.walk(snapshot_path):
49
  for file in files:
50
  file_path = os.path.join(root, file)
51
  arc_name = os.path.relpath(file_path, snapshot_path)
52
  zip_file.write(file_path, arcname=arc_name)
53
 
54
- # Remettre le curseur au début du stream
55
  zip_io.seek(0)
56
 
57
- # Renvoyer le ZIP au client
58
  filename = f"yourbench_{session_id}_dataset.zip"
59
  return StreamingResponse(
60
  zip_io,
@@ -63,31 +63,31 @@ async def download_dataset(session_id: str):
63
  )
64
 
65
  except Exception as e:
66
- logging.error(f"Erreur lors du téléchargement du dataset: {str(e)}")
67
  raise HTTPException(
68
  status_code=500,
69
- detail=f"Erreur lors du téléchargement du dataset: {str(e)}"
70
  )
71
  except Exception as e:
72
- logging.error(f"Erreur générale: {str(e)}")
73
  raise HTTPException(
74
  status_code=500,
75
- detail=f"Erreur lors du téléchargement: {str(e)}"
76
  )
77
 
78
  @router.get("/download-questions/{session_id}")
79
  async def download_questions(session_id: str):
80
  """
81
- Télécharge les questions générées pour une session au format JSON
82
 
83
  Args:
84
- session_id: Identifiant de la session
85
 
86
  Returns:
87
- Fichier JSON contenant les questions générées
88
  """
89
  try:
90
- # Identifiant du repo HuggingFace
91
  dataset_repo_id = f"yourbench/yourbench_{session_id}"
92
 
93
  # Initialize questions list
@@ -126,13 +126,10 @@ async def download_questions(session_id: str):
126
 
127
  # If we couldn't load any questions, the dataset might not exist
128
  if len(all_questions) == 0:
129
- raise HTTPException(status_code=404, detail="Aucune question trouvée pour cette session")
130
 
131
- # Convert questions to JSON
132
- questions_json = json.dumps({
133
- "session_id": session_id,
134
- "questions": all_questions
135
- }, ensure_ascii=False, indent=2)
136
 
137
  # Create a BytesIO object with the JSON data
138
  json_bytes = io.BytesIO(questions_json.encode('utf-8'))
@@ -150,8 +147,8 @@ async def download_questions(session_id: str):
150
  # Re-raise HTTP exceptions
151
  raise
152
  except Exception as e:
153
- logging.error(f"Erreur lors de la récupération des questions: {str(e)}")
154
  raise HTTPException(
155
  status_code=500,
156
- detail=f"Erreur lors du téléchargement des questions: {str(e)}"
157
  )
 
15
  @router.get("/download-dataset/{session_id}")
16
  async def download_dataset(session_id: str):
17
  """
18
+ Downloads the HuggingFace dataset associated with a session and returns it to the client
19
 
20
  Args:
21
+ session_id: Session identifier
22
 
23
  Returns:
24
+ ZIP file containing the dataset
25
  """
26
  try:
27
+ # Create a temporary directory to store the dataset files
28
  with tempfile.TemporaryDirectory() as temp_dir:
29
+ # HuggingFace repo identifier
30
  repo_id = f"yourbench/yourbench_{session_id}"
31
 
32
  try:
33
+ # Download the dataset snapshot from HuggingFace
34
+ logging.info(f"Downloading dataset {repo_id}")
35
  snapshot_path = snapshot_download(
36
  repo_id=repo_id,
37
  repo_type="dataset",
 
39
  token=os.environ.get("HF_TOKEN")
40
  )
41
 
42
+ logging.info(f"Dataset downloaded to {snapshot_path}")
43
 
44
+ # Create a ZIP file in memory
45
  zip_io = io.BytesIO()
46
  with zipfile.ZipFile(zip_io, 'w', zipfile.ZIP_DEFLATED) as zip_file:
47
+ # Loop through all files in the dataset and add them to the ZIP
48
  for root, _, files in os.walk(snapshot_path):
49
  for file in files:
50
  file_path = os.path.join(root, file)
51
  arc_name = os.path.relpath(file_path, snapshot_path)
52
  zip_file.write(file_path, arcname=arc_name)
53
 
54
+ # Reset the cursor to the beginning of the stream
55
  zip_io.seek(0)
56
 
57
+ # Return the ZIP to the client
58
  filename = f"yourbench_{session_id}_dataset.zip"
59
  return StreamingResponse(
60
  zip_io,
 
63
  )
64
 
65
  except Exception as e:
66
+ logging.error(f"Error while downloading the dataset: {str(e)}")
67
  raise HTTPException(
68
  status_code=500,
69
+ detail=f"Error while downloading the dataset: {str(e)}"
70
  )
71
  except Exception as e:
72
+ logging.error(f"General error: {str(e)}")
73
  raise HTTPException(
74
  status_code=500,
75
+ detail=f"Error during download: {str(e)}"
76
  )
77
 
78
  @router.get("/download-questions/{session_id}")
79
  async def download_questions(session_id: str):
80
  """
81
+ Downloads the questions generated for a session in JSON format
82
 
83
  Args:
84
+ session_id: Session identifier
85
 
86
  Returns:
87
+ JSON file containing only the list of generated questions
88
  """
89
  try:
90
+ # HuggingFace repo identifier
91
  dataset_repo_id = f"yourbench/yourbench_{session_id}"
92
 
93
  # Initialize questions list
 
126
 
127
  # If we couldn't load any questions, the dataset might not exist
128
  if len(all_questions) == 0:
129
+ raise HTTPException(status_code=404, detail="No questions found for this session")
130
 
131
+ # Convert only the list of questions to JSON (without session_id and without wrapping object)
132
+ questions_json = json.dumps(all_questions, ensure_ascii=False, indent=2)
 
 
 
133
 
134
  # Create a BytesIO object with the JSON data
135
  json_bytes = io.BytesIO(questions_json.encode('utf-8'))
 
147
  # Re-raise HTTP exceptions
148
  raise
149
  except Exception as e:
150
+ logging.error(f"Error retrieving questions: {str(e)}")
151
  raise HTTPException(
152
  status_code=500,
153
+ detail=f"Error downloading questions: {str(e)}"
154
  )
backend/tasks/get_available_model_provider.py CHANGED
@@ -102,7 +102,7 @@ def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool
102
  if verbose:
103
  logger.warning(f"Error creating client for {provider}: {str(auth_error)}")
104
  return False
105
-
106
  except Exception as e:
107
  if verbose:
108
  logger.warning(f"Error in test_provider: {str(e)}")
@@ -172,6 +172,7 @@ def get_available_model_provider(model_name, verbose=False):
172
  raise ValueError("HF_TOKEN not defined in environment")
173
 
174
  # Get providers for the model and prioritize them
 
175
  try:
176
  # Essayer avec le token
177
  try:
@@ -198,18 +199,19 @@ def get_available_model_provider(model_name, verbose=False):
198
  # Autre erreur, la relancer
199
  raise auth_error
200
 
201
- if not hasattr(info, "inference_provider_mapping"):
202
  if verbose:
203
  logger.info(f"No inference providers found for {model_name}")
204
  # Essayer avec la liste de providers par défaut
205
  return _test_fallback_providers(model_name, verbose)
206
-
207
  providers = list(info.inference_provider_mapping.keys())
208
  if not providers:
209
  if verbose:
210
  logger.info(f"Empty list of providers for {model_name}")
211
  # Essayer avec la liste de providers par défaut
212
  return _test_fallback_providers(model_name, verbose)
 
213
  except Exception as e:
214
  if verbose:
215
  logger.error(f"Error retrieving model info for {model_name}: {str(e)}")
@@ -271,7 +273,7 @@ def get_available_model_provider(model_name, verbose=False):
271
  if verbose:
272
  logger.error(f"Error in get_available_model_provider: {str(e)}")
273
  return None
274
-
275
  def _test_fallback_providers(model_name, verbose=False):
276
  """
277
  Fonction de secours qui teste une liste de providers communs sans passer par l'API
@@ -459,10 +461,10 @@ def test_models(verbose=True):
459
  "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
460
  "mistralai/Mistral-Small-24B-Instruct-2501",
461
  ]
462
-
463
  if verbose:
464
  print("\n===== Testing all available models =====")
465
-
466
  for model in models:
467
  provider = get_available_model_provider(model, verbose)
468
  results["all_models"][model] = provider
 
102
  if verbose:
103
  logger.warning(f"Error creating client for {provider}: {str(auth_error)}")
104
  return False
105
+
106
  except Exception as e:
107
  if verbose:
108
  logger.warning(f"Error in test_provider: {str(e)}")
 
172
  raise ValueError("HF_TOKEN not defined in environment")
173
 
174
  # Get providers for the model and prioritize them
175
+ info = None
176
  try:
177
  # Essayer avec le token
178
  try:
 
199
  # Autre erreur, la relancer
200
  raise auth_error
201
 
202
+ if not info or not hasattr(info, "inference_provider_mapping"):
203
  if verbose:
204
  logger.info(f"No inference providers found for {model_name}")
205
  # Essayer avec la liste de providers par défaut
206
  return _test_fallback_providers(model_name, verbose)
207
+
208
  providers = list(info.inference_provider_mapping.keys())
209
  if not providers:
210
  if verbose:
211
  logger.info(f"Empty list of providers for {model_name}")
212
  # Essayer avec la liste de providers par défaut
213
  return _test_fallback_providers(model_name, verbose)
214
+
215
  except Exception as e:
216
  if verbose:
217
  logger.error(f"Error retrieving model info for {model_name}: {str(e)}")
 
273
  if verbose:
274
  logger.error(f"Error in get_available_model_provider: {str(e)}")
275
  return None
276
+
277
  def _test_fallback_providers(model_name, verbose=False):
278
  """
279
  Fonction de secours qui teste une liste de providers communs sans passer par l'API
 
461
  "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
462
  "mistralai/Mistral-Small-24B-Instruct-2501",
463
  ]
464
+
465
  if verbose:
466
  print("\n===== Testing all available models =====")
467
+
468
  for model in models:
469
  provider = get_available_model_provider(model, verbose)
470
  results["all_models"][model] = provider
frontend/src/components/Benchmark/CreateForm.jsx CHANGED
@@ -83,6 +83,12 @@ function CreateForm({ onStartGeneration }) {
83
 
84
  // Liste des documents par défaut
85
  const defaultDocuments = [
 
 
 
 
 
 
86
  {
87
  id: "the-bitter-lesson",
88
  name: "The Bitter Lesson",
@@ -95,12 +101,6 @@ function CreateForm({ onStartGeneration }) {
95
  icon: <DescriptionIcon sx={{ fontSize: 40 }} />,
96
  description: "Frequently asked questions about hurricanes",
97
  },
98
- {
99
- id: "pokemon-guide",
100
- name: "Pokemon Guide",
101
- icon: <MenuBookIcon sx={{ fontSize: 40 }} />,
102
- description: "A comprehensive guide for Pokemon enthusiasts",
103
- },
104
  ];
105
 
106
  const handleCloseSnackbar = () => {
@@ -181,8 +181,10 @@ function CreateForm({ onStartGeneration }) {
181
  align="center"
182
  sx={{ mb: 2, color: "text.secondary" }}
183
  >
184
- To create a benchmark, choose a sample document or upload your own
185
- file/URL
 
 
186
  </Typography>
187
 
188
  <Grid container spacing={2} sx={{ mb: 0 }}>
 
83
 
84
  // Liste des documents par défaut
85
  const defaultDocuments = [
86
+ {
87
+ id: "pokemon-guide",
88
+ name: "Pokemon Guide",
89
+ icon: <MenuBookIcon sx={{ fontSize: 40 }} />,
90
+ description: "A comprehensive guide for Pokemon enthusiasts",
91
+ },
92
  {
93
  id: "the-bitter-lesson",
94
  name: "The Bitter Lesson",
 
101
  icon: <DescriptionIcon sx={{ fontSize: 40 }} />,
102
  description: "Frequently asked questions about hurricanes",
103
  },
 
 
 
 
 
 
104
  ];
105
 
106
  const handleCloseSnackbar = () => {
 
181
  align="center"
182
  sx={{ mb: 2, color: "text.secondary" }}
183
  >
184
+ To create a benchmark, <b>choose</b> a <b>sample document</b> or{" "}
185
+ <b>upload</b> your <b>own file/URL</b>.
186
+ <br />
187
+ (ideally a knowledge base, a FAQ, a news article, etc.)
188
  </Typography>
189
 
190
  <Grid container spacing={2} sx={{ mb: 0 }}>