Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
update question download format
Browse files
backend/routes/download.py
CHANGED
@@ -15,23 +15,23 @@ router = APIRouter(tags=["download"])
|
|
15 |
@router.get("/download-dataset/{session_id}")
|
16 |
async def download_dataset(session_id: str):
|
17 |
"""
|
18 |
-
|
19 |
|
20 |
Args:
|
21 |
-
session_id:
|
22 |
|
23 |
Returns:
|
24 |
-
|
25 |
"""
|
26 |
try:
|
27 |
-
#
|
28 |
with tempfile.TemporaryDirectory() as temp_dir:
|
29 |
-
#
|
30 |
repo_id = f"yourbench/yourbench_{session_id}"
|
31 |
|
32 |
try:
|
33 |
-
#
|
34 |
-
logging.info(f"
|
35 |
snapshot_path = snapshot_download(
|
36 |
repo_id=repo_id,
|
37 |
repo_type="dataset",
|
@@ -39,22 +39,22 @@ async def download_dataset(session_id: str):
|
|
39 |
token=os.environ.get("HF_TOKEN")
|
40 |
)
|
41 |
|
42 |
-
logging.info(f"Dataset
|
43 |
|
44 |
-
#
|
45 |
zip_io = io.BytesIO()
|
46 |
with zipfile.ZipFile(zip_io, 'w', zipfile.ZIP_DEFLATED) as zip_file:
|
47 |
-
#
|
48 |
for root, _, files in os.walk(snapshot_path):
|
49 |
for file in files:
|
50 |
file_path = os.path.join(root, file)
|
51 |
arc_name = os.path.relpath(file_path, snapshot_path)
|
52 |
zip_file.write(file_path, arcname=arc_name)
|
53 |
|
54 |
-
#
|
55 |
zip_io.seek(0)
|
56 |
|
57 |
-
#
|
58 |
filename = f"yourbench_{session_id}_dataset.zip"
|
59 |
return StreamingResponse(
|
60 |
zip_io,
|
@@ -63,31 +63,31 @@ async def download_dataset(session_id: str):
|
|
63 |
)
|
64 |
|
65 |
except Exception as e:
|
66 |
-
logging.error(f"
|
67 |
raise HTTPException(
|
68 |
status_code=500,
|
69 |
-
detail=f"
|
70 |
)
|
71 |
except Exception as e:
|
72 |
-
logging.error(f"
|
73 |
raise HTTPException(
|
74 |
status_code=500,
|
75 |
-
detail=f"
|
76 |
)
|
77 |
|
78 |
@router.get("/download-questions/{session_id}")
|
79 |
async def download_questions(session_id: str):
|
80 |
"""
|
81 |
-
|
82 |
|
83 |
Args:
|
84 |
-
session_id:
|
85 |
|
86 |
Returns:
|
87 |
-
|
88 |
"""
|
89 |
try:
|
90 |
-
#
|
91 |
dataset_repo_id = f"yourbench/yourbench_{session_id}"
|
92 |
|
93 |
# Initialize questions list
|
@@ -126,13 +126,10 @@ async def download_questions(session_id: str):
|
|
126 |
|
127 |
# If we couldn't load any questions, the dataset might not exist
|
128 |
if len(all_questions) == 0:
|
129 |
-
raise HTTPException(status_code=404, detail="
|
130 |
|
131 |
-
# Convert questions to JSON
|
132 |
-
questions_json = json.dumps({
|
133 |
-
"session_id": session_id,
|
134 |
-
"questions": all_questions
|
135 |
-
}, ensure_ascii=False, indent=2)
|
136 |
|
137 |
# Create a BytesIO object with the JSON data
|
138 |
json_bytes = io.BytesIO(questions_json.encode('utf-8'))
|
@@ -150,8 +147,8 @@ async def download_questions(session_id: str):
|
|
150 |
# Re-raise HTTP exceptions
|
151 |
raise
|
152 |
except Exception as e:
|
153 |
-
logging.error(f"
|
154 |
raise HTTPException(
|
155 |
status_code=500,
|
156 |
-
detail=f"
|
157 |
)
|
|
|
15 |
@router.get("/download-dataset/{session_id}")
|
16 |
async def download_dataset(session_id: str):
|
17 |
"""
|
18 |
+
Downloads the HuggingFace dataset associated with a session and returns it to the client
|
19 |
|
20 |
Args:
|
21 |
+
session_id: Session identifier
|
22 |
|
23 |
Returns:
|
24 |
+
ZIP file containing the dataset
|
25 |
"""
|
26 |
try:
|
27 |
+
# Create a temporary directory to store the dataset files
|
28 |
with tempfile.TemporaryDirectory() as temp_dir:
|
29 |
+
# HuggingFace repo identifier
|
30 |
repo_id = f"yourbench/yourbench_{session_id}"
|
31 |
|
32 |
try:
|
33 |
+
# Download the dataset snapshot from HuggingFace
|
34 |
+
logging.info(f"Downloading dataset {repo_id}")
|
35 |
snapshot_path = snapshot_download(
|
36 |
repo_id=repo_id,
|
37 |
repo_type="dataset",
|
|
|
39 |
token=os.environ.get("HF_TOKEN")
|
40 |
)
|
41 |
|
42 |
+
logging.info(f"Dataset downloaded to {snapshot_path}")
|
43 |
|
44 |
+
# Create a ZIP file in memory
|
45 |
zip_io = io.BytesIO()
|
46 |
with zipfile.ZipFile(zip_io, 'w', zipfile.ZIP_DEFLATED) as zip_file:
|
47 |
+
# Loop through all files in the dataset and add them to the ZIP
|
48 |
for root, _, files in os.walk(snapshot_path):
|
49 |
for file in files:
|
50 |
file_path = os.path.join(root, file)
|
51 |
arc_name = os.path.relpath(file_path, snapshot_path)
|
52 |
zip_file.write(file_path, arcname=arc_name)
|
53 |
|
54 |
+
# Reset the cursor to the beginning of the stream
|
55 |
zip_io.seek(0)
|
56 |
|
57 |
+
# Return the ZIP to the client
|
58 |
filename = f"yourbench_{session_id}_dataset.zip"
|
59 |
return StreamingResponse(
|
60 |
zip_io,
|
|
|
63 |
)
|
64 |
|
65 |
except Exception as e:
|
66 |
+
logging.error(f"Error while downloading the dataset: {str(e)}")
|
67 |
raise HTTPException(
|
68 |
status_code=500,
|
69 |
+
detail=f"Error while downloading the dataset: {str(e)}"
|
70 |
)
|
71 |
except Exception as e:
|
72 |
+
logging.error(f"General error: {str(e)}")
|
73 |
raise HTTPException(
|
74 |
status_code=500,
|
75 |
+
detail=f"Error during download: {str(e)}"
|
76 |
)
|
77 |
|
78 |
@router.get("/download-questions/{session_id}")
|
79 |
async def download_questions(session_id: str):
|
80 |
"""
|
81 |
+
Downloads the questions generated for a session in JSON format
|
82 |
|
83 |
Args:
|
84 |
+
session_id: Session identifier
|
85 |
|
86 |
Returns:
|
87 |
+
JSON file containing only the list of generated questions
|
88 |
"""
|
89 |
try:
|
90 |
+
# HuggingFace repo identifier
|
91 |
dataset_repo_id = f"yourbench/yourbench_{session_id}"
|
92 |
|
93 |
# Initialize questions list
|
|
|
126 |
|
127 |
# If we couldn't load any questions, the dataset might not exist
|
128 |
if len(all_questions) == 0:
|
129 |
+
raise HTTPException(status_code=404, detail="No questions found for this session")
|
130 |
|
131 |
+
# Convert only the list of questions to JSON (without session_id and without wrapping object)
|
132 |
+
questions_json = json.dumps(all_questions, ensure_ascii=False, indent=2)
|
|
|
|
|
|
|
133 |
|
134 |
# Create a BytesIO object with the JSON data
|
135 |
json_bytes = io.BytesIO(questions_json.encode('utf-8'))
|
|
|
147 |
# Re-raise HTTP exceptions
|
148 |
raise
|
149 |
except Exception as e:
|
150 |
+
logging.error(f"Error retrieving questions: {str(e)}")
|
151 |
raise HTTPException(
|
152 |
status_code=500,
|
153 |
+
detail=f"Error downloading questions: {str(e)}"
|
154 |
)
|
backend/tasks/get_available_model_provider.py
CHANGED
@@ -102,7 +102,7 @@ def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool
|
|
102 |
if verbose:
|
103 |
logger.warning(f"Error creating client for {provider}: {str(auth_error)}")
|
104 |
return False
|
105 |
-
|
106 |
except Exception as e:
|
107 |
if verbose:
|
108 |
logger.warning(f"Error in test_provider: {str(e)}")
|
@@ -172,6 +172,7 @@ def get_available_model_provider(model_name, verbose=False):
|
|
172 |
raise ValueError("HF_TOKEN not defined in environment")
|
173 |
|
174 |
# Get providers for the model and prioritize them
|
|
|
175 |
try:
|
176 |
# Essayer avec le token
|
177 |
try:
|
@@ -198,18 +199,19 @@ def get_available_model_provider(model_name, verbose=False):
|
|
198 |
# Autre erreur, la relancer
|
199 |
raise auth_error
|
200 |
|
201 |
-
if not hasattr(info, "inference_provider_mapping"):
|
202 |
if verbose:
|
203 |
logger.info(f"No inference providers found for {model_name}")
|
204 |
# Essayer avec la liste de providers par défaut
|
205 |
return _test_fallback_providers(model_name, verbose)
|
206 |
-
|
207 |
providers = list(info.inference_provider_mapping.keys())
|
208 |
if not providers:
|
209 |
if verbose:
|
210 |
logger.info(f"Empty list of providers for {model_name}")
|
211 |
# Essayer avec la liste de providers par défaut
|
212 |
return _test_fallback_providers(model_name, verbose)
|
|
|
213 |
except Exception as e:
|
214 |
if verbose:
|
215 |
logger.error(f"Error retrieving model info for {model_name}: {str(e)}")
|
@@ -271,7 +273,7 @@ def get_available_model_provider(model_name, verbose=False):
|
|
271 |
if verbose:
|
272 |
logger.error(f"Error in get_available_model_provider: {str(e)}")
|
273 |
return None
|
274 |
-
|
275 |
def _test_fallback_providers(model_name, verbose=False):
|
276 |
"""
|
277 |
Fonction de secours qui teste une liste de providers communs sans passer par l'API
|
@@ -459,10 +461,10 @@ def test_models(verbose=True):
|
|
459 |
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
|
460 |
"mistralai/Mistral-Small-24B-Instruct-2501",
|
461 |
]
|
462 |
-
|
463 |
if verbose:
|
464 |
print("\n===== Testing all available models =====")
|
465 |
-
|
466 |
for model in models:
|
467 |
provider = get_available_model_provider(model, verbose)
|
468 |
results["all_models"][model] = provider
|
|
|
102 |
if verbose:
|
103 |
logger.warning(f"Error creating client for {provider}: {str(auth_error)}")
|
104 |
return False
|
105 |
+
|
106 |
except Exception as e:
|
107 |
if verbose:
|
108 |
logger.warning(f"Error in test_provider: {str(e)}")
|
|
|
172 |
raise ValueError("HF_TOKEN not defined in environment")
|
173 |
|
174 |
# Get providers for the model and prioritize them
|
175 |
+
info = None
|
176 |
try:
|
177 |
# Essayer avec le token
|
178 |
try:
|
|
|
199 |
# Autre erreur, la relancer
|
200 |
raise auth_error
|
201 |
|
202 |
+
if not info or not hasattr(info, "inference_provider_mapping"):
|
203 |
if verbose:
|
204 |
logger.info(f"No inference providers found for {model_name}")
|
205 |
# Essayer avec la liste de providers par défaut
|
206 |
return _test_fallback_providers(model_name, verbose)
|
207 |
+
|
208 |
providers = list(info.inference_provider_mapping.keys())
|
209 |
if not providers:
|
210 |
if verbose:
|
211 |
logger.info(f"Empty list of providers for {model_name}")
|
212 |
# Essayer avec la liste de providers par défaut
|
213 |
return _test_fallback_providers(model_name, verbose)
|
214 |
+
|
215 |
except Exception as e:
|
216 |
if verbose:
|
217 |
logger.error(f"Error retrieving model info for {model_name}: {str(e)}")
|
|
|
273 |
if verbose:
|
274 |
logger.error(f"Error in get_available_model_provider: {str(e)}")
|
275 |
return None
|
276 |
+
|
277 |
def _test_fallback_providers(model_name, verbose=False):
|
278 |
"""
|
279 |
Fonction de secours qui teste une liste de providers communs sans passer par l'API
|
|
|
461 |
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
|
462 |
"mistralai/Mistral-Small-24B-Instruct-2501",
|
463 |
]
|
464 |
+
|
465 |
if verbose:
|
466 |
print("\n===== Testing all available models =====")
|
467 |
+
|
468 |
for model in models:
|
469 |
provider = get_available_model_provider(model, verbose)
|
470 |
results["all_models"][model] = provider
|
frontend/src/components/Benchmark/CreateForm.jsx
CHANGED
@@ -83,6 +83,12 @@ function CreateForm({ onStartGeneration }) {
|
|
83 |
|
84 |
// Liste des documents par défaut
|
85 |
const defaultDocuments = [
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
{
|
87 |
id: "the-bitter-lesson",
|
88 |
name: "The Bitter Lesson",
|
@@ -95,12 +101,6 @@ function CreateForm({ onStartGeneration }) {
|
|
95 |
icon: <DescriptionIcon sx={{ fontSize: 40 }} />,
|
96 |
description: "Frequently asked questions about hurricanes",
|
97 |
},
|
98 |
-
{
|
99 |
-
id: "pokemon-guide",
|
100 |
-
name: "Pokemon Guide",
|
101 |
-
icon: <MenuBookIcon sx={{ fontSize: 40 }} />,
|
102 |
-
description: "A comprehensive guide for Pokemon enthusiasts",
|
103 |
-
},
|
104 |
];
|
105 |
|
106 |
const handleCloseSnackbar = () => {
|
@@ -181,8 +181,10 @@ function CreateForm({ onStartGeneration }) {
|
|
181 |
align="center"
|
182 |
sx={{ mb: 2, color: "text.secondary" }}
|
183 |
>
|
184 |
-
To create a benchmark, choose a sample document or
|
185 |
-
file/URL
|
|
|
|
|
186 |
</Typography>
|
187 |
|
188 |
<Grid container spacing={2} sx={{ mb: 0 }}>
|
|
|
83 |
|
84 |
// Liste des documents par défaut
|
85 |
const defaultDocuments = [
|
86 |
+
{
|
87 |
+
id: "pokemon-guide",
|
88 |
+
name: "Pokemon Guide",
|
89 |
+
icon: <MenuBookIcon sx={{ fontSize: 40 }} />,
|
90 |
+
description: "A comprehensive guide for Pokemon enthusiasts",
|
91 |
+
},
|
92 |
{
|
93 |
id: "the-bitter-lesson",
|
94 |
name: "The Bitter Lesson",
|
|
|
101 |
icon: <DescriptionIcon sx={{ fontSize: 40 }} />,
|
102 |
description: "Frequently asked questions about hurricanes",
|
103 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
];
|
105 |
|
106 |
const handleCloseSnackbar = () => {
|
|
|
181 |
align="center"
|
182 |
sx={{ mb: 2, color: "text.secondary" }}
|
183 |
>
|
184 |
+
To create a benchmark, <b>choose</b> a <b>sample document</b> or{" "}
|
185 |
+
<b>upload</b> your <b>own file/URL</b>.
|
186 |
+
<br />
|
187 |
+
(ideally a knowledge base, a FAQ, a news article, etc.)
|
188 |
</Typography>
|
189 |
|
190 |
<Grid container spacing={2} sx={{ mb: 0 }}>
|