update error handling, improve upload security checks
- backend/lighteval_task/lighteval_task.py +2 -3
- backend/pyproject.toml +1 -0
- backend/routes/cleanup.py +1 -1
- backend/routes/upload.py +82 -10
- backend/tasks/create_bench_config_file.py +7 -6
- backend/tasks/evaluation_task.py +35 -36
- backend/tasks/get_available_model_provider.py +8 -1
- frontend/server.js +0 -2
- frontend/src/App.js +4 -21
- frontend/src/components/{BenchmarkCreateForm.jsx → Benchmark/CreateForm.jsx} +12 -12
- frontend/src/components/{BenchmarkDisplay.jsx → Benchmark/Display.jsx} +6 -26
- frontend/src/components/{BenchmarkGenerator.jsx → Benchmark/Generator.jsx} +152 -180
- frontend/src/components/Benchmark/hooks/useBenchmarkLogs.js +192 -0
- frontend/src/components/Benchmark/hooks/useBenchmarkPolling.js +106 -0
- frontend/src/components/Benchmark/hooks/useBenchmarkSimulation.js +66 -0
- frontend/src/components/BenchmarkEvaluation.jsx +0 -401
- frontend/src/components/{EvaluationDisplay.jsx → Evaluation/Display.jsx} +11 -8
- frontend/src/components/Evaluation/Evaluation.jsx +150 -0
- frontend/src/components/Evaluation/hooks/useEvaluation.js +148 -0
- frontend/src/components/Evaluation/hooks/useSimulation.js +59 -0
- frontend/src/components/Evaluation/hooks/useTimer.js +48 -0
- frontend/src/components/Footer/Footer.js +7 -5
- frontend/src/components/Intro.jsx +14 -2
- frontend/src/components/KeyboardShortcuts.jsx +0 -24
- frontend/src/components/{ExternalLinks.jsx → Navigation.jsx} +3 -3
- frontend/src/components/common/ErrorDisplay.jsx +43 -0
- frontend/src/components/shared/AuthContainer.js +0 -192
- frontend/src/components/shared/CodeBlock.js +0 -37
- frontend/src/components/shared/FilterTag.js +0 -139
- frontend/src/components/shared/InfoIconWithTooltip.js +0 -87
- frontend/src/components/shared/PageHeader.js +0 -29
- frontend/src/pages/BenchmarkDisplayPage.jsx +2 -2
- frontend/src/pages/BenchmarkEvaluationPage.jsx +2 -2
- frontend/src/pages/BenchmarkGenerationPage.jsx +2 -2
- frontend/src/pages/EvaluationDisplayPage.jsx +5 -9
- frontend/src/pages/HomePage.jsx +2 -2
backend/lighteval_task/lighteval_task.py
CHANGED
@@ -218,11 +218,10 @@ def process_judge_response_yourbench(response):
 class JudgeLLMYourBench(JudgeLLM):
     def __init__(self):
         super().__init__(
-            judge_model_name="…
+            judge_model_name="gpt-4o-2024-08-06",
             template=get_judge_prompt,
             process_judge_response=process_judge_response_yourbench,
-            judge_backend="…
-            hf_provider="novita",
+            judge_backend="openai",
             short_judge_name="yourbench_judge",
         )
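The judge previously ran through a Hugging Face inference provider ("novita") and now uses the OpenAI backend with gpt-4o-2024-08-06. For orientation only, here is a minimal sketch of a direct call to that judge model with the official openai client; this is not lighteval's internal code, and it assumes the deployment exposes an OPENAI_API_KEY for the backend to pick up:

```python
# Illustrative sketch only: reaching the judge model named in the diff via the
# official openai client. Assumes OPENAI_API_KEY is set in the environment.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
response = client.chat.completions.create(
    model="gpt-4o-2024-08-06",
    messages=[{"role": "user", "content": "Is the candidate answer correct? Reply yes or no."}],
)
print(response.choices[0].message.content)
```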
backend/pyproject.toml
CHANGED
@@ -25,6 +25,7 @@ dependencies = [
     "pydantic>=2.6.0",
     "PyPDF2>=3.0.0",
     "beautifulsoup4>=4.12.0",
+    "evaluate>=0.4.0",
 ]
 
 [build-system]
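The new `evaluate` dependency is Hugging Face's metrics library. Where exactly the backend calls it is not part of this diff, so the snippet below is only an illustrative sketch of the library's basic usage, with a hypothetical choice of metric:

```python
import evaluate

# Load a metric by name and score predictions against references.
exact_match = evaluate.load("exact_match")
result = exact_match.compute(
    predictions=["Paris is the capital of France."],
    references=["Paris is the capital of France."],
)
print(result)  # {'exact_match': 1.0}
```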
backend/routes/cleanup.py
CHANGED
@@ -27,7 +27,7 @@ async def cleanup_session(session_id: str):
     """
     # Check if we are in development mode
     # if os.environ.get("ENVIRONEMENT", "").lower() == "development":
-    if …
+    if True:
         logging.info(f"[DEV MODE] Cleanup called for session: {session_id} - No action taken in development mode")
         return {
             "success": True,
backend/routes/upload.py
CHANGED
@@ -14,12 +14,23 @@ session_files = {}
 UPLOAD_ROOT = "uploaded_files"
 os.makedirs(UPLOAD_ROOT, exist_ok=True)
 
+# Longueur minimale pour tout fichier (en caractères)
+MIN_FILE_LENGTH = 500
+
 def validate_pdf(file_path: str) -> bool:
     """Validate if file is a valid PDF."""
     try:
         reader = PdfReader(file_path)
         # Vérifier que le PDF a au moins une page
-        …
+        if len(reader.pages) == 0:
+            return False
+
+        # Extraire le texte pour vérifier la longueur
+        text = ""
+        for page in reader.pages:
+            text += page.extract_text()
+
+        return len(text) >= MIN_FILE_LENGTH
     except:
         return False
 
@@ -28,8 +39,8 @@ def validate_markdown(file_path: str) -> bool:
     try:
         with open(file_path, 'r', encoding='utf-8') as f:
             content = f.read()
-            # …
-            return len(content) …
+            # Vérifier longueur minimale et présence d'éléments markdown
+            return len(content) >= MIN_FILE_LENGTH and any(marker in content for marker in ['#', '-', '*', '`', '[', '>'])
     except:
         return False
 
@@ -37,7 +48,11 @@ def validate_html(file_path: str) -> bool:
     """Validate if file is a valid HTML file."""
     try:
         with open(file_path, 'r', encoding='utf-8') as f:
-            …
+            content = f.read()
+            # Vérifier longueur minimale et structure HTML
+            if len(content) < MIN_FILE_LENGTH:
+                return False
+            BeautifulSoup(content, 'html.parser')
             return True
     except:
         return False
@@ -47,7 +62,7 @@ def validate_txt(file_path: str) -> bool:
     try:
         with open(file_path, 'r', encoding='utf-8') as f:
             content = f.read()
-            return len(content.strip()) …
+            return len(content.strip()) >= MIN_FILE_LENGTH
     except:
         return False
 
@@ -112,19 +127,76 @@ async def upload_file(file: UploadFile = File(...)):
 
     # Valider le fichier selon son type
     is_valid = False
+    error_detail = ""
+
     if file_extension == '.pdf':
-        …
+        try:
+            reader = PdfReader(file_path)
+            if len(reader.pages) == 0:
+                error_detail = "PDF must contain at least one page"
+                is_valid = False
+            else:
+                text = ""
+                for page in reader.pages:
+                    text += page.extract_text()
+
+                if len(text) < MIN_FILE_LENGTH:
+                    error_detail = f"PDF contains {len(text)} characters but must contain at least {MIN_FILE_LENGTH}"
+                    is_valid = False
+                else:
+                    is_valid = True
+        except:
+            error_detail = "Invalid PDF format"
+            is_valid = False
     elif file_extension == '.md':
-        …
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                content = f.read()
+
+            if len(content) < MIN_FILE_LENGTH:
+                error_detail = f"Markdown file contains {len(content)} characters but must contain at least {MIN_FILE_LENGTH}"
+                is_valid = False
+            elif not any(marker in content for marker in ['#', '-', '*', '`', '[', '>']):
+                error_detail = "Markdown file does not contain any valid Markdown elements"
+                is_valid = False
+            else:
+                is_valid = True
+        except:
+            error_detail = "Invalid Markdown format"
+            is_valid = False
     elif file_extension == '.html':
-        …
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                content = f.read()
+
+            if len(content) < MIN_FILE_LENGTH:
+                error_detail = f"HTML file contains {len(content)} characters but must contain at least {MIN_FILE_LENGTH}"
+                is_valid = False
+            else:
+                BeautifulSoup(content, 'html.parser')
+                is_valid = True
+        except:
+            error_detail = "Invalid HTML format"
+            is_valid = False
     elif file_extension == '.txt':
-        …
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                content = f.read()
+            content_length = len(content.strip())
+
+            if content_length < MIN_FILE_LENGTH:
+                error_detail = f"Text file contains {content_length} characters but must contain at least {MIN_FILE_LENGTH}"
+                is_valid = False
+            else:
+                is_valid = True
+        except:
+            error_detail = "Invalid text format"
+            is_valid = False
 
     if not is_valid:
         # Supprimer le fichier invalide
         os.remove(file_path)
-        raise HTTPException(status_code=400, detail=f"Invalid {file_extension[1:].upper()} file")
+        raise HTTPException(status_code=400, detail=error_detail or f"Invalid {file_extension[1:].upper()} file")
 
     # Store file path for later use
     session_files[session_id] = file_path
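With these checks in place, a file that parses but is too short is rejected with a specific message rather than a generic "Invalid PDF file". A minimal client-side sketch of that behaviour (the /upload path matches the route proxied in frontend/server.js; the base URL and file name are placeholders):

```python
import requests

BASE_URL = "http://localhost:8000"  # placeholder; in the Space the frontend proxies this route

with open("document.pdf", "rb") as f:
    response = requests.post(f"{BASE_URL}/upload", files={"file": f})

if response.status_code == 400:
    # FastAPI serializes HTTPException as {"detail": "..."}; here detail carries
    # error_detail, e.g. "PDF contains 120 characters but must contain at least 500".
    print("Rejected:", response.json()["detail"])
else:
    response.raise_for_status()
    print("Accepted:", response.json())
```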
backend/tasks/create_bench_config_file.py
CHANGED
@@ -123,7 +123,8 @@ class CreateBenchConfigTask:
         required_models = [
             # "Qwen/Qwen2.5-72B-Instruct"
             # "meta-llama/Llama-3.1-8B-Instruct"
-            "Qwen/Qwen2.5-32B-Instruct"
+            # "Qwen/Qwen2.5-32B-Instruct",
+            "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
         ]
 
         # Track found models
@@ -166,11 +167,11 @@ class CreateBenchConfigTask:
             "model_list": model_list,
 
             "model_roles": {
-                "ingestion": ["…
-                "summarization": ["…
+                "ingestion": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
+                "summarization": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
                 "chunking": ["intfloat/multilingual-e5-large-instruct"],
-                "single_shot_question_generation": ["…
-                "multi_hop_question_generation": ["…
+                "single_shot_question_generation": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
+                "multi_hop_question_generation": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
             },
             "pipeline": {
                 "ingestion": {
@@ -201,7 +202,7 @@ class CreateBenchConfigTask:
                 "additional_instructions": "Generate rich and creative questions to test a curious adult",
                 "chunk_sampling": {
                     "mode": "count",
-                    "value": …
+                    "value": 5,
                     "random_seed": 123,
                 },
             },
backend/tasks/evaluation_task.py
CHANGED
@@ -15,9 +15,20 @@ from typing import List, Dict
 from tasks.get_available_model_provider import get_available_model_provider
 from huggingface_hub import HfApi
 import asyncio
+from datasets import load_dataset
+# Default timeout value
+DEFAULT_EVALUATION_TIMEOUT = 60.0  # 1 minute by default
 
-# …
-…
+# Models to evaluate - only accessible models
+DEFAULT_EVALUATION_MODELS = [
+    "Qwen/QwQ-32B",
+    "Qwen/Qwen2.5-72B-Instruct",
+    "Qwen/Qwen2.5-32B-Instruct",
+    "meta-llama/Llama-3.1-8B-Instruct",
+    "meta-llama/Llama-3.3-70B-Instruct",
+    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
+    "mistralai/Mistral-Small-24B-Instruct-2501",
+]
 
 class EvaluationTask:
     """
@@ -42,9 +53,9 @@ class EvaluationTask:
         self.timeout = timeout if timeout is not None else DEFAULT_EVALUATION_TIMEOUT
         self.current_step = "initializing"
         self.completed_steps = []
-        self.step_start_time = time.time()  # …
+        self.step_start_time = time.time()  # Record the start time of the current step
 
-        # …
+        # Clean old results if requested
         if clean_old_results:
             self.clean_old_results()
 
@@ -55,18 +66,18 @@ class EvaluationTask:
         Args:
             step: Name of the step to update
         """
-        # …
+        # Calculate the elapsed time since the start of the previous step
         elapsed_since_step_start = time.time() - self.step_start_time
 
-        # …
+        # If less than one second has passed, wait to complete the second
         if elapsed_since_step_start < 1.0:
             await asyncio.sleep(1.0 - elapsed_since_step_start)
 
-        # …
+        # Update the current step and record the new timestamp
         self.current_step = step
         self.step_start_time = time.time()
 
-        # …
+        # Add to completed steps if necessary
         if step not in self.completed_steps:
             self.completed_steps.append(step)
 
@@ -114,12 +125,12 @@ class EvaluationTask:
         Save evaluation results directly to the dataset on the Hub without persisting locally
         """
         try:
-            # …
+            # Sort results by accuracy (from most accurate to least accurate)
            sorted_results = sorted(self.results, key=lambda x: x.get('accuracy', 0), reverse=True)
 
-            # …
+            # Create a temporary file for the results
            with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_file:
-                # …
+                # Add metadata to the results
                final_results = {
                    "metadata": {
                        "evaluation_date": datetime.now().isoformat(),
@@ -143,7 +154,7 @@ class EvaluationTask:
 
            print(f"[{datetime.now().strftime('%H:%M:%S')}] Results saved to Hub at {self.dataset_name}/lighteval_results.json")
 
-            # …
+            # Delete the temporary file
            os.unlink(temp_file_path)
        except Exception as e:
            print(f"[{datetime.now().strftime('%H:%M:%S')}] Failed to save results to Hub: {str(e)}")
@@ -267,15 +278,15 @@ TASKS_TABLE = [yourbench]
                results = json.load(f)
                print(f"[{datetime.now().strftime('%H:%M:%S')}] Results structure: {json.dumps(list(results.keys()))}")
 
-                # …
+                # Verify that the structure is as expected
                if "results" in results and "all" in results["results"] and "accuracy" in results["results"]["all"]:
                    accuracy = results["results"]["all"]["accuracy"]
                    print(f"[{datetime.now().strftime('%H:%M:%S')}] Extracted accuracy: {accuracy}")
                else:
-                    print(f"[{datetime.now().strftime('%H:%M:%S')}] …
+                    print(f"[{datetime.now().strftime('%H:%M:%S')}] Unexpected results structure. Available keys: {list(results.keys())}")
                    if "results" in results:
-                        print(f"[{datetime.now().strftime('%H:%M:%S')}] …
-                    raise ValueError(f"…
+                        print(f"[{datetime.now().strftime('%H:%M:%S')}] Keys in 'results': {list(results['results'].keys()) if isinstance(results['results'], dict) else 'not a dictionary'}")
+                    raise ValueError(f"Unexpected results structure for {model_name}")
 
            result_data = {
                "model": model_name,
@@ -315,38 +326,26 @@ TASKS_TABLE = [yourbench]
        # Load environment variables
        load_dotenv()
 
-        # …
-        models = [
-            "Qwen/QwQ-32B",
-            "Qwen/Qwen2.5-72B-Instruct",
-            "Qwen/Qwen2.5-32B-Instruct",
-            "meta-llama/Llama-3.1-8B-Instruct",
-            "meta-llama/Llama-3.3-70B-Instruct",
-            "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
-            "mistralai/Mistral-Small-24B-Instruct-2501",
-        ]
-
-        # Log pour voir la structure du dataset
+        # Log to see the structure of the dataset
        try:
-
-            print(f"[{datetime.now().strftime('%H:%M:%S')}] Tentative de chargement du dataset {self.dataset_name} pour inspection")
+            print(f"[{datetime.now().strftime('%H:%M:%S')}] Attempting to load dataset {self.dataset_name} for inspection")
            dataset = load_dataset(self.dataset_name, "single_shot_questions", split="train")
 
-            # …
+            # Verify the structure of the first example
            if len(dataset) > 0:
                first_example = dataset[0]
-                print(f"[{datetime.now().strftime('%H:%M:%S')}] Structure …
-                print(f"[{datetime.now().strftime('%H:%M:%S')}] …
-                print(f"[{datetime.now().strftime('%H:%M:%S')}] Citations: {first_example.get('citations', '…
+                print(f"[{datetime.now().strftime('%H:%M:%S')}] Structure of the first example:")
+                print(f"[{datetime.now().strftime('%H:%M:%S')}] Keys: {first_example.keys()}")
+                print(f"[{datetime.now().strftime('%H:%M:%S')}] Citations: {first_example.get('citations', 'not found')}")
        except Exception as e:
-            print(f"[{datetime.now().strftime('%H:%M:%S')}] …
+            print(f"[{datetime.now().strftime('%H:%M:%S')}] Error inspecting the dataset: {str(e)}")
 
        # Step 1: Check available providers for each model
        await self.update_step("finding_available_model_providers")
        print(f"[{datetime.now().strftime('%H:%M:%S')}] Checking available providers for models...")
 
        model_providers = {}
-        for model in …
+        for model in DEFAULT_EVALUATION_MODELS:
            provider = get_available_model_provider(model, verbose=True)
            if provider:
                model_providers[model] = provider
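The reworked update_step pads each step to a minimum visible duration of one second so the frontend stepper has time to render every state. Here is the pacing pattern in isolation, as a small self-contained sketch (only finding_available_model_providers is a real step name from this diff; the others are placeholders):

```python
import asyncio
import time

class StepTracker:
    """Minimal sketch of the pacing pattern used by EvaluationTask.update_step."""

    def __init__(self) -> None:
        self.current_step = "initializing"
        self.completed_steps: list[str] = []
        self.step_start_time = time.time()

    async def update_step(self, step: str) -> None:
        # Keep the previous step visible for at least one second.
        elapsed = time.time() - self.step_start_time
        if elapsed < 1.0:
            await asyncio.sleep(1.0 - elapsed)

        self.current_step = step
        self.step_start_time = time.time()
        if step not in self.completed_steps:
            self.completed_steps.append(step)

async def main() -> None:
    tracker = StepTracker()
    for step in ("finding_available_model_providers", "placeholder_step_2", "placeholder_step_3"):
        await tracker.update_step(step)
        print(tracker.current_step, tracker.completed_steps)

asyncio.run(main())
```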
backend/tasks/get_available_model_provider.py
CHANGED
@@ -8,7 +8,7 @@ from dotenv import load_dotenv
 load_dotenv()
 
 # Define preferred providers
-PREFERRED_PROVIDERS = ["fireworks-ai", …
+PREFERRED_PROVIDERS = ["fireworks-ai","sambanova", "novita"]
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -30,11 +30,17 @@ def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool
     Returns:
         True if the provider is available, False otherwise
     """
+
     try:
         # Get HF token from environment
         hf_token = os.environ.get("HF_TOKEN")
         if not hf_token:
             raise ValueError("HF_TOKEN not defined in environment")
+        # Get HF token from environment
+        hf_organization = os.environ.get("HF_ORGANIZATION")
+        if not hf_organization:
+            raise ValueError("HF_ORGANIZATION not defined in environment")
+
 
         if verbose:
             logger.info(f"Testing provider {provider} for model {model_name}")
@@ -44,6 +50,7 @@ def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool
             model=model_name,
             token=hf_token,
             provider=provider,
+            bill_to=hf_organization,
             timeout=10  # Increased timeout to allow model loading
         )
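The provider probe now bills each test request to an organization. Below is a standalone sketch of the same call, assuming a huggingface_hub release recent enough to accept the provider and bill_to arguments; the model and provider shown are taken from elsewhere in this commit and are only examples:

```python
import os
from huggingface_hub import InferenceClient

hf_token = os.environ["HF_TOKEN"]
hf_organization = os.environ["HF_ORGANIZATION"]

client = InferenceClient(
    model="Qwen/Qwen2.5-72B-Instruct",  # example model from the evaluation list
    token=hf_token,
    provider="fireworks-ai",            # first entry of PREFERRED_PROVIDERS
    bill_to=hf_organization,            # charge the organization rather than the user
    timeout=10,
)

# A tiny request is enough to confirm the provider actually serves the model.
reply = client.chat_completion(
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=5,
)
print(reply.choices[0].message.content)
```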
frontend/server.js
CHANGED
@@ -34,8 +34,6 @@ app.use(
     "/health",
     "/upload",
     "/generate-benchmark",
-    "/config-logs",
-    "/benchmark-logs",
     "/benchmark-progress",
     "/benchmark-questions",
     "/evaluate-benchmark",
frontend/src/App.js
CHANGED
@@ -9,14 +9,13 @@ import {
 import getTheme from "./config/theme";
 import { useThemeMode } from "./hooks/useThemeMode";
 import { ThemeProvider } from "@mui/material/styles";
-import …
-import KeyboardShortcuts from "./components/KeyboardShortcuts";
+import Navigation from "./components/Navigation";
 import HomePage from "./pages/HomePage";
 import BenchmarkGenerationPage from "./pages/BenchmarkGenerationPage";
 import BenchmarkDisplayPage from "./pages/BenchmarkDisplayPage";
 import BenchmarkEvaluationPage from "./pages/BenchmarkEvaluationPage";
 import EvaluationDisplayPage from "./pages/EvaluationDisplayPage";
-
+import Footer from "./components/Footer/Footer";
 // Function to synchronize URL hash with parent Hugging Face page
 const syncURLWithParent = () => {
   // This function is only necessary in a Hugging Face Spaces environment
@@ -83,9 +82,8 @@ function App() {
           <CssBaseline />
           <Router>
             <Container maxWidth="md">
-              <…
+              <Navigation />
               <Box sx={{ pt: 12, pb: 4 }}>
-                <KeyboardShortcuts />
                 <Routes>
                   <Route path="/" element={<HomePage />} />
                   <Route
@@ -106,22 +104,7 @@ function App() {
                   />
                   <Route path="*" element={<Navigate to="/" replace />} />
                 </Routes>
-                <…
-                  component="footer"
-                  sx={{
-                    mt: 4,
-                    textAlign: "center",
-                    fontSize: "0.875rem",
-                    color: "text.secondary",
-                    opacity: 0.7,
-                    maxWidth: { xs: "100%", md: "70%" },
-                    mx: "auto",
-                  }}
-                >
-                  We keep processed documents for research purposes, to which you
-                  agree by using the space. For a fully private usage, please
-                  duplicate the advanced space
-                </Box>
+                <Footer />
               </Box>
             </Container>
           </Router>
frontend/src/components/{BenchmarkCreateForm.jsx → Benchmark/CreateForm.jsx}
RENAMED
@@ -25,18 +25,18 @@ import MenuBookIcon from "@mui/icons-material/MenuBook";
 import DownloadIcon from "@mui/icons-material/Download";
 import VisibilityIcon from "@mui/icons-material/Visibility";
 import CloseIcon from "@mui/icons-material/Close";
-import { useThemeMode } from "…
-import getTheme from "…
-import API_CONFIG from "…
+import { useThemeMode } from "../../hooks/useThemeMode";
+import getTheme from "../../config/theme";
+import API_CONFIG from "../../config/api";
 
 /**
  * Component for creating a new benchmark, including file upload and generation initiation
  *
  * @param {Object} props - Component props
  * @param {Function} props.onStartGeneration - Callback when generation starts with sessionId
- * @returns {JSX.Element} …
+ * @returns {JSX.Element} CreateForm component
  */
-function BenchmarkCreateForm({ onStartGeneration }) {
+function CreateForm({ onStartGeneration }) {
   const { mode } = useThemeMode();
   const theme = getTheme(mode);
   const [isDragging, setIsDragging] = useState(false);
@@ -110,11 +110,11 @@ function BenchmarkCreateForm({ onStartGeneration }) {
       return;
     }
 
-    // Check file size limit (…
-    if (file.size > 1048576) {
+    // Check file size limit (3MB = 3145728 bytes)
+    if (file.size > 1048576 * 2) {
       setUploadStatus({
         success: false,
-        message: "File size exceeds the …
+        message: "File size exceeds the 2MB limit",
       });
       setOpenSnackbar(true);
       return;
@@ -192,11 +192,11 @@ function BenchmarkCreateForm({ onStartGeneration }) {
       return;
     }
 
-    // Check file size limit (…
-    if (file.size > …
+    // Check file size limit (3MB = 3145728 bytes)
+    if (file.size > 1048576 * 3) {
       setUploadStatus({
         success: false,
-        message: "File size exceeds the …
+        message: "File size exceeds the 3MB limit",
       });
       setOpenSnackbar(true);
       return;
@@ -580,4 +580,4 @@ function BenchmarkCreateForm({ onStartGeneration }) {
   );
 }
 
-export default …
+export default CreateForm;
frontend/src/components/{BenchmarkDisplay.jsx → Benchmark/Display.jsx}
RENAMED
@@ -16,9 +16,9 @@ import AssessmentIcon from "@mui/icons-material/Assessment";
 import LinkIcon from "@mui/icons-material/Link";
 import DownloadIcon from "@mui/icons-material/Download";
 import CheckCircleIcon from "@mui/icons-material/CheckCircle";
-import API_CONFIG from "…
-import { useThemeMode } from "…
-import getTheme from "…
+import API_CONFIG from "../../config/api";
+import { useThemeMode } from "../../hooks/useThemeMode";
+import getTheme from "../../config/theme";
 
 /**
  * Component to display benchmark information and evaluation button
@@ -30,7 +30,7 @@ import getTheme from "../config/theme";
  * @param {string} props.datasetUrl - URL to the Hugging Face dataset
  * @returns {JSX.Element} Benchmark display component
  */
-const BenchmarkDisplay = ({
+const Display = ({
   sampleQuestions = [],
   onStartEvaluation,
   sessionId,
@@ -40,26 +40,6 @@ const BenchmarkDisplay = ({
   const { mode } = useThemeMode();
   const theme = getTheme(mode);
 
-  // Default questions if none provided
-  const questions =
-    sampleQuestions.length > 0
-      ? sampleQuestions
-      : [
-          {
-            id: 1,
-            question: "What are the key benefits of the described technology?",
-            answer: "No answer available",
-            type: "single_shot",
-          },
-          {
-            id: 2,
-            question:
-              "Based on the context about machine learning frameworks, how does TensorFlow compare to PyTorch in terms of deployment capabilities?",
-            answer: "No answer available",
-            type: "multi_hop",
-          },
-        ];
-
   const handleEvaluationClick = () => {
     if (onStartEvaluation) {
       onStartEvaluation();
@@ -139,7 +119,7 @@ const BenchmarkDisplay = ({
         </Typography>
 
         <Box sx={{ mb: 3 }}>
-          {…
+          {sampleQuestions.map((q, index) => (
             <Card
               key={q.id || index}
               variant="outlined"
@@ -179,4 +159,4 @@ const BenchmarkDisplay = ({
   );
 };
 
-export default …
+export default Display;
frontend/src/components/{BenchmarkGenerator.jsx → Benchmark/Generator.jsx}
RENAMED
@@ -2,14 +2,15 @@ import React, { useState, useEffect, useRef } from "react";
|
|
2 |
import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
|
3 |
import PlayArrowIcon from "@mui/icons-material/PlayArrow";
|
4 |
import AccessTimeIcon from "@mui/icons-material/AccessTime";
|
5 |
-
import LogDisplay from "
|
6 |
import { useNavigate, useSearchParams } from "react-router-dom";
|
7 |
-
import API_CONFIG from "
|
|
|
8 |
|
9 |
-
//
|
10 |
const SIMULATION_DURATION = 80000; // 20 seconds
|
11 |
|
12 |
-
//
|
13 |
const BENCHMARK_STEPS = [
|
14 |
"configuration",
|
15 |
"provider_check",
|
@@ -20,7 +21,7 @@ const BENCHMARK_STEPS = [
|
|
20 |
"single_shot_question_generation",
|
21 |
];
|
22 |
|
23 |
-
//
|
24 |
const STEP_LABELS = {
|
25 |
configuration: "Configuration",
|
26 |
provider_check: "Finding providers",
|
@@ -34,7 +35,7 @@ const STEP_LABELS = {
|
|
34 |
evaluation_saving_results: "Saving evaluation results",
|
35 |
};
|
36 |
|
37 |
-
//
|
38 |
const SIMULATED_LOGS = [
|
39 |
"[INFO] Initializing benchmark generation...",
|
40 |
"[INFO] Generating base configuration file...",
|
@@ -55,18 +56,21 @@ const SIMULATED_LOGS = [
|
|
55 |
];
|
56 |
|
57 |
/**
|
58 |
-
*
|
59 |
*
|
60 |
-
* @param {Object} props -
|
61 |
-
* @param {string} props.sessionId -
|
62 |
-
* @param {boolean} props.isDefaultDocument -
|
63 |
-
* @param {Function} props.onComplete -
|
64 |
-
* @returns {JSX.Element}
|
65 |
*/
|
66 |
-
const
|
|
|
67 |
const [searchParams] = useSearchParams();
|
68 |
const isDefault =
|
69 |
searchParams.get("isDefault") === "true" || isDefaultDocument;
|
|
|
|
|
70 |
const [generating, setGenerating] = useState(false);
|
71 |
const [generationComplete, setGenerationComplete] = useState(false);
|
72 |
const [generationLogs, setGenerationLogs] = useState([]);
|
@@ -76,53 +80,68 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
|
|
76 |
const [activeStep, setActiveStep] = useState(1);
|
77 |
const [elapsedTime, setElapsedTime] = useState(0);
|
78 |
|
79 |
-
//
|
80 |
const pollingIntervalRef = useRef(null);
|
81 |
-
|
82 |
-
// Reference to keep track of the timer interval
|
83 |
const timerIntervalRef = useRef(null);
|
84 |
-
|
85 |
-
// Reference for starting time
|
86 |
const startTimeRef = useRef(null);
|
87 |
-
|
88 |
-
// Simulation interval reference
|
89 |
const simulationIntervalRef = useRef(null);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
useEffect(() => {
|
93 |
-
//
|
94 |
startTimeRef.current = Date.now();
|
95 |
|
96 |
-
//
|
97 |
-
let timeoutRef = null;
|
98 |
-
|
99 |
-
// Start timer
|
100 |
timerIntervalRef.current = setInterval(() => {
|
101 |
const timeElapsed = Math.floor(
|
102 |
(Date.now() - startTimeRef.current) / 1000
|
103 |
);
|
104 |
setElapsedTime(timeElapsed);
|
105 |
|
106 |
-
//
|
107 |
-
if (timeElapsed >
|
108 |
-
// Display an error message in case of timeout
|
109 |
setError(
|
110 |
"The benchmark generation is taking too long. The demo is currently under heavy load, please try again later."
|
111 |
);
|
112 |
-
|
113 |
-
|
114 |
-
// Clear intervals
|
115 |
-
if (pollingIntervalRef.current) {
|
116 |
-
clearInterval(pollingIntervalRef.current);
|
117 |
-
}
|
118 |
-
|
119 |
-
if (timerIntervalRef.current) {
|
120 |
-
clearInterval(timerIntervalRef.current);
|
121 |
-
}
|
122 |
}
|
123 |
}, 1000);
|
124 |
|
125 |
-
//
|
126 |
const handleVisibilityChange = () => {
|
127 |
if (
|
128 |
document.visibilityState === "visible" &&
|
@@ -130,45 +149,22 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
|
|
130 |
!generationComplete
|
131 |
) {
|
132 |
console.log("Page became visible, checking for missed steps...");
|
133 |
-
|
|
|
134 |
const checkCurrentState = async () => {
|
135 |
try {
|
136 |
-
|
137 |
-
|
138 |
-
`${API_CONFIG.BASE_URL}/benchmark-logs/${sessionId}`
|
139 |
);
|
140 |
|
141 |
-
if (
|
142 |
-
const
|
143 |
-
if (
|
144 |
-
setGenerationLogs(
|
145 |
}
|
146 |
|
147 |
-
|
148 |
-
|
149 |
-
setGenerationComplete(true);
|
150 |
-
if (pollingIntervalRef.current) {
|
151 |
-
clearInterval(pollingIntervalRef.current);
|
152 |
-
}
|
153 |
-
if (onComplete) {
|
154 |
-
onComplete({
|
155 |
-
success: true,
|
156 |
-
sessionId,
|
157 |
-
logs: logsResult.logs,
|
158 |
-
});
|
159 |
-
}
|
160 |
-
}
|
161 |
-
} else {
|
162 |
-
// If the benchmark task does not exist, try the configuration logs
|
163 |
-
const configResponse = await fetch(
|
164 |
-
`${API_CONFIG.BASE_URL}/config-logs/${sessionId}`
|
165 |
-
);
|
166 |
-
|
167 |
-
if (configResponse.ok) {
|
168 |
-
const configResult = await configResponse.json();
|
169 |
-
if (configResult.logs) {
|
170 |
-
setGenerationLogs(configResult.logs);
|
171 |
-
}
|
172 |
}
|
173 |
}
|
174 |
} catch (error) {
|
@@ -180,103 +176,89 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
|
|
180 |
}
|
181 |
};
|
182 |
|
183 |
-
//
|
184 |
document.addEventListener("visibilitychange", handleVisibilityChange);
|
185 |
|
|
|
186 |
if (isDefault) {
|
187 |
simulateGeneration();
|
188 |
} else {
|
189 |
generateBenchmark();
|
190 |
}
|
191 |
|
192 |
-
//
|
193 |
return () => {
|
194 |
-
|
195 |
-
clearInterval(pollingIntervalRef.current);
|
196 |
-
}
|
197 |
-
if (timerIntervalRef.current) {
|
198 |
-
clearInterval(timerIntervalRef.current);
|
199 |
-
}
|
200 |
-
if (simulationIntervalRef.current) {
|
201 |
-
clearInterval(simulationIntervalRef.current);
|
202 |
-
}
|
203 |
document.removeEventListener("visibilitychange", handleVisibilityChange);
|
204 |
};
|
205 |
}, [isDefault, sessionId, generationComplete, onComplete]);
|
206 |
|
207 |
-
//
|
208 |
const simulateGeneration = () => {
|
209 |
-
|
210 |
-
setGenerationLogs([]);
|
211 |
-
setError(null);
|
212 |
-
setCurrentPhase("initializing");
|
213 |
-
setCompletedSteps([]);
|
214 |
-
setActiveStep(1);
|
215 |
|
216 |
-
//
|
217 |
const totalSteps = SIMULATED_LOGS.length;
|
218 |
-
const
|
219 |
-
const intervalPerStep = totalDuration / totalSteps;
|
220 |
let currentStep = 0;
|
221 |
|
222 |
-
//
|
223 |
const addNextLog = () => {
|
224 |
if (currentStep < SIMULATED_LOGS.length) {
|
225 |
const newLogs = [...generationLogs, SIMULATED_LOGS[currentStep]];
|
226 |
setGenerationLogs(newLogs);
|
227 |
currentStep++;
|
228 |
|
229 |
-
//
|
230 |
if (currentStep >= SIMULATED_LOGS.length) {
|
231 |
-
// Simulation
|
232 |
setTimeout(() => {
|
233 |
setCurrentPhase("complete");
|
234 |
-
|
235 |
-
clearInterval(simulationIntervalRef.current);
|
236 |
-
if (onComplete) {
|
237 |
-
onComplete({
|
238 |
-
success: true,
|
239 |
-
sessionId,
|
240 |
-
logs: newLogs,
|
241 |
-
});
|
242 |
-
}
|
243 |
}, 1000);
|
244 |
}
|
245 |
}
|
246 |
};
|
247 |
|
248 |
-
//
|
249 |
simulationIntervalRef.current = setInterval(addNextLog, intervalPerStep);
|
250 |
};
|
251 |
|
252 |
-
//
|
253 |
useEffect(() => {
|
254 |
if (generationLogs.length === 0) return;
|
255 |
|
256 |
-
//
|
257 |
-
// instead of just adding new steps
|
258 |
const newCompletedSteps = [];
|
259 |
|
260 |
-
//
|
261 |
-
const
|
262 |
(log) =>
|
263 |
log.includes("RATE_LIMIT_EXCEEDED") ||
|
264 |
log.includes("heavy load") ||
|
265 |
-
log.includes("rate limit")
|
|
|
|
|
|
|
|
|
266 |
);
|
267 |
|
268 |
-
if (
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
|
|
|
|
|
|
|
|
276 |
return;
|
277 |
}
|
278 |
|
279 |
-
//
|
280 |
generationLogs.forEach((log) => {
|
281 |
const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
|
282 |
if (match && match[1]) {
|
@@ -290,48 +272,48 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
|
|
290 |
}
|
291 |
});
|
292 |
|
293 |
-
//
|
294 |
let newActiveStep = activeStep;
|
295 |
|
296 |
if (newCompletedSteps.length > 0) {
|
297 |
-
//
|
298 |
const maxCompletedStepIndex = Math.max(
|
299 |
...newCompletedSteps.map((step) => BENCHMARK_STEPS.indexOf(step))
|
300 |
);
|
301 |
-
//
|
302 |
const calculatedStep = maxCompletedStepIndex + 1;
|
303 |
|
304 |
-
//
|
305 |
if (calculatedStep > activeStep) {
|
306 |
newActiveStep = calculatedStep;
|
307 |
}
|
308 |
|
309 |
-
//
|
310 |
if (newActiveStep >= BENCHMARK_STEPS.length) {
|
311 |
newActiveStep = BENCHMARK_STEPS.length;
|
312 |
}
|
313 |
} else if (activeStep === 0) {
|
314 |
-
//
|
315 |
newActiveStep = 1;
|
316 |
}
|
317 |
|
318 |
-
//
|
319 |
if (JSON.stringify(newCompletedSteps) !== JSON.stringify(completedSteps)) {
|
320 |
setCompletedSteps(newCompletedSteps);
|
321 |
}
|
322 |
|
323 |
-
//
|
324 |
if (newActiveStep !== activeStep) {
|
325 |
setActiveStep(newActiveStep);
|
326 |
}
|
327 |
|
328 |
-
//
|
329 |
if (isDefault) return;
|
330 |
|
331 |
-
//
|
332 |
-
const recentLogs = generationLogs.slice(-10);
|
333 |
|
334 |
-
//
|
335 |
const isComplete =
|
336 |
recentLogs.some((log) =>
|
337 |
log.includes("[SUCCESS] Benchmark process completed successfully")
|
@@ -344,20 +326,7 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
|
|
344 |
|
345 |
if (isComplete) {
|
346 |
setCurrentPhase("complete");
|
347 |
-
|
348 |
-
// Stop polling when benchmark is complete
|
349 |
-
if (pollingIntervalRef.current) {
|
350 |
-
clearInterval(pollingIntervalRef.current);
|
351 |
-
}
|
352 |
-
// Notify parent component that generation is complete
|
353 |
-
if (onComplete) {
|
354 |
-
console.log("Notifying parent that generation is complete");
|
355 |
-
onComplete({
|
356 |
-
success: true,
|
357 |
-
sessionId,
|
358 |
-
logs: generationLogs,
|
359 |
-
});
|
360 |
-
}
|
361 |
} else if (
|
362 |
recentLogs.some((log) => log.includes("Starting ingestion process"))
|
363 |
) {
|
@@ -376,31 +345,23 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
|
|
376 |
isDefault,
|
377 |
]);
|
378 |
|
|
|
379 |
const generateBenchmark = async () => {
|
380 |
if (!sessionId) {
|
381 |
setError("Missing session ID");
|
382 |
return;
|
383 |
}
|
384 |
|
385 |
-
|
386 |
-
setGenerationLogs([]);
|
387 |
-
setError(null);
|
388 |
-
setCurrentPhase("initializing");
|
389 |
-
setCompletedSteps([]);
|
390 |
-
setActiveStep(1);
|
391 |
|
392 |
try {
|
393 |
-
//
|
394 |
const response = await fetch(
|
395 |
`${API_CONFIG.BASE_URL}/generate-benchmark`,
|
396 |
{
|
397 |
method: "POST",
|
398 |
-
headers: {
|
399 |
-
|
400 |
-
},
|
401 |
-
body: JSON.stringify({
|
402 |
-
session_id: sessionId,
|
403 |
-
}),
|
404 |
}
|
405 |
);
|
406 |
|
@@ -409,16 +370,16 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
|
|
409 |
if (response.ok) {
|
410 |
setGenerationLogs(result.logs || []);
|
411 |
|
412 |
-
//
|
413 |
pollingIntervalRef.current = setInterval(async () => {
|
414 |
-
//
|
415 |
if (generationComplete) {
|
416 |
clearInterval(pollingIntervalRef.current);
|
417 |
return;
|
418 |
}
|
419 |
|
420 |
try {
|
421 |
-
//
|
422 |
const logsResponse = await fetch(
|
423 |
`${API_CONFIG.BASE_URL}/benchmark-progress/${sessionId}`
|
424 |
);
|
@@ -426,7 +387,7 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
|
|
426 |
if (logsResponse.ok) {
|
427 |
const logsResult = await logsResponse.json();
|
428 |
|
429 |
-
//
|
430 |
if (
|
431 |
logsResult.logs &&
|
432 |
logsResult.logs.length > generationLogs.length
|
@@ -434,20 +395,19 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
|
|
434 |
setGenerationLogs(logsResult.logs);
|
435 |
}
|
436 |
|
437 |
-
//
|
438 |
if (logsResult.is_completed) {
|
439 |
setGenerationComplete(true);
|
440 |
clearInterval(pollingIntervalRef.current);
|
441 |
-
// Notification is now handled in the useEffect above
|
442 |
}
|
443 |
}
|
444 |
} catch (error) {
|
445 |
console.log("Error polling for logs:", error);
|
446 |
-
//
|
447 |
}
|
448 |
-
}, 2000); //
|
449 |
} else {
|
450 |
-
//
|
451 |
setGenerationLogs([`Error: ${result.error || "Unknown error"}`]);
|
452 |
setError(result.error || "Benchmark generation failed");
|
453 |
}
|
@@ -460,29 +420,29 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
|
|
460 |
}
|
461 |
};
|
462 |
|
463 |
-
//
|
464 |
const getCurrentStepInfo = () => {
|
465 |
const totalSteps = BENCHMARK_STEPS.length;
|
466 |
const currentStepIndex = activeStep;
|
467 |
|
468 |
-
//
|
469 |
if (currentStepIndex <= 1 && completedSteps.length === 0) {
|
470 |
return `Starting (1/${totalSteps})`;
|
471 |
}
|
472 |
|
473 |
-
//
|
474 |
if (currentStepIndex >= totalSteps) {
|
475 |
return `Complete (${totalSteps}/${totalSteps})`;
|
476 |
}
|
477 |
|
478 |
-
//
|
479 |
const currentStepName =
|
480 |
STEP_LABELS[BENCHMARK_STEPS[currentStepIndex]] || "Processing";
|
481 |
|
482 |
return `${currentStepName} (${currentStepIndex}/${totalSteps})`;
|
483 |
};
|
484 |
|
485 |
-
//
|
486 |
const formatElapsedTime = () => {
|
487 |
const hours = Math.floor(elapsedTime / 3600);
|
488 |
const minutes = Math.floor((elapsedTime % 3600) / 60);
|
@@ -495,13 +455,27 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
|
|
495 |
].join(":");
|
496 |
};
|
497 |
|
498 |
-
//
|
499 |
useEffect(() => {
|
500 |
if (generationComplete && timerIntervalRef.current) {
|
501 |
clearInterval(timerIntervalRef.current);
|
502 |
}
|
503 |
}, [generationComplete]);
|
504 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
505 |
return (
|
506 |
<Paper
|
507 |
elevation={3}
|
@@ -544,9 +518,7 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
|
|
544 |
</Box>
|
545 |
|
546 |
{error ? (
|
547 |
-
<
|
548 |
-
{error}
|
549 |
-
</Alert>
|
550 |
) : (
|
551 |
<>
|
552 |
<CircularProgress size={60} sx={{ mb: 2 }} />
|
@@ -581,4 +553,4 @@ const BenchmarkGenerator = ({ sessionId, isDefaultDocument, onComplete }) => {
|
|
581 |
);
|
582 |
};
|
583 |
|
584 |
-
export default
|
|
|
2 |
import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
|
3 |
import PlayArrowIcon from "@mui/icons-material/PlayArrow";
|
4 |
import AccessTimeIcon from "@mui/icons-material/AccessTime";
|
5 |
+
import LogDisplay from "../LogDisplay";
|
6 |
import { useNavigate, useSearchParams } from "react-router-dom";
|
7 |
+
import API_CONFIG from "../../config/api";
|
8 |
+
import ErrorDisplay from "../common/ErrorDisplay";
|
9 |
|
10 |
+
// Durée de simulation en millisecondes pour les documents précalculés
|
11 |
const SIMULATION_DURATION = 80000; // 20 seconds
|
12 |
|
13 |
+
// Définir toutes les étapes du benchmark en séquence
|
14 |
const BENCHMARK_STEPS = [
|
15 |
"configuration",
|
16 |
"provider_check",
|
|
|
21 |
"single_shot_question_generation",
|
22 |
];
|
23 |
|
24 |
+
// Étiquettes des étapes pour l'affichage (noms plus conviviaux)
|
25 |
const STEP_LABELS = {
|
26 |
configuration: "Configuration",
|
27 |
provider_check: "Finding providers",
|
|
|
35 |
evaluation_saving_results: "Saving evaluation results",
|
36 |
};
|
37 |
|
38 |
+
// Messages de log simulés pour les documents précalculés
|
39 |
const SIMULATED_LOGS = [
|
40 |
"[INFO] Initializing benchmark generation...",
|
41 |
"[INFO] Generating base configuration file...",
|
|
|
56 |
];
|
57 |
|
58 |
/**
|
59 |
+
* Composant pour gérer la génération de benchmark et afficher les logs
|
60 |
*
|
61 |
+
* @param {Object} props - Propriétés du composant
|
62 |
+
* @param {string} props.sessionId - ID de session pour le fichier uploadé
|
63 |
+
* @param {boolean} props.isDefaultDocument - S'il s'agit d'un document précalculé
|
64 |
+
* @param {Function} props.onComplete - Fonction à appeler lorsque la génération est terminée
|
65 |
+
* @returns {JSX.Element} Composant de génération de benchmark
|
66 |
*/
|
67 |
+
const Generator = ({ sessionId, isDefaultDocument, onComplete }) => {
|
68 |
+
const navigate = useNavigate();
|
69 |
const [searchParams] = useSearchParams();
|
70 |
const isDefault =
|
71 |
searchParams.get("isDefault") === "true" || isDefaultDocument;
|
72 |
+
|
73 |
+
// États du composant
|
74 |
const [generating, setGenerating] = useState(false);
|
75 |
const [generationComplete, setGenerationComplete] = useState(false);
|
76 |
const [generationLogs, setGenerationLogs] = useState([]);
|
|
|
80 |
const [activeStep, setActiveStep] = useState(1);
|
81 |
const [elapsedTime, setElapsedTime] = useState(0);
|
82 |
|
83 |
+
// Références pour les intervalles et timers
|
84 |
const pollingIntervalRef = useRef(null);
|
|
|
|
|
85 |
const timerIntervalRef = useRef(null);
|
|
|
|
|
86 |
const startTimeRef = useRef(null);
|
|
|
|
|
87 |
const simulationIntervalRef = useRef(null);
|
88 |
+
const hasRedirectedRef = useRef(false);
|
89 |
+
|
90 |
+
// Fonction pour réinitialiser les états de génération
|
91 |
+
const resetGenerationStates = () => {
|
92 |
+
setGenerating(true);
|
93 |
+
setGenerationLogs([]);
|
94 |
+
setError(null);
|
95 |
+
setCurrentPhase("initializing");
|
96 |
+
setCompletedSteps([]);
|
97 |
+
setActiveStep(1);
|
98 |
+
};
|
99 |
+
|
100 |
+
// Fonction pour arrêter les intervalles
|
101 |
+
const clearAllIntervals = () => {
|
102 |
+
if (pollingIntervalRef.current) clearInterval(pollingIntervalRef.current);
|
103 |
+
if (timerIntervalRef.current) clearInterval(timerIntervalRef.current);
|
104 |
+
if (simulationIntervalRef.current)
|
105 |
+
clearInterval(simulationIntervalRef.current);
|
106 |
+
};
|
107 |
|
108 |
+
// Fonction pour notifier la fin de la génération
|
109 |
+
const notifyGenerationComplete = (success, logs, errorMsg = null) => {
|
110 |
+
setGenerationComplete(true);
|
111 |
+
clearAllIntervals();
|
112 |
+
|
113 |
+
if (onComplete) {
|
114 |
+
onComplete({
|
115 |
+
success,
|
116 |
+
sessionId,
|
117 |
+
logs: logs || generationLogs,
|
118 |
+
error: errorMsg,
|
119 |
+
});
|
120 |
+
}
|
121 |
+
};
|
122 |
+
|
123 |
+
// Démarrer la génération au montage du composant
|
124 |
useEffect(() => {
|
125 |
+
// Configurer l'heure de départ
|
126 |
startTimeRef.current = Date.now();
|
127 |
|
128 |
+
// Démarrer le timer
|
|
|
|
|
|
|
129 |
timerIntervalRef.current = setInterval(() => {
|
130 |
const timeElapsed = Math.floor(
|
131 |
(Date.now() - startTimeRef.current) / 1000
|
132 |
);
|
133 |
setElapsedTime(timeElapsed);
|
134 |
|
135 |
+
// Vérifier si le temps écoulé dépasse 5 minutes et que nous ne sommes pas en mode simulation
|
136 |
+
if (timeElapsed > 300 && !isDefault && !generationComplete) {
|
|
|
137 |
setError(
|
138 |
"The benchmark generation is taking too long. The demo is currently under heavy load, please try again later."
|
139 |
);
|
140 |
+
notifyGenerationComplete(false, null, "Timeout error");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
}
|
142 |
}, 1000);
|
143 |
|
144 |
+
// Gestionnaire pour détecter quand la page redevient visible
|
145 |
const handleVisibilityChange = () => {
|
146 |
if (
|
147 |
document.visibilityState === "visible" &&
|
|
|
149 |
!generationComplete
|
150 |
) {
|
151 |
console.log("Page became visible, checking for missed steps...");
|
152 |
+
|
153 |
+
// Forcer une nouvelle requête pour récupérer les logs
|
154 |
const checkCurrentState = async () => {
|
155 |
try {
|
156 |
+
const progressResponse = await fetch(
|
157 |
+
`${API_CONFIG.BASE_URL}/benchmark-progress/${sessionId}`
|
|
|
158 |
);
|
159 |
|
160 |
+
if (progressResponse.ok) {
|
161 |
+
const progressResult = await progressResponse.json();
|
162 |
+
if (progressResult.logs) {
|
163 |
+
setGenerationLogs(progressResult.logs);
|
164 |
}
|
165 |
|
166 |
+
if (progressResult.is_completed) {
|
167 |
+
notifyGenerationComplete(true, progressResult.logs);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
}
|
169 |
}
|
170 |
} catch (error) {
|
|
|
176 |
}
|
177 |
};
|
178 |
|
179 |
+
// Ajouter l'écouteur pour le changement de visibilité
|
180 |
document.addEventListener("visibilitychange", handleVisibilityChange);
|
181 |
|
182 |
+
// Lancer la simulation ou la génération
|
183 |
if (isDefault) {
|
184 |
simulateGeneration();
|
185 |
} else {
|
186 |
generateBenchmark();
|
187 |
}
|
188 |
|
189 |
+
// Nettoyer les intervalles et écouteurs lors du démontage
|
190 |
return () => {
|
191 |
+
clearAllIntervals();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
document.removeEventListener("visibilitychange", handleVisibilityChange);
|
193 |
};
|
194 |
}, [isDefault, sessionId, generationComplete, onComplete]);
|
195 |
|
196 |
+
// Simuler la génération de benchmark pour les documents précalculés
|
197 |
const simulateGeneration = () => {
|
198 |
+
resetGenerationStates();
|
|
|
|
|
|
|
|
|
|
|
199 |
|
200 |
+
// Variables de timing pour la simulation
|
201 |
const totalSteps = SIMULATED_LOGS.length;
|
202 |
+
const intervalPerStep = SIMULATION_DURATION / totalSteps;
|
|
|
203 |
let currentStep = 0;
|
204 |
|
205 |
+
// Fonction pour ajouter le prochain message de log
|
206 |
const addNextLog = () => {
|
207 |
if (currentStep < SIMULATED_LOGS.length) {
|
208 |
const newLogs = [...generationLogs, SIMULATED_LOGS[currentStep]];
|
209 |
setGenerationLogs(newLogs);
|
210 |
currentStep++;
|
211 |
|
212 |
+
// Vérifier si terminé
|
213 |
if (currentStep >= SIMULATED_LOGS.length) {
|
214 |
+
// Simulation terminée
|
215 |
setTimeout(() => {
|
216 |
setCurrentPhase("complete");
|
217 |
+
notifyGenerationComplete(true, newLogs);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
}, 1000);
|
219 |
}
|
220 |
}
|
221 |
};
|
222 |
|
223 |
+
// Démarrer la simulation
|
224 |
simulationIntervalRef.current = setInterval(addNextLog, intervalPerStep);
|
225 |
};
|
226 |
|
227 |
+
// Déterminer la phase actuelle et les étapes terminées en fonction des logs
|
228 |
useEffect(() => {
|
229 |
if (generationLogs.length === 0) return;
|
230 |
|
231 |
+
// Recalculer les étapes terminées à chaque fois
|
|
|
232 |
const newCompletedSteps = [];
|
233 |
|
234 |
+
// Vérifier les erreurs de limitation de débit et de disponibilité du modèle
|
235 |
+
const hasError = generationLogs.some(
|
236 |
(log) =>
|
237 |
log.includes("RATE_LIMIT_EXCEEDED") ||
|
238 |
log.includes("heavy load") ||
|
239 |
+
log.includes("rate limit") ||
|
240 |
+
log.includes("Required models not available") ||
|
241 |
+
log.includes("Configuration failed") ||
|
242 |
+
log.includes("Error") ||
|
243 |
+
log.includes("ERROR")
|
244 |
);
|
245 |
|
246 |
+
if (hasError) {
|
247 |
+
const errorMessage =
|
248 |
+
generationLogs.find(
|
249 |
+
(log) =>
|
250 |
+
log.includes("Required models not available") ||
|
251 |
+
log.includes("Configuration failed") ||
|
252 |
+
log.includes("Error generating configuration")
|
253 |
+
) ||
|
254 |
+
"The demo is under heavy load at the moment. Please try again later.";
|
255 |
+
|
256 |
+
setError(errorMessage);
|
257 |
+
notifyGenerationComplete(false, null, errorMessage);
|
258 |
return;
|
259 |
}
|
260 |
|
261 |
+
// Identify every completed step across all logs
|
262 |
generationLogs.forEach((log) => {
|
263 |
const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
|
264 |
if (match && match[1]) {
|
|
|
272 |
}
|
273 |
});
|
274 |
|
275 |
+
// Determine the active step from the completed steps
|
276 |
let newActiveStep = activeStep;
|
277 |
|
278 |
if (newCompletedSteps.length > 0) {
|
279 |
+
// Find the most advanced step in the logs
|
280 |
const maxCompletedStepIndex = Math.max(
|
281 |
...newCompletedSteps.map((step) => BENCHMARK_STEPS.indexOf(step))
|
282 |
);
|
283 |
+
// Move on to the next step
|
284 |
const calculatedStep = maxCompletedStepIndex + 1;
|
285 |
|
286 |
+
// Only update if the new step is further along than the current one
|
287 |
if (calculatedStep > activeStep) {
|
288 |
newActiveStep = calculatedStep;
|
289 |
}
|
290 |
|
291 |
+
// Make sure activeStep does not exceed the total number of steps
|
292 |
if (newActiveStep >= BENCHMARK_STEPS.length) {
|
293 |
newActiveStep = BENCHMARK_STEPS.length;
|
294 |
}
|
295 |
} else if (activeStep === 0) {
|
296 |
+
// If no step was found and the active step is 0, move to 1
|
297 |
newActiveStep = 1;
|
298 |
}
|
299 |
|
300 |
+
// Update state if the steps have changed
|
301 |
if (JSON.stringify(newCompletedSteps) !== JSON.stringify(completedSteps)) {
|
302 |
setCompletedSteps(newCompletedSteps);
|
303 |
}
|
304 |
|
305 |
+
// Only update the active step if it has changed
|
306 |
if (newActiveStep !== activeStep) {
|
307 |
setActiveStep(newActiveStep);
|
308 |
}
|
309 |
|
310 |
+
// Skip the rest of the log processing when simulating
|
311 |
if (isDefault) return;
|
312 |
|
313 |
+
// Inspect the most recent logs to determine the current phase
|
314 |
+
const recentLogs = generationLogs.slice(-10);
|
315 |
|
316 |
+
// Detect completion conditions
|
317 |
const isComplete =
|
318 |
recentLogs.some((log) =>
|
319 |
log.includes("[SUCCESS] Benchmark process completed successfully")
|
|
|
326 |
|
327 |
if (isComplete) {
|
328 |
setCurrentPhase("complete");
|
329 |
+
notifyGenerationComplete(true, generationLogs);
|
330 |
} else if (
|
331 |
recentLogs.some((log) => log.includes("Starting ingestion process"))
|
332 |
) {
|
|
|
345 |
isDefault,
|
346 |
]);
|
347 |
|
348 |
+
// Generate the benchmark
|
349 |
const generateBenchmark = async () => {
|
350 |
if (!sessionId) {
|
351 |
setError("Missing session ID");
|
352 |
return;
|
353 |
}
|
354 |
|
355 |
+
resetGenerationStates();
|
356 |
|
357 |
try {
|
358 |
+
// Call the API to generate the benchmark
|
359 |
const response = await fetch(
|
360 |
`${API_CONFIG.BASE_URL}/generate-benchmark`,
|
361 |
{
|
362 |
method: "POST",
|
363 |
+
headers: { "Content-Type": "application/json" },
|
364 |
+
body: JSON.stringify({ session_id: sessionId }),
|
365 |
}
|
366 |
);
|
367 |
|
|
|
370 |
if (response.ok) {
|
371 |
setGenerationLogs(result.logs || []);
|
372 |
|
373 |
+
// Set up polling to track progress
|
374 |
pollingIntervalRef.current = setInterval(async () => {
|
375 |
+
// Check whether we have already finished
|
376 |
if (generationComplete) {
|
377 |
clearInterval(pollingIntervalRef.current);
|
378 |
return;
|
379 |
}
|
380 |
|
381 |
try {
|
382 |
+
// Call the API to fetch the latest logs
|
383 |
const logsResponse = await fetch(
|
384 |
`${API_CONFIG.BASE_URL}/benchmark-progress/${sessionId}`
|
385 |
);
|
|
|
387 |
if (logsResponse.ok) {
|
388 |
const logsResult = await logsResponse.json();
|
389 |
|
390 |
+
// Update the logs if there are new ones
|
391 |
if (
|
392 |
logsResult.logs &&
|
393 |
logsResult.logs.length > generationLogs.length
|
|
|
395 |
setGenerationLogs(logsResult.logs);
|
396 |
}
|
397 |
|
398 |
+
// Check whether the task is finished
|
399 |
if (logsResult.is_completed) {
|
400 |
setGenerationComplete(true);
|
401 |
clearInterval(pollingIntervalRef.current);
|
|
|
402 |
}
|
403 |
}
|
404 |
} catch (error) {
|
405 |
console.log("Error polling for logs:", error);
|
406 |
+
// Do not stop polling on network errors
|
407 |
}
|
408 |
+
}, 2000); // Poll every 2 seconds
|
409 |
} else {
|
410 |
+
// Handle the error
|
411 |
setGenerationLogs([`Error: ${result.error || "Unknown error"}`]);
|
412 |
setError(result.error || "Benchmark generation failed");
|
413 |
}
|
|
|
420 |
}
|
421 |
};
|
422 |
|
423 |
+
// Get current step information for display
|
424 |
const getCurrentStepInfo = () => {
|
425 |
const totalSteps = BENCHMARK_STEPS.length;
|
426 |
const currentStepIndex = activeStep;
|
427 |
|
428 |
+
// If there is no active step yet
|
429 |
if (currentStepIndex <= 1 && completedSteps.length === 0) {
|
430 |
return `Starting (1/${totalSteps})`;
|
431 |
}
|
432 |
|
433 |
+
// If all steps are finished
|
434 |
if (currentStepIndex >= totalSteps) {
|
435 |
return `Complete (${totalSteps}/${totalSteps})`;
|
436 |
}
|
437 |
|
438 |
+
// Get the name of the current step
|
439 |
const currentStepName =
|
440 |
STEP_LABELS[BENCHMARK_STEPS[currentStepIndex]] || "Processing";
|
441 |
|
442 |
return `${currentStepName} (${currentStepIndex}/${totalSteps})`;
|
443 |
};
|
444 |
|
445 |
+
// Format elapsed time as HH:MM:SS
|
446 |
const formatElapsedTime = () => {
|
447 |
const hours = Math.floor(elapsedTime / 3600);
|
448 |
const minutes = Math.floor((elapsedTime % 3600) / 60);
|
|
|
455 |
].join(":");
|
456 |
};
|
457 |
|
458 |
+
// Stop the timer once generation is complete
|
459 |
useEffect(() => {
|
460 |
if (generationComplete && timerIntervalRef.current) {
|
461 |
clearInterval(timerIntervalRef.current);
|
462 |
}
|
463 |
}, [generationComplete]);
|
464 |
|
465 |
+
const handleGenerationComplete = (result) => {
|
466 |
+
console.log("Benchmark generation completed:", result);
|
467 |
+
if (result && result.success && !hasRedirectedRef.current) {
|
468 |
+
hasRedirectedRef.current = true; // Mark that the redirect has been done
|
469 |
+
// Short pause before navigating to avoid timing issues
|
470 |
+
setTimeout(() => {
|
471 |
+
navigate(`/benchmark-display?session=${sessionId}`);
|
472 |
+
}, 500);
|
473 |
+
} else if (result && !result.success) {
|
474 |
+
// Show the error instead of redirecting
|
475 |
+
setError(result.error || "An error occurred during benchmark generation");
|
476 |
+
}
|
477 |
+
};
|
478 |
+
|
479 |
return (
|
480 |
<Paper
|
481 |
elevation={3}
|
|
|
518 |
</Box>
|
519 |
|
520 |
{error ? (
|
521 |
+
<ErrorDisplay error={error} />
|
|
|
|
|
522 |
) : (
|
523 |
<>
|
524 |
<CircularProgress size={60} sx={{ mb: 2 }} />
|
|
|
553 |
);
|
554 |
};
|
555 |
|
556 |
+
export default Generator;
|
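handleGenerationComplete above only depends on a small result object. Inferred from the onComplete calls in the hooks added below, the shape is roughly the following (shown for reference only; the sessionId value is a placeholder):

// Result shape passed to onComplete / handleGenerationComplete, as inferred
// from this diff; "example-session" is a placeholder value.
const exampleSuccess = {
  success: true,
  sessionId: "example-session",
  logs: ["[SUCCESS] Benchmark process completed successfully"],
};
const exampleFailure = {
  success: false,
  sessionId: "example-session",
  error: "The demo is under heavy load at the moment. Please try again later.",
};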
frontend/src/components/Benchmark/hooks/useBenchmarkLogs.js
ADDED
@@ -0,0 +1,192 @@
|
1 |
+
import { useState, useEffect } from "react";
|
2 |
+
|
3 |
+
const BENCHMARK_STEPS = [
|
4 |
+
"configuration",
|
5 |
+
"provider_check",
|
6 |
+
"ingestion",
|
7 |
+
"upload_ingest_to_hub",
|
8 |
+
"summarization",
|
9 |
+
"chunking",
|
10 |
+
"single_shot_question_generation",
|
11 |
+
];
|
12 |
+
|
13 |
+
export const useBenchmarkLogs = (sessionId, isDefault, onComplete) => {
|
14 |
+
const [generationLogs, setGenerationLogs] = useState([]);
|
15 |
+
const [error, setError] = useState(null);
|
16 |
+
const [currentPhase, setCurrentPhase] = useState("initializing");
|
17 |
+
const [completedSteps, setCompletedSteps] = useState([]);
|
18 |
+
const [activeStep, setActiveStep] = useState(1);
|
19 |
+
const [generationComplete, setGenerationComplete] = useState(false);
|
20 |
+
|
21 |
+
const checkForErrors = (logs) => {
|
22 |
+
// Check for rate limiting errors
|
23 |
+
const hasRateLimitError = logs.some(
|
24 |
+
(log) =>
|
25 |
+
log.includes("RATE_LIMIT_EXCEEDED") ||
|
26 |
+
log.includes("heavy load") ||
|
27 |
+
log.includes("rate limit")
|
28 |
+
);
|
29 |
+
|
30 |
+
if (hasRateLimitError) {
|
31 |
+
return {
|
32 |
+
hasError: true,
|
33 |
+
error:
|
34 |
+
"The demo is under heavy load at the moment. Please try again later.",
|
35 |
+
};
|
36 |
+
}
|
37 |
+
|
38 |
+
// Check for model availability errors
|
39 |
+
const hasModelError = logs.some(
|
40 |
+
(log) =>
|
41 |
+
log.includes("Required models not available") ||
|
42 |
+
log.includes("Some required models are not available")
|
43 |
+
);
|
44 |
+
|
45 |
+
if (hasModelError) {
|
46 |
+
return {
|
47 |
+
hasError: true,
|
48 |
+
error:
|
49 |
+
"Some required models are not available at the moment. Please try again later.",
|
50 |
+
};
|
51 |
+
}
|
52 |
+
|
53 |
+
// Check for configuration errors
|
54 |
+
const hasConfigError = logs.some(
|
55 |
+
(log) =>
|
56 |
+
log.includes("Error generating configuration") ||
|
57 |
+
log.includes("Configuration failed")
|
58 |
+
);
|
59 |
+
|
60 |
+
if (hasConfigError) {
|
61 |
+
return {
|
62 |
+
hasError: true,
|
63 |
+
error:
|
64 |
+
"Failed to generate benchmark configuration. Please try again later.",
|
65 |
+
};
|
66 |
+
}
|
67 |
+
|
68 |
+
return { hasError: false };
|
69 |
+
};
|
70 |
+
|
71 |
+
const updateSteps = (logs) => {
|
72 |
+
const newCompletedSteps = [];
|
73 |
+
|
74 |
+
logs.forEach((log) => {
|
75 |
+
const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
|
76 |
+
if (match && match[1]) {
|
77 |
+
const completedStep = match[1].trim();
|
78 |
+
if (
|
79 |
+
BENCHMARK_STEPS.includes(completedStep) &&
|
80 |
+
!newCompletedSteps.includes(completedStep)
|
81 |
+
) {
|
82 |
+
newCompletedSteps.push(completedStep);
|
83 |
+
}
|
84 |
+
}
|
85 |
+
});
|
86 |
+
|
87 |
+
let newActiveStep = activeStep;
|
88 |
+
|
89 |
+
if (newCompletedSteps.length > 0) {
|
90 |
+
const maxCompletedStepIndex = Math.max(
|
91 |
+
...newCompletedSteps.map((step) => BENCHMARK_STEPS.indexOf(step))
|
92 |
+
);
|
93 |
+
const calculatedStep = maxCompletedStepIndex + 1;
|
94 |
+
|
95 |
+
if (calculatedStep > activeStep) {
|
96 |
+
newActiveStep = calculatedStep;
|
97 |
+
}
|
98 |
+
|
99 |
+
if (newActiveStep >= BENCHMARK_STEPS.length) {
|
100 |
+
newActiveStep = BENCHMARK_STEPS.length;
|
101 |
+
}
|
102 |
+
} else if (activeStep === 0) {
|
103 |
+
newActiveStep = 1;
|
104 |
+
}
|
105 |
+
|
106 |
+
return { newCompletedSteps, newActiveStep };
|
107 |
+
};
|
108 |
+
|
109 |
+
const updatePhase = (logs) => {
|
110 |
+
const recentLogs = logs.slice(-10);
|
111 |
+
|
112 |
+
const isComplete = recentLogs.some((log) =>
|
113 |
+
log.includes("[SUCCESS] Benchmark process completed successfully")
|
114 |
+
);
|
115 |
+
|
116 |
+
if (isComplete) {
|
117 |
+
return "complete";
|
118 |
+
} else if (
|
119 |
+
recentLogs.some((log) => log.includes("Starting ingestion process"))
|
120 |
+
) {
|
121 |
+
return "benchmarking";
|
122 |
+
} else if (
|
123 |
+
recentLogs.some((log) => log.includes("Generating base configuration"))
|
124 |
+
) {
|
125 |
+
return "configuring";
|
126 |
+
}
|
127 |
+
|
128 |
+
return currentPhase;
|
129 |
+
};
|
130 |
+
|
131 |
+
useEffect(() => {
|
132 |
+
if (generationLogs.length === 0) return;
|
133 |
+
|
134 |
+
const errorCheck = checkForErrors(generationLogs);
|
135 |
+
if (errorCheck.hasError) {
|
136 |
+
setError(errorCheck.error);
|
137 |
+
setGenerationComplete(true);
|
138 |
+
if (onComplete) {
|
139 |
+
onComplete({
|
140 |
+
success: false,
|
141 |
+
error: errorCheck.error,
|
142 |
+
sessionId,
|
143 |
+
});
|
144 |
+
}
|
145 |
+
return;
|
146 |
+
}
|
147 |
+
|
148 |
+
const { newCompletedSteps, newActiveStep } = updateSteps(generationLogs);
|
149 |
+
const newPhase = updatePhase(generationLogs);
|
150 |
+
|
151 |
+
if (JSON.stringify(newCompletedSteps) !== JSON.stringify(completedSteps)) {
|
152 |
+
setCompletedSteps(newCompletedSteps);
|
153 |
+
}
|
154 |
+
|
155 |
+
if (newActiveStep !== activeStep) {
|
156 |
+
setActiveStep(newActiveStep);
|
157 |
+
}
|
158 |
+
|
159 |
+
if (newPhase !== currentPhase) {
|
160 |
+
setCurrentPhase(newPhase);
|
161 |
+
}
|
162 |
+
|
163 |
+
// Check whether the benchmark actually finished without errors
|
164 |
+
const recentLogs = generationLogs.slice(-10);
|
165 |
+
const isComplete = recentLogs.some((log) =>
|
166 |
+
log.includes("[SUCCESS] Benchmark process completed successfully")
|
167 |
+
);
|
168 |
+
|
169 |
+
if (isComplete) {
|
170 |
+
setGenerationComplete(true);
|
171 |
+
if (onComplete) {
|
172 |
+
onComplete({
|
173 |
+
success: true,
|
174 |
+
sessionId,
|
175 |
+
logs: generationLogs,
|
176 |
+
});
|
177 |
+
}
|
178 |
+
}
|
179 |
+
}, [generationLogs, sessionId, onComplete]);
|
180 |
+
|
181 |
+
return {
|
182 |
+
generationLogs,
|
183 |
+
setGenerationLogs,
|
184 |
+
error,
|
185 |
+
setError,
|
186 |
+
currentPhase,
|
187 |
+
completedSteps,
|
188 |
+
activeStep,
|
189 |
+
generationComplete,
|
190 |
+
setGenerationComplete,
|
191 |
+
};
|
192 |
+
};
|
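As a concrete trace of the step bookkeeping above (the values follow directly from BENCHMARK_STEPS and the regex in updateSteps):

// A backend log line such as:
const log = "[SUCCESS] Stage completed: chunking";
const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
const completedStep = match[1]; // "chunking"
// "chunking" sits at index 5 of BENCHMARK_STEPS, so the hook sets
// activeStep to 5 + 1 = 6, i.e. the UI moves on to
// "single_shot_question_generation".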
frontend/src/components/Benchmark/hooks/useBenchmarkPolling.js
ADDED
@@ -0,0 +1,106 @@
|
1 |
+
import { useRef, useEffect } from "react";
|
2 |
+
import API_CONFIG from "../../../config/api";
|
3 |
+
|
4 |
+
export const useBenchmarkPolling = (
|
5 |
+
sessionId,
|
6 |
+
setGenerationLogs,
|
7 |
+
setGenerationComplete,
|
8 |
+
onComplete
|
9 |
+
) => {
|
10 |
+
const pollingIntervalRef = useRef(null);
|
11 |
+
|
12 |
+
const startPolling = () => {
|
13 |
+
if (pollingIntervalRef.current) {
|
14 |
+
clearInterval(pollingIntervalRef.current);
|
15 |
+
}
|
16 |
+
|
17 |
+
pollingIntervalRef.current = setInterval(async () => {
|
18 |
+
try {
|
19 |
+
const logsResponse = await fetch(
|
20 |
+
`${API_CONFIG.BASE_URL}/benchmark-progress/${sessionId}`
|
21 |
+
);
|
22 |
+
|
23 |
+
if (logsResponse.ok) {
|
24 |
+
const logsResult = await logsResponse.json();
|
25 |
+
|
26 |
+
if (logsResult.logs) {
|
27 |
+
setGenerationLogs((prevLogs) => {
|
28 |
+
if (logsResult.logs.length > prevLogs.length) {
|
29 |
+
return logsResult.logs;
|
30 |
+
}
|
31 |
+
return prevLogs;
|
32 |
+
});
|
33 |
+
}
|
34 |
+
|
35 |
+
// Check whether the logs contain errors
|
36 |
+
const hasError = logsResult.logs.some(
|
37 |
+
(log) =>
|
38 |
+
log.includes("Error") ||
|
39 |
+
log.includes("ERROR") ||
|
40 |
+
log.includes("Failed") ||
|
41 |
+
log.includes("RATE_LIMIT_EXCEEDED") ||
|
42 |
+
log.includes("heavy load") ||
|
43 |
+
log.includes("rate limit")
|
44 |
+
);
|
45 |
+
|
46 |
+
if (hasError) {
|
47 |
+
setGenerationComplete(true);
|
48 |
+
clearInterval(pollingIntervalRef.current);
|
49 |
+
if (onComplete) {
|
50 |
+
onComplete({
|
51 |
+
success: false,
|
52 |
+
error:
|
53 |
+
"An error occurred during benchmark generation. Please try again later.",
|
54 |
+
sessionId,
|
55 |
+
});
|
56 |
+
}
|
57 |
+
return;
|
58 |
+
}
|
59 |
+
|
60 |
+
if (logsResult.is_completed) {
|
61 |
+
setGenerationComplete(true);
|
62 |
+
clearInterval(pollingIntervalRef.current);
|
63 |
+
if (onComplete) {
|
64 |
+
onComplete({
|
65 |
+
success: true,
|
66 |
+
sessionId,
|
67 |
+
logs: logsResult.logs,
|
68 |
+
});
|
69 |
+
}
|
70 |
+
}
|
71 |
+
} else {
|
72 |
+
const errorData = await logsResponse.json();
|
73 |
+
setGenerationComplete(true);
|
74 |
+
clearInterval(pollingIntervalRef.current);
|
75 |
+
if (onComplete) {
|
76 |
+
onComplete({
|
77 |
+
success: false,
|
78 |
+
error: errorData.error || "Unknown error",
|
79 |
+
sessionId,
|
80 |
+
});
|
81 |
+
}
|
82 |
+
}
|
83 |
+
} catch (error) {
|
84 |
+
setGenerationComplete(true);
|
85 |
+
clearInterval(pollingIntervalRef.current);
|
86 |
+
if (onComplete) {
|
87 |
+
onComplete({
|
88 |
+
success: false,
|
89 |
+
error: error.message,
|
90 |
+
sessionId,
|
91 |
+
});
|
92 |
+
}
|
93 |
+
}
|
94 |
+
}, 2000);
|
95 |
+
};
|
96 |
+
|
97 |
+
useEffect(() => {
|
98 |
+
return () => {
|
99 |
+
if (pollingIntervalRef.current) {
|
100 |
+
clearInterval(pollingIntervalRef.current);
|
101 |
+
}
|
102 |
+
};
|
103 |
+
}, []);
|
104 |
+
|
105 |
+
return { startPolling };
|
106 |
+
};
|
frontend/src/components/Benchmark/hooks/useBenchmarkSimulation.js
ADDED
@@ -0,0 +1,66 @@
|
1 |
+
import { useRef } from "react";
|
2 |
+
|
3 |
+
const SIMULATED_LOGS = [
|
4 |
+
"[INFO] Initializing benchmark generation...",
|
5 |
+
"[INFO] Generating base configuration file...",
|
6 |
+
"[SUCCESS] Stage completed: configuration",
|
7 |
+
"[INFO] Finding available providers for models...",
|
8 |
+
"[SUCCESS] Stage completed: provider_check",
|
9 |
+
"[INFO] Starting ingestion process...",
|
10 |
+
"[SUCCESS] Stage completed: ingestion",
|
11 |
+
"[INFO] Processing document content for upload...",
|
12 |
+
"[SUCCESS] Stage completed: upload_ingest_to_hub",
|
13 |
+
"[INFO] Generating document summary...",
|
14 |
+
"[SUCCESS] Stage completed: summarization",
|
15 |
+
"[INFO] Chunking content for better analysis...",
|
16 |
+
"[SUCCESS] Stage completed: chunking",
|
17 |
+
"[INFO] Generating single-shot questions...",
|
18 |
+
"[SUCCESS] Stage completed: single_shot_question_generation",
|
19 |
+
"[SUCCESS] Benchmark process completed successfully",
|
20 |
+
];
|
21 |
+
|
22 |
+
export const useBenchmarkSimulation = (
|
23 |
+
setGenerationLogs,
|
24 |
+
setGenerationComplete,
|
25 |
+
onComplete,
|
26 |
+
sessionId
|
27 |
+
) => {
|
28 |
+
const simulationIntervalRef = useRef(null);
|
29 |
+
const SIMULATION_DURATION = 80000; // 80 seconds
|
30 |
+
|
31 |
+
const startSimulation = () => {
|
32 |
+
setGenerationLogs([]);
|
33 |
+
let currentStep = 0;
|
34 |
+
|
35 |
+
const addNextLog = () => {
|
36 |
+
if (currentStep < SIMULATED_LOGS.length) {
|
37 |
+
setGenerationLogs((prevLogs) => [
|
38 |
+
...prevLogs,
|
39 |
+
SIMULATED_LOGS[currentStep],
|
40 |
+
]);
|
41 |
+
currentStep++;
|
42 |
+
|
43 |
+
if (currentStep >= SIMULATED_LOGS.length) {
|
44 |
+
setTimeout(() => {
|
45 |
+
setGenerationComplete(true);
|
46 |
+
clearInterval(simulationIntervalRef.current);
|
47 |
+
if (onComplete) {
|
48 |
+
onComplete({
|
49 |
+
success: true,
|
50 |
+
sessionId,
|
51 |
+
logs: SIMULATED_LOGS,
|
52 |
+
});
|
53 |
+
}
|
54 |
+
}, 1000);
|
55 |
+
}
|
56 |
+
}
|
57 |
+
};
|
58 |
+
|
59 |
+
const totalSteps = SIMULATED_LOGS.length;
|
60 |
+
const intervalPerStep = SIMULATION_DURATION / totalSteps;
|
61 |
+
|
62 |
+
simulationIntervalRef.current = setInterval(addNextLog, intervalPerStep);
|
63 |
+
};
|
64 |
+
|
65 |
+
return { startSimulation };
|
66 |
+
};
|
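Taken together, the three hooks split the responsibilities the old component handled inline: log/step state, backend polling, and the canned demo run. The sketch below is an assumed, simplified shape of how Generator.jsx can wire them up (the real component also manages the timer, visibility handling, and the full rendering); it is not the actual file contents.

import React from "react";
import API_CONFIG from "../../config/api";
import { useBenchmarkLogs } from "./hooks/useBenchmarkLogs";
import { useBenchmarkPolling } from "./hooks/useBenchmarkPolling";
import { useBenchmarkSimulation } from "./hooks/useBenchmarkSimulation";

// Simplified wiring sketch (assumption, not the actual Generator.jsx).
const GeneratorSketch = ({ sessionId, isDefault, onComplete }) => {
  const {
    generationLogs,
    setGenerationLogs,
    setGenerationComplete,
    activeStep,
    error,
  } = useBenchmarkLogs(sessionId, isDefault, onComplete);

  const { startPolling } = useBenchmarkPolling(
    sessionId,
    setGenerationLogs,
    setGenerationComplete,
    onComplete
  );

  const { startSimulation } = useBenchmarkSimulation(
    setGenerationLogs,
    setGenerationComplete,
    onComplete,
    sessionId
  );

  React.useEffect(() => {
    if (isDefault) {
      // Precomputed demo documents replay the canned SIMULATED_LOGS.
      startSimulation();
      return;
    }
    // Real sessions: request generation, then poll /benchmark-progress.
    fetch(`${API_CONFIG.BASE_URL}/generate-benchmark`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ session_id: sessionId }),
    }).then((response) => {
      if (response.ok) startPolling();
    });
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [isDefault, sessionId]);

  if (error) return <p>{error}</p>;
  return (
    <div>
      <p>Step {activeStep} / 7</p>
      <pre>{generationLogs.join("\n")}</pre>
    </div>
  );
};

export default GeneratorSketch;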
frontend/src/components/BenchmarkEvaluation.jsx
DELETED
@@ -1,401 +0,0 @@
|
|
1 |
-
import React, { useState, useEffect, useRef } from "react";
|
2 |
-
import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
|
3 |
-
import { useNavigate, useSearchParams } from "react-router-dom";
|
4 |
-
import API_CONFIG from "../config/api";
|
5 |
-
|
6 |
-
// Temps de simulation en millisecondes pour les documents précalculés
|
7 |
-
const SIMULATION_DURATION = 120000; // 20 secondes
|
8 |
-
|
9 |
-
// Intervalle de changement des messages pour les documents standards vs précalculés
|
10 |
-
const MESSAGE_CHANGE_INTERVAL = {
|
11 |
-
DEFAULT: 25000, // 20 secondes pour documents standards
|
12 |
-
PRECALCULATED: 25000, // 5 secondes pour documents précalculés
|
13 |
-
};
|
14 |
-
|
15 |
-
// Starting messages with their timing
|
16 |
-
const STARTING_MESSAGES = [
|
17 |
-
{ message: "Initializing evaluation environment", step: 1, totalSteps: 5 },
|
18 |
-
{ message: "Finding available model providers", step: 2, totalSteps: 5 },
|
19 |
-
{ message: "Starting evaluation process", step: 3, totalSteps: 5 },
|
20 |
-
{ message: "Evaluating models", step: 4, totalSteps: 5 },
|
21 |
-
{ message: "Storing evaluation results", step: 5, totalSteps: 5 },
|
22 |
-
];
|
23 |
-
|
24 |
-
const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
|
25 |
-
const [searchParams] = useSearchParams();
|
26 |
-
const isDefault =
|
27 |
-
isDefaultDocument ||
|
28 |
-
["the-bitter-lesson", "hurricane-faq", "pokemon-guide"].includes(sessionId);
|
29 |
-
const [evaluationComplete, setEvaluationComplete] = useState(false);
|
30 |
-
const [error, setError] = useState(null);
|
31 |
-
const [elapsedTime, setElapsedTime] = useState(0);
|
32 |
-
const [startingMessageIndex, setStartingMessageIndex] = useState(0);
|
33 |
-
const [evaluationStarted, setEvaluationStarted] = useState(false);
|
34 |
-
|
35 |
-
const timerIntervalRef = useRef(null);
|
36 |
-
const startTimeRef = useRef(null);
|
37 |
-
const startingMessageIntervalRef = useRef(null);
|
38 |
-
const pollingIntervalRef = useRef(null);
|
39 |
-
const simulationTimeoutRef = useRef(null);
|
40 |
-
|
41 |
-
const navigate = useNavigate();
|
42 |
-
|
43 |
-
// Add effect to handle automatic redirection when evaluation is complete
|
44 |
-
useEffect(() => {
|
45 |
-
if (evaluationComplete) {
|
46 |
-
navigate(`/evaluation-display?session=${sessionId}`);
|
47 |
-
}
|
48 |
-
}, [evaluationComplete, sessionId, navigate]);
|
49 |
-
|
50 |
-
// Add effect to handle starting messages
|
51 |
-
useEffect(() => {
|
52 |
-
// Ne configurer l'intervalle automatique que pour les documents par défaut
|
53 |
-
// Pour les évaluations réelles, on se fiera uniquement aux mises à jour de l'API
|
54 |
-
if (isDefault) {
|
55 |
-
startingMessageIntervalRef.current = setInterval(() => {
|
56 |
-
setStartingMessageIndex((prev) => {
|
57 |
-
if (prev < STARTING_MESSAGES.length - 1) {
|
58 |
-
return prev + 1;
|
59 |
-
}
|
60 |
-
return prev;
|
61 |
-
});
|
62 |
-
}, MESSAGE_CHANGE_INTERVAL.PRECALCULATED);
|
63 |
-
}
|
64 |
-
|
65 |
-
return () => {
|
66 |
-
if (startingMessageIntervalRef.current) {
|
67 |
-
clearInterval(startingMessageIntervalRef.current);
|
68 |
-
}
|
69 |
-
};
|
70 |
-
}, [isDefault]);
|
71 |
-
|
72 |
-
// Start evaluation when component mounts
|
73 |
-
useEffect(() => {
|
74 |
-
// Set start time
|
75 |
-
startTimeRef.current = Date.now();
|
76 |
-
|
77 |
-
// Start timer
|
78 |
-
timerIntervalRef.current = setInterval(() => {
|
79 |
-
const timeElapsed = Math.floor(
|
80 |
-
(Date.now() - startTimeRef.current) / 1000
|
81 |
-
);
|
82 |
-
setElapsedTime(timeElapsed);
|
83 |
-
}, 1000);
|
84 |
-
|
85 |
-
// Gestionnaire pour détecter quand la page redevient visible
|
86 |
-
const handleVisibilityChange = () => {
|
87 |
-
if (
|
88 |
-
document.visibilityState === "visible" &&
|
89 |
-
!isDefault &&
|
90 |
-
!evaluationComplete &&
|
91 |
-
evaluationStarted // Vérifier si l'évaluation a déjà commencé
|
92 |
-
) {
|
93 |
-
console.log("Page became visible, checking evaluation status...");
|
94 |
-
// Force une nouvelle requête pour récupérer l'état d'évaluation
|
95 |
-
const checkEvaluationStatus = async () => {
|
96 |
-
try {
|
97 |
-
const logsResponse = await fetch(
|
98 |
-
`${API_CONFIG.BASE_URL}/evaluation-logs/${sessionId}`
|
99 |
-
);
|
100 |
-
|
101 |
-
if (logsResponse.ok) {
|
102 |
-
const logsResult = await logsResponse.json();
|
103 |
-
if (logsResult.is_completed) {
|
104 |
-
// Mettre fin à l'évaluation si elle est terminée
|
105 |
-
setEvaluationComplete(true);
|
106 |
-
|
107 |
-
// Avancer à la dernière étape des messages
|
108 |
-
setStartingMessageIndex(STARTING_MESSAGES.length - 1);
|
109 |
-
|
110 |
-
// Nettoyer les intervalles
|
111 |
-
if (pollingIntervalRef.current) {
|
112 |
-
clearInterval(pollingIntervalRef.current);
|
113 |
-
}
|
114 |
-
if (startingMessageIntervalRef.current) {
|
115 |
-
clearInterval(startingMessageIntervalRef.current);
|
116 |
-
}
|
117 |
-
} else {
|
118 |
-
// Si l'évaluation est toujours en cours, utiliser l'étape actuelle du backend
|
119 |
-
if (logsResult.current_step) {
|
120 |
-
// Utiliser la fonction de mappage pour déterminer l'index du message
|
121 |
-
const newIndex = mapStepToMessageIndex(
|
122 |
-
logsResult.current_step
|
123 |
-
);
|
124 |
-
setStartingMessageIndex(newIndex);
|
125 |
-
} else {
|
126 |
-
// Fallback basé sur le temps si l'étape n'est pas disponible
|
127 |
-
const progress = Math.min(
|
128 |
-
Math.floor(
|
129 |
-
(Date.now() - startTimeRef.current) /
|
130 |
-
MESSAGE_CHANGE_INTERVAL.DEFAULT
|
131 |
-
),
|
132 |
-
STARTING_MESSAGES.length - 1
|
133 |
-
);
|
134 |
-
setStartingMessageIndex(progress);
|
135 |
-
}
|
136 |
-
}
|
137 |
-
}
|
138 |
-
} catch (error) {
|
139 |
-
console.error("Error checking evaluation status:", error);
|
140 |
-
}
|
141 |
-
};
|
142 |
-
|
143 |
-
checkEvaluationStatus();
|
144 |
-
}
|
145 |
-
};
|
146 |
-
|
147 |
-
// Ajouter l'écouteur pour le changement de visibilité
|
148 |
-
document.addEventListener("visibilitychange", handleVisibilityChange);
|
149 |
-
|
150 |
-
if (isDefault) {
|
151 |
-
simulateEvaluation();
|
152 |
-
} else {
|
153 |
-
// Démarrer l'évaluation seulement si elle n'a pas déjà été lancée
|
154 |
-
if (!evaluationStarted) {
|
155 |
-
startEvaluation();
|
156 |
-
}
|
157 |
-
}
|
158 |
-
|
159 |
-
// Clean up intervals on unmount
|
160 |
-
return () => {
|
161 |
-
if (pollingIntervalRef.current) {
|
162 |
-
clearInterval(pollingIntervalRef.current);
|
163 |
-
}
|
164 |
-
if (timerIntervalRef.current) {
|
165 |
-
clearInterval(timerIntervalRef.current);
|
166 |
-
}
|
167 |
-
if (simulationTimeoutRef.current) {
|
168 |
-
clearTimeout(simulationTimeoutRef.current);
|
169 |
-
}
|
170 |
-
document.removeEventListener("visibilitychange", handleVisibilityChange);
|
171 |
-
};
|
172 |
-
}, [isDefault, sessionId, evaluationComplete, evaluationStarted]);
|
173 |
-
|
174 |
-
// Simulate the evaluation process for pre-calculated documents
|
175 |
-
const simulateEvaluation = () => {
|
176 |
-
// Complete after 20 seconds
|
177 |
-
simulationTimeoutRef.current = setTimeout(() => {
|
178 |
-
setEvaluationComplete(true);
|
179 |
-
|
180 |
-
if (startingMessageIntervalRef.current) {
|
181 |
-
clearInterval(startingMessageIntervalRef.current);
|
182 |
-
}
|
183 |
-
|
184 |
-
setStartingMessageIndex(STARTING_MESSAGES.length - 1); // Set to last message
|
185 |
-
}, SIMULATION_DURATION);
|
186 |
-
};
|
187 |
-
|
188 |
-
// Format elapsed time as HH:MM:SS
|
189 |
-
const formatElapsedTime = () => {
|
190 |
-
const hours = Math.floor(elapsedTime / 3600);
|
191 |
-
const minutes = Math.floor((elapsedTime % 3600) / 60);
|
192 |
-
const seconds = elapsedTime % 60;
|
193 |
-
|
194 |
-
return [
|
195 |
-
hours.toString().padStart(2, "0"),
|
196 |
-
minutes.toString().padStart(2, "0"),
|
197 |
-
seconds.toString().padStart(2, "0"),
|
198 |
-
].join(":");
|
199 |
-
};
|
200 |
-
|
201 |
-
// Fonction pour mapper le nom de l'étape backend vers l'index dans STARTING_MESSAGES
|
202 |
-
const mapStepToMessageIndex = (currentStep) => {
|
203 |
-
switch (currentStep) {
|
204 |
-
case "initializing":
|
205 |
-
return 0;
|
206 |
-
case "finding_available_model_providers":
|
207 |
-
return 1;
|
208 |
-
case "starting_evaluation_process":
|
209 |
-
return 2;
|
210 |
-
case "evaluating_models":
|
211 |
-
return 3;
|
212 |
-
case "storing_evaluation_results":
|
213 |
-
case "completed":
|
214 |
-
return 4;
|
215 |
-
default:
|
216 |
-
// Calculer l'étape en fonction du temps écoulé si l'étape n'est pas reconnue
|
217 |
-
const elapsedSinceStart = Date.now() - startTimeRef.current;
|
218 |
-
const estimatedTotalTime = 80000; // 80 secondes
|
219 |
-
const estimatedProgress = Math.min(
|
220 |
-
elapsedSinceStart / estimatedTotalTime,
|
221 |
-
1
|
222 |
-
);
|
223 |
-
return Math.min(
|
224 |
-
Math.floor(estimatedProgress * STARTING_MESSAGES.length),
|
225 |
-
STARTING_MESSAGES.length - 1
|
226 |
-
);
|
227 |
-
}
|
228 |
-
};
|
229 |
-
|
230 |
-
// Start benchmark evaluation
|
231 |
-
const startEvaluation = async () => {
|
232 |
-
if (!sessionId) {
|
233 |
-
setError("Missing session ID");
|
234 |
-
return;
|
235 |
-
}
|
236 |
-
|
237 |
-
// Marquer que l'évaluation a commencé
|
238 |
-
setEvaluationStarted(true);
|
239 |
-
|
240 |
-
try {
|
241 |
-
// Call API to start evaluation
|
242 |
-
const response = await fetch(
|
243 |
-
`${API_CONFIG.BASE_URL}/evaluate-benchmark`,
|
244 |
-
{
|
245 |
-
method: "POST",
|
246 |
-
headers: {
|
247 |
-
"Content-Type": "application/json",
|
248 |
-
},
|
249 |
-
body: JSON.stringify({
|
250 |
-
session_id: sessionId,
|
251 |
-
}),
|
252 |
-
}
|
253 |
-
);
|
254 |
-
|
255 |
-
const result = await response.json();
|
256 |
-
|
257 |
-
if (response.ok) {
|
258 |
-
// Set up polling to check completion
|
259 |
-
pollingIntervalRef.current = setInterval(async () => {
|
260 |
-
try {
|
261 |
-
const logsResponse = await fetch(
|
262 |
-
`${API_CONFIG.BASE_URL}/evaluation-logs/${sessionId}`
|
263 |
-
);
|
264 |
-
|
265 |
-
if (logsResponse.ok) {
|
266 |
-
const logsResult = await logsResponse.json();
|
267 |
-
|
268 |
-
// Vérifier si l'évaluation est terminée
|
269 |
-
if (logsResult.is_completed) {
|
270 |
-
setEvaluationComplete(true);
|
271 |
-
|
272 |
-
// Avancer à la dernière étape du message
|
273 |
-
setStartingMessageIndex(STARTING_MESSAGES.length - 1);
|
274 |
-
|
275 |
-
// Arrêter les intervalles
|
276 |
-
clearInterval(pollingIntervalRef.current);
|
277 |
-
if (startingMessageIntervalRef.current) {
|
278 |
-
clearInterval(startingMessageIntervalRef.current);
|
279 |
-
}
|
280 |
-
} else {
|
281 |
-
// Récupérer l'étape actuelle à partir de l'API, si disponible
|
282 |
-
if (logsResult.current_step) {
|
283 |
-
// Utiliser la fonction de mappage pour déterminer l'index du message
|
284 |
-
const newIndex = mapStepToMessageIndex(
|
285 |
-
logsResult.current_step
|
286 |
-
);
|
287 |
-
setStartingMessageIndex(newIndex);
|
288 |
-
} else {
|
289 |
-
// Fallback: Si l'API ne renvoie pas d'étape, estimer en fonction du temps
|
290 |
-
const elapsedSinceStart = Date.now() - startTimeRef.current;
|
291 |
-
const estimatedTotalTime = 80000; // 80 secondes
|
292 |
-
const estimatedProgress = Math.min(
|
293 |
-
elapsedSinceStart / estimatedTotalTime,
|
294 |
-
1
|
295 |
-
);
|
296 |
-
const estimatedStepIndex = Math.min(
|
297 |
-
Math.floor(estimatedProgress * STARTING_MESSAGES.length),
|
298 |
-
STARTING_MESSAGES.length - 1
|
299 |
-
);
|
300 |
-
setStartingMessageIndex(estimatedStepIndex);
|
301 |
-
}
|
302 |
-
}
|
303 |
-
}
|
304 |
-
} catch (error) {
|
305 |
-
console.log("Error polling logs:", error);
|
306 |
-
// Ne pas arrêter le polling en cas d'erreurs réseau temporaires
|
307 |
-
}
|
308 |
-
}, 2000);
|
309 |
-
} else {
|
310 |
-
setError(result.error || "Benchmark evaluation failed");
|
311 |
-
}
|
312 |
-
} catch (error) {
|
313 |
-
console.error("Error starting evaluation:", error);
|
314 |
-
setError("Error connecting to server");
|
315 |
-
}
|
316 |
-
};
|
317 |
-
|
318 |
-
return (
|
319 |
-
<Paper
|
320 |
-
elevation={3}
|
321 |
-
sx={{
|
322 |
-
p: 4,
|
323 |
-
mt: 3,
|
324 |
-
mb: 3,
|
325 |
-
display: "flex",
|
326 |
-
flexDirection: "column",
|
327 |
-
alignItems: "center",
|
328 |
-
justifyContent: "center",
|
329 |
-
minHeight: 200,
|
330 |
-
position: "relative",
|
331 |
-
}}
|
332 |
-
>
|
333 |
-
{/* Temps estimé */}
|
334 |
-
<Box
|
335 |
-
sx={{
|
336 |
-
position: "absolute",
|
337 |
-
top: 12,
|
338 |
-
right: 12,
|
339 |
-
backgroundColor: "rgba(0, 0, 0, 0.04)",
|
340 |
-
borderRadius: "4px",
|
341 |
-
px: 1,
|
342 |
-
py: 0.5,
|
343 |
-
display: "inline-flex",
|
344 |
-
alignItems: "center",
|
345 |
-
}}
|
346 |
-
>
|
347 |
-
<Typography
|
348 |
-
variant="caption"
|
349 |
-
sx={{
|
350 |
-
fontSize: "0.675rem",
|
351 |
-
color: "text.secondary",
|
352 |
-
fontWeight: 500,
|
353 |
-
}}
|
354 |
-
>
|
355 |
-
Estimated time ~ 1m30s
|
356 |
-
</Typography>
|
357 |
-
</Box>
|
358 |
-
|
359 |
-
{error ? (
|
360 |
-
<Alert severity="error" sx={{ width: "100%" }}>
|
361 |
-
{error}
|
362 |
-
</Alert>
|
363 |
-
) : (
|
364 |
-
<>
|
365 |
-
{evaluationComplete ? (
|
366 |
-
<Alert severity="success" sx={{ width: "100%", mb: 3 }}>
|
367 |
-
Evaluation completed successfully!
|
368 |
-
</Alert>
|
369 |
-
) : (
|
370 |
-
<>
|
371 |
-
<CircularProgress size={60} sx={{ mb: 2 }} />
|
372 |
-
<Typography variant="h6" component="div" gutterBottom>
|
373 |
-
Benchmark evaluation...
|
374 |
-
</Typography>
|
375 |
-
|
376 |
-
{/* Step progress indicator */}
|
377 |
-
<Typography variant="body1" color="text.secondary">
|
378 |
-
{`${STARTING_MESSAGES[startingMessageIndex].message} (${STARTING_MESSAGES[startingMessageIndex].step}/${STARTING_MESSAGES[startingMessageIndex].totalSteps})`}
|
379 |
-
</Typography>
|
380 |
-
|
381 |
-
{/* Timer display */}
|
382 |
-
<Box
|
383 |
-
sx={{
|
384 |
-
display: "flex",
|
385 |
-
alignItems: "center",
|
386 |
-
mt: 1,
|
387 |
-
color: "text.secondary",
|
388 |
-
opacity: 0.5,
|
389 |
-
}}
|
390 |
-
>
|
391 |
-
<Typography variant="body2">{formatElapsedTime()}</Typography>
|
392 |
-
</Box>
|
393 |
-
</>
|
394 |
-
)}
|
395 |
-
</>
|
396 |
-
)}
|
397 |
-
</Paper>
|
398 |
-
);
|
399 |
-
};
|
400 |
-
|
401 |
-
export default BenchmarkEvaluation;
|
frontend/src/components/{EvaluationDisplay.jsx → Evaluation/Display.jsx}
RENAMED
@@ -18,6 +18,7 @@ import {
|
|
18 |
} from "@mui/material";
|
19 |
import OpenInNewIcon from "@mui/icons-material/OpenInNew";
|
20 |
import CheckCircleIcon from "@mui/icons-material/CheckCircle";
|
|
|
21 |
|
22 |
// Medal styles
|
23 |
const MEDAL_STYLES = {
|
@@ -85,7 +86,7 @@ const getMedalStyle = (rank) => {
|
|
85 |
};
|
86 |
};
|
87 |
|
88 |
-
const EvaluationDisplay = ({ sessionId, results }) => {
|
89 |
// Format accuracy as percentage
|
90 |
const formatAccuracy = (value) => {
|
91 |
return `${(value * 100).toFixed(2)}\u2009%`;
|
@@ -118,9 +119,10 @@ const EvaluationDisplay = ({ sessionId, results }) => {
|
|
118 |
results.models_comparison.length === 0
|
119 |
) {
|
120 |
return (
|
121 |
-
<
|
122 |
-
The demo is currently under heavy load, please try again later.
|
123 |
-
|
|
|
124 |
);
|
125 |
}
|
126 |
|
@@ -130,9 +132,10 @@ const EvaluationDisplay = ({ sessionId, results }) => {
|
|
130 |
);
|
131 |
if (successfulModels.length === 0) {
|
132 |
return (
|
133 |
-
<
|
134 |
-
The demo is currently under heavy load, please try again later.
|
135 |
-
|
|
|
136 |
);
|
137 |
}
|
138 |
|
@@ -295,4 +298,4 @@ const EvaluationDisplay = ({ sessionId, results }) => {
|
|
295 |
);
|
296 |
};
|
297 |
|
298 |
-
export default EvaluationDisplay;
|
|
|
18 |
} from "@mui/material";
|
19 |
import OpenInNewIcon from "@mui/icons-material/OpenInNew";
|
20 |
import CheckCircleIcon from "@mui/icons-material/CheckCircle";
|
21 |
+
import ErrorDisplay from "../common/ErrorDisplay";
|
22 |
|
23 |
// Medal styles
|
24 |
const MEDAL_STYLES = {
|
|
|
86 |
};
|
87 |
};
|
88 |
|
89 |
+
const Display = ({ sessionId, results }) => {
|
90 |
// Format accuracy as percentage
|
91 |
const formatAccuracy = (value) => {
|
92 |
return `${(value * 100).toFixed(2)}\u2009%`;
|
|
|
119 |
results.models_comparison.length === 0
|
120 |
) {
|
121 |
return (
|
122 |
+
<ErrorDisplay
|
123 |
+
error="The demo is currently under heavy load, please try again later."
|
124 |
+
title="Service Unavailable"
|
125 |
+
/>
|
126 |
);
|
127 |
}
|
128 |
|
|
|
132 |
);
|
133 |
if (successfulModels.length === 0) {
|
134 |
return (
|
135 |
+
<ErrorDisplay
|
136 |
+
error="The demo is currently under heavy load, please try again later."
|
137 |
+
title="Service Unavailable"
|
138 |
+
/>
|
139 |
);
|
140 |
}
|
141 |
|
|
|
298 |
);
|
299 |
};
|
300 |
|
301 |
+
export default Display;
|
frontend/src/components/Evaluation/Evaluation.jsx
ADDED
@@ -0,0 +1,150 @@
|
1 |
+
import React from "react";
|
2 |
+
import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
|
3 |
+
import { useNavigate, useSearchParams } from "react-router-dom";
|
4 |
+
import ErrorOutlineIcon from "@mui/icons-material/ErrorOutline";
|
5 |
+
import { useSimulation } from "./hooks/useSimulation";
|
6 |
+
import { useTimer } from "./hooks/useTimer";
|
7 |
+
import { useEvaluation } from "./hooks/useEvaluation";
|
8 |
+
import ErrorDisplay from "../common/ErrorDisplay";
|
9 |
+
|
10 |
+
const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
|
11 |
+
const [searchParams] = useSearchParams();
|
12 |
+
const isDefault =
|
13 |
+
isDefaultDocument ||
|
14 |
+
["the-bitter-lesson", "hurricane-faq", "pokemon-guide"].includes(sessionId);
|
15 |
+
|
16 |
+
const navigate = useNavigate();
|
17 |
+
|
18 |
+
// Use our custom hooks
|
19 |
+
const { formatElapsedTime, stopTimer } = useTimer();
|
20 |
+
const {
|
21 |
+
startingMessageIndex,
|
22 |
+
evaluationComplete: simulationComplete,
|
23 |
+
currentMessage,
|
24 |
+
} = useSimulation(() => {
|
25 |
+
if (onComplete) {
|
26 |
+
onComplete();
|
27 |
+
}
|
28 |
+
});
|
29 |
+
const {
|
30 |
+
error,
|
31 |
+
evaluationComplete: realComplete,
|
32 |
+
currentStep,
|
33 |
+
evaluationStarted,
|
34 |
+
startEvaluation,
|
35 |
+
currentStepLabel,
|
36 |
+
totalSteps,
|
37 |
+
} = useEvaluation(sessionId, () => {
|
38 |
+
if (onComplete) {
|
39 |
+
onComplete();
|
40 |
+
}
|
41 |
+
});
|
42 |
+
|
43 |
+
// Handle automatic redirection when evaluation is complete
|
44 |
+
React.useEffect(() => {
|
45 |
+
if (realComplete || simulationComplete) {
|
46 |
+
navigate(`/evaluation-display?session=${sessionId}`);
|
47 |
+
}
|
48 |
+
}, [realComplete, simulationComplete, sessionId, navigate]);
|
49 |
+
|
50 |
+
// Start evaluation if not default and not started
|
51 |
+
React.useEffect(() => {
|
52 |
+
if (!isDefault && !evaluationStarted) {
|
53 |
+
startEvaluation();
|
54 |
+
}
|
55 |
+
}, [isDefault, evaluationStarted, startEvaluation]);
|
56 |
+
|
57 |
+
// Stop timer when complete
|
58 |
+
React.useEffect(() => {
|
59 |
+
if (realComplete || simulationComplete) {
|
60 |
+
stopTimer();
|
61 |
+
}
|
62 |
+
}, [realComplete, simulationComplete, stopTimer]);
|
63 |
+
|
64 |
+
const isComplete = realComplete || simulationComplete;
|
65 |
+
const currentStepInfo = isDefault
|
66 |
+
? `${currentMessage.message} (${currentMessage.step}/${currentMessage.totalSteps})`
|
67 |
+
: `${currentStepLabel} (${currentStep + 1}/${totalSteps})`;
|
68 |
+
|
69 |
+
return (
|
70 |
+
<Paper
|
71 |
+
elevation={3}
|
72 |
+
sx={{
|
73 |
+
p: 4,
|
74 |
+
mt: 3,
|
75 |
+
mb: 3,
|
76 |
+
display: "flex",
|
77 |
+
flexDirection: "column",
|
78 |
+
alignItems: "center",
|
79 |
+
justifyContent: "center",
|
80 |
+
minHeight: 200,
|
81 |
+
position: "relative",
|
82 |
+
}}
|
83 |
+
>
|
84 |
+
{/* Estimated time */}
|
85 |
+
<Box
|
86 |
+
sx={{
|
87 |
+
position: "absolute",
|
88 |
+
top: 12,
|
89 |
+
right: 12,
|
90 |
+
backgroundColor: "rgba(0, 0, 0, 0.04)",
|
91 |
+
borderRadius: "4px",
|
92 |
+
px: 1,
|
93 |
+
py: 0.5,
|
94 |
+
display: "inline-flex",
|
95 |
+
alignItems: "center",
|
96 |
+
}}
|
97 |
+
>
|
98 |
+
<Typography
|
99 |
+
variant="caption"
|
100 |
+
sx={{
|
101 |
+
fontSize: "0.675rem",
|
102 |
+
color: "text.secondary",
|
103 |
+
fontWeight: 500,
|
104 |
+
}}
|
105 |
+
>
|
106 |
+
Estimated time ~ 1m30s
|
107 |
+
</Typography>
|
108 |
+
</Box>
|
109 |
+
|
110 |
+
{error ? (
|
111 |
+
<ErrorDisplay error={error} />
|
112 |
+
) : (
|
113 |
+
<>
|
114 |
+
{isComplete ? (
|
115 |
+
<Alert severity="success" sx={{ width: "100%", mb: 3 }}>
|
116 |
+
Evaluation completed successfully!
|
117 |
+
</Alert>
|
118 |
+
) : (
|
119 |
+
<>
|
120 |
+
<CircularProgress size={60} sx={{ mb: 2 }} />
|
121 |
+
<Typography variant="h6" component="div" gutterBottom>
|
122 |
+
Benchmark evaluation...
|
123 |
+
</Typography>
|
124 |
+
|
125 |
+
{/* Step progress indicator */}
|
126 |
+
<Typography variant="body1" color="text.secondary">
|
127 |
+
{currentStepInfo}
|
128 |
+
</Typography>
|
129 |
+
|
130 |
+
{/* Timer display */}
|
131 |
+
<Box
|
132 |
+
sx={{
|
133 |
+
display: "flex",
|
134 |
+
alignItems: "center",
|
135 |
+
mt: 1,
|
136 |
+
color: "text.secondary",
|
137 |
+
opacity: 0.5,
|
138 |
+
}}
|
139 |
+
>
|
140 |
+
<Typography variant="body2">{formatElapsedTime()}</Typography>
|
141 |
+
</Box>
|
142 |
+
</>
|
143 |
+
)}
|
144 |
+
</>
|
145 |
+
)}
|
146 |
+
</Paper>
|
147 |
+
);
|
148 |
+
};
|
149 |
+
|
150 |
+
export default BenchmarkEvaluation;
|
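A page only needs to hand this component a session id and a completion callback. The sketch below is an assumed usage, mirroring the props destructured above (sessionId, isDefaultDocument, onComplete), not code from this commit:

import React from "react";
import BenchmarkEvaluation from "../components/Evaluation/Evaluation";

// Hypothetical page-level usage (illustration only).
const EvaluationPage = ({ sessionId }) => (
  <BenchmarkEvaluation
    sessionId={sessionId}
    isDefaultDocument={false}
    onComplete={() => console.log("evaluation finished")}
  />
);

export default EvaluationPage;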
frontend/src/components/Evaluation/hooks/useEvaluation.js
ADDED
@@ -0,0 +1,148 @@
|
1 |
+
import { useState, useRef, useEffect } from "react";
|
2 |
+
import API_CONFIG from "../../../config/api";
|
3 |
+
|
4 |
+
// Define all evaluation steps in sequence
|
5 |
+
const EVALUATION_STEPS = [
|
6 |
+
"initializing",
|
7 |
+
"finding_available_model_providers",
|
8 |
+
"starting_evaluation_process",
|
9 |
+
"evaluating_models",
|
10 |
+
"storing_evaluation_results",
|
11 |
+
];
|
12 |
+
|
13 |
+
// Step labels for display
|
14 |
+
const STEP_LABELS = {
|
15 |
+
initializing: "Initializing evaluation environment",
|
16 |
+
finding_available_model_providers: "Finding available model providers",
|
17 |
+
starting_evaluation_process: "Starting evaluation process",
|
18 |
+
evaluating_models: "Evaluating models",
|
19 |
+
storing_evaluation_results: "Storing evaluation results",
|
20 |
+
};
|
21 |
+
|
22 |
+
// Error messages that should be treated as errors
|
23 |
+
const ERROR_MESSAGES = [
|
24 |
+
"heavy load",
|
25 |
+
"try again later",
|
26 |
+
"rate limit",
|
27 |
+
"RATE_LIMIT_EXCEEDED",
|
28 |
+
];
|
29 |
+
|
30 |
+
export const useEvaluation = (sessionId, onComplete) => {
|
31 |
+
const [error, setError] = useState(null);
|
32 |
+
const [evaluationComplete, setEvaluationComplete] = useState(false);
|
33 |
+
const [currentStep, setCurrentStep] = useState(0);
|
34 |
+
const [evaluationStarted, setEvaluationStarted] = useState(false);
|
35 |
+
const pollingIntervalRef = useRef(null);
|
36 |
+
|
37 |
+
const mapStepToIndex = (step) => {
|
38 |
+
return EVALUATION_STEPS.indexOf(step);
|
39 |
+
};
|
40 |
+
|
41 |
+
const checkForErrors = (logs) => {
|
42 |
+
if (!logs) return false;
|
43 |
+
|
44 |
+
const hasError = ERROR_MESSAGES.some((errorMessage) =>
|
45 |
+
logs.some((log) => log.toLowerCase().includes(errorMessage.toLowerCase()))
|
46 |
+
);
|
47 |
+
|
48 |
+
if (hasError) {
|
49 |
+
setError(
|
50 |
+
"The demo is currently under heavy load, please try again later."
|
51 |
+
);
|
52 |
+
setEvaluationComplete(true);
|
53 |
+
if (pollingIntervalRef.current) {
|
54 |
+
clearInterval(pollingIntervalRef.current);
|
55 |
+
}
|
56 |
+
return true;
|
57 |
+
}
|
58 |
+
return false;
|
59 |
+
};
|
60 |
+
|
61 |
+
const startEvaluation = async () => {
|
62 |
+
if (!sessionId) {
|
63 |
+
setError("Missing session ID");
|
64 |
+
return;
|
65 |
+
}
|
66 |
+
|
67 |
+
setEvaluationStarted(true);
|
68 |
+
|
69 |
+
try {
|
70 |
+
const response = await fetch(
|
71 |
+
`${API_CONFIG.BASE_URL}/evaluate-benchmark`,
|
72 |
+
{
|
73 |
+
method: "POST",
|
74 |
+
headers: {
|
75 |
+
"Content-Type": "application/json",
|
76 |
+
},
|
77 |
+
body: JSON.stringify({
|
78 |
+
session_id: sessionId,
|
79 |
+
}),
|
80 |
+
}
|
81 |
+
);
|
82 |
+
|
83 |
+
const result = await response.json();
|
84 |
+
|
85 |
+
if (response.ok) {
|
86 |
+
setupPolling();
|
87 |
+
} else {
|
88 |
+
setError(result.error || "Benchmark evaluation failed");
|
89 |
+
}
|
90 |
+
} catch (error) {
|
91 |
+
console.error("Error starting evaluation:", error);
|
92 |
+
setError("Error connecting to server");
|
93 |
+
}
|
94 |
+
};
|
95 |
+
|
96 |
+
const setupPolling = () => {
|
97 |
+
pollingIntervalRef.current = setInterval(async () => {
|
98 |
+
try {
|
99 |
+
const logsResponse = await fetch(
|
100 |
+
`${API_CONFIG.BASE_URL}/evaluation-logs/${sessionId}`
|
101 |
+
);
|
102 |
+
|
103 |
+
if (logsResponse.ok) {
|
104 |
+
const logsResult = await logsResponse.json();
|
105 |
+
|
106 |
+
// Check for error messages in logs
|
107 |
+
if (checkForErrors(logsResult.logs)) {
|
108 |
+
return;
|
109 |
+
}
|
110 |
+
|
111 |
+
if (logsResult.is_completed) {
|
112 |
+
setEvaluationComplete(true);
|
113 |
+
clearInterval(pollingIntervalRef.current);
|
114 |
+
if (onComplete) {
|
115 |
+
onComplete();
|
116 |
+
}
|
117 |
+
} else if (logsResult.current_step) {
|
118 |
+
const newStepIndex = mapStepToIndex(logsResult.current_step);
|
119 |
+
if (newStepIndex !== -1) {
|
120 |
+
setCurrentStep(newStepIndex);
|
121 |
+
}
|
122 |
+
}
|
123 |
+
}
|
124 |
+
} catch (error) {
|
125 |
+
console.log("Error polling logs:", error);
|
126 |
+
}
|
127 |
+
}, 2000);
|
128 |
+
};
|
129 |
+
|
130 |
+
useEffect(() => {
|
131 |
+
return () => {
|
132 |
+
if (pollingIntervalRef.current) {
|
133 |
+
clearInterval(pollingIntervalRef.current);
|
134 |
+
}
|
135 |
+
};
|
136 |
+
}, []);
|
137 |
+
|
138 |
+
return {
|
139 |
+
error,
|
140 |
+
evaluationComplete,
|
141 |
+
currentStep,
|
142 |
+
evaluationStarted,
|
143 |
+
startEvaluation,
|
144 |
+
currentStepLabel:
|
145 |
+
STEP_LABELS[EVALUATION_STEPS[currentStep]] || "Processing",
|
146 |
+
totalSteps: EVALUATION_STEPS.length,
|
147 |
+
};
|
148 |
+
};
|
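Tracing the mapping above with the backend reporting "evaluating_models" (all values come from the EVALUATION_STEPS and STEP_LABELS constants in this file):

// Inside this module, with current_step = "evaluating_models":
const currentStep = EVALUATION_STEPS.indexOf("evaluating_models"); // 3
const currentStepLabel = STEP_LABELS[EVALUATION_STEPS[currentStep]]; // "Evaluating models"
// Evaluation.jsx renders `${currentStepLabel} (${currentStep + 1}/${totalSteps})`,
// i.e. "Evaluating models (4/5)".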
frontend/src/components/Evaluation/hooks/useSimulation.js
ADDED
@@ -0,0 +1,59 @@
|
1 |
+
import { useState, useRef, useEffect } from "react";
|
2 |
+
|
3 |
+
// Simulation time in milliseconds for pre-calculated documents
|
4 |
+
const SIMULATION_DURATION = 120000; // 2 minutes
|
5 |
+
|
6 |
+
// Starting messages with their timing
|
7 |
+
const STARTING_MESSAGES = [
|
8 |
+
{ message: "Initializing evaluation environment", step: 1, totalSteps: 5 },
|
9 |
+
{ message: "Finding available model providers", step: 2, totalSteps: 5 },
|
10 |
+
{ message: "Starting evaluation process", step: 3, totalSteps: 5 },
|
11 |
+
{ message: "Evaluating models", step: 4, totalSteps: 5 },
|
12 |
+
{ message: "Storing evaluation results", step: 5, totalSteps: 5 },
|
13 |
+
];
|
14 |
+
|
15 |
+
export const useSimulation = (onComplete) => {
|
16 |
+
const [startingMessageIndex, setStartingMessageIndex] = useState(0);
|
17 |
+
const [evaluationComplete, setEvaluationComplete] = useState(false);
|
18 |
+
const simulationTimeoutRef = useRef(null);
|
19 |
+
const startingMessageIntervalRef = useRef(null);
|
20 |
+
|
21 |
+
useEffect(() => {
|
22 |
+
// Configure automatic interval for message changes
|
23 |
+
startingMessageIntervalRef.current = setInterval(() => {
|
24 |
+
setStartingMessageIndex((prev) => {
|
25 |
+
if (prev < STARTING_MESSAGES.length - 1) {
|
26 |
+
return prev + 1;
|
27 |
+
}
|
28 |
+
return prev;
|
29 |
+
});
|
30 |
+
}, SIMULATION_DURATION / STARTING_MESSAGES.length);
|
31 |
+
|
32 |
+
// Complete after simulation duration
|
33 |
+
simulationTimeoutRef.current = setTimeout(() => {
|
34 |
+
setEvaluationComplete(true);
|
35 |
+
if (startingMessageIntervalRef.current) {
|
36 |
+
clearInterval(startingMessageIntervalRef.current);
|
37 |
+
}
|
38 |
+
setStartingMessageIndex(STARTING_MESSAGES.length - 1);
|
39 |
+
if (onComplete) {
|
40 |
+
onComplete();
|
41 |
+
}
|
42 |
+
}, SIMULATION_DURATION);
|
43 |
+
|
44 |
+
return () => {
|
45 |
+
if (simulationTimeoutRef.current) {
|
46 |
+
clearTimeout(simulationTimeoutRef.current);
|
47 |
+
}
|
48 |
+
if (startingMessageIntervalRef.current) {
|
49 |
+
clearInterval(startingMessageIntervalRef.current);
|
50 |
+
}
|
51 |
+
};
|
52 |
+
}, [onComplete]);
|
53 |
+
|
54 |
+
return {
|
55 |
+
startingMessageIndex,
|
56 |
+
evaluationComplete,
|
57 |
+
currentMessage: STARTING_MESSAGES[startingMessageIndex],
|
58 |
+
};
|
59 |
+
};
|
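With SIMULATION_DURATION at 120000 ms and five messages, the interval works out to 120000 / 5 = 24000 ms, so the displayed message advances roughly every 24 seconds and onComplete fires once after the full two minutes. Evaluation.jsx consumes the hook essentially like this:

// Simulated run for precomputed demo documents (see Evaluation.jsx above).
const { startingMessageIndex, evaluationComplete, currentMessage } =
  useSimulation(() => {
    // invoked once, after SIMULATION_DURATION has elapsed
  });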
frontend/src/components/Evaluation/hooks/useTimer.js
ADDED
@@ -0,0 +1,48 @@
|
1 |
+
import { useState, useRef, useEffect } from "react";
|
2 |
+
|
3 |
+
export const useTimer = () => {
|
4 |
+
const [elapsedTime, setElapsedTime] = useState(0);
|
5 |
+
const timerIntervalRef = useRef(null);
|
6 |
+
const startTimeRef = useRef(null);
|
7 |
+
|
8 |
+
const startTimer = () => {
|
9 |
+
startTimeRef.current = Date.now();
|
10 |
+
timerIntervalRef.current = setInterval(() => {
|
11 |
+
const timeElapsed = Math.floor(
|
12 |
+
(Date.now() - startTimeRef.current) / 1000
|
13 |
+
);
|
14 |
+
setElapsedTime(timeElapsed);
|
15 |
+
}, 1000);
|
16 |
+
};
|
17 |
+
|
18 |
+
const stopTimer = () => {
|
19 |
+
if (timerIntervalRef.current) {
|
20 |
+
clearInterval(timerIntervalRef.current);
|
21 |
+
}
|
22 |
+
};
|
23 |
+
|
24 |
+
const formatElapsedTime = () => {
|
25 |
+
const hours = Math.floor(elapsedTime / 3600);
|
26 |
+
const minutes = Math.floor((elapsedTime % 3600) / 60);
|
27 |
+
const seconds = elapsedTime % 60;
|
28 |
+
|
29 |
+
return [
|
30 |
+
hours.toString().padStart(2, "0"),
|
31 |
+
minutes.toString().padStart(2, "0"),
|
32 |
+
seconds.toString().padStart(2, "0"),
|
33 |
+
].join(":");
|
34 |
+
};
|
35 |
+
|
36 |
+
useEffect(() => {
|
37 |
+
startTimer();
|
38 |
+
return () => {
|
39 |
+
stopTimer();
|
40 |
+
};
|
41 |
+
}, []);
|
42 |
+
|
43 |
+
return {
|
44 |
+
elapsedTime,
|
45 |
+
formatElapsedTime,
|
46 |
+
stopTimer,
|
47 |
+
};
|
48 |
+
};
|
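The timer starts on mount and is stopped explicitly once the work is done, as Evaluation.jsx does. A minimal standalone consumer could look like the following (a hypothetical component, shown only to illustrate the return shape above):

import React from "react";
import { useTimer } from "./useTimer";

// Hypothetical display-only consumer of useTimer (illustration only).
const ElapsedClock = ({ done }) => {
  const { formatElapsedTime, stopTimer } = useTimer();

  React.useEffect(() => {
    if (done) stopTimer(); // freeze the clock once the work has finished
  }, [done, stopTimer]);

  return <span>{formatElapsedTime()}</span>; // renders HH:MM:SS
};

export default ElapsedClock;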
frontend/src/components/Footer/Footer.js
CHANGED
@@ -6,21 +6,23 @@ const Footer = () => {
|
|
6 |
<Box
|
7 |
component="footer"
|
8 |
sx={{
|
9 |
-
width: "
|
|
|
10 |
py: 4,
|
11 |
textAlign: "center",
|
|
|
12 |
}}
|
13 |
>
|
14 |
<Typography variant="body2" color="text.secondary" sx={{ mx: 4 }}>
|
15 |
-
|
16 |
-
|
17 |
<Link
|
18 |
-
href="https://huggingface.co"
|
19 |
target="_blank"
|
20 |
rel="noopener noreferrer"
|
21 |
color="inherit"
|
22 |
>
|
23 |
-
|
24 |
</Link>
|
25 |
</Typography>
|
26 |
</Box>
|
|
|
6 |
<Box
|
7 |
component="footer"
|
8 |
sx={{
|
9 |
+
width: "70%",
|
10 |
+
margin: "0 auto",
|
11 |
py: 4,
|
12 |
textAlign: "center",
|
13 |
+
opacity: 0.7,
|
14 |
}}
|
15 |
>
|
16 |
<Typography variant="body2" color="text.secondary" sx={{ mx: 4 }}>
|
17 |
+
We keep processed documents for research purposes, to which you agree by
|
18 |
+
using the space. For a fully private usage, please duplicate the{" "}
|
19 |
<Link
|
20 |
+
href="https://huggingface.co/spaces/yourbench/advanced"
|
21 |
target="_blank"
|
22 |
rel="noopener noreferrer"
|
23 |
color="inherit"
|
24 |
>
|
25 |
+
advanced demo space
|
26 |
</Link>
|
27 |
</Typography>
|
28 |
</Box>
|
frontend/src/components/Intro.jsx
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import React from "react";
|
2 |
-
import { Box, Typography } from "@mui/material";
|
3 |
import HFLogo from "./Logo/HFLogo";
|
4 |
|
5 |
const Intro = () => {
|
@@ -42,7 +42,19 @@ const Intro = () => {
|
|
42 |
YourBench is an <b>open-source framework</b> for generating{" "}
|
43 |
<b>domain-specific benchmarks</b> in a <b>zero-shot</b> manner. It aims
|
44 |
to keep your large language models on their toes—even as new data
|
45 |
-
sources, domains, and knowledge demands evolve.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
</Typography>
|
47 |
</Box>
|
48 |
);
|
|
|
1 |
import React from "react";
|
2 |
+
import { Box, Typography, Link } from "@mui/material";
|
3 |
import HFLogo from "./Logo/HFLogo";
|
4 |
|
5 |
const Intro = () => {
|
|
|
42 |
YourBench is an <b>open-source framework</b> for generating{" "}
|
43 |
<b>domain-specific benchmarks</b> in a <b>zero-shot</b> manner. It aims
|
44 |
to keep your large language models on their toes—even as new data
|
45 |
+
sources, domains, and knowledge demands evolve.
|
46 |
+
<br />
|
47 |
+
<br /> Currently, this is an <b>extremely minimal demo</b>. <br />
|
48 |
+
To <b>unlock the full capabilities</b>, please visit our{" "}
|
49 |
+
<Link
|
50 |
+
href="https://github.com/yourbench"
|
51 |
+
target="_blank"
|
52 |
+
rel="noopener noreferrer"
|
53 |
+
color="inherit"
|
54 |
+
>
|
55 |
+
<b>GitHub</b>
|
56 |
+
</Link>
|
57 |
+
!
|
58 |
</Typography>
|
59 |
</Box>
|
60 |
);
|
frontend/src/components/KeyboardShortcuts.jsx
DELETED
@@ -1,24 +0,0 @@
|
|
1 |
-
import React, { useEffect } from "react";
|
2 |
-
|
3 |
-
function KeyboardShortcuts() {
|
4 |
-
useEffect(() => {
|
5 |
-
const handleKeyDown = (e) => {
|
6 |
-
if (e.key === "p") {
|
7 |
-
console.log("Debug key pressed: Clearing auth data and refreshing");
|
8 |
-
localStorage.removeItem("hf_oauth");
|
9 |
-
localStorage.removeItem("auth_return_to");
|
10 |
-
alert("Auth data cleared. Page will reload.");
|
11 |
-
window.location.reload();
|
12 |
-
}
|
13 |
-
};
|
14 |
-
|
15 |
-
window.addEventListener("keydown", handleKeyDown);
|
16 |
-
return () => {
|
17 |
-
window.removeEventListener("keydown", handleKeyDown);
|
18 |
-
};
|
19 |
-
}, []);
|
20 |
-
|
21 |
-
return null;
|
22 |
-
}
|
23 |
-
|
24 |
-
export default KeyboardShortcuts;
|
frontend/src/components/{ExternalLinks.jsx → Navigation.jsx}
RENAMED
@@ -13,7 +13,7 @@ import OpenInNewIcon from "@mui/icons-material/OpenInNew";
 import ShareIcon from "@mui/icons-material/Share";
 import MenuIcon from "@mui/icons-material/Menu";
 
-const ExternalLinks = () => {
+const Navigation = () => {
   const [anchorEl, setAnchorEl] = useState(null);
   const theme = useTheme();
   const isMobile = useMediaQuery(theme.breakpoints.down("sm"));
@@ -44,7 +44,7 @@ const ExternalLinks = () => {
       url: "https://github.com/huggingface/yourbench",
     },
     {
-      name: "
+      name: "Advanced demo",
      url: "https://huggingface.co/spaces/yourbench/advanced",
    },
  ];
@@ -175,4 +175,4 @@ const ExternalLinks = () => {
   );
 };
 
-export default ExternalLinks;
+export default Navigation;
frontend/src/components/common/ErrorDisplay.jsx
ADDED
@@ -0,0 +1,43 @@
+import React from "react";
+import { Box, Typography } from "@mui/material";
+import SentimentVeryDissatisfiedIcon from "@mui/icons-material/SentimentVeryDissatisfied";
+
+/**
+ * Generic error display component with centered icon and text
+ * @param {Object} props
+ * @param {string} props.error - The error message to display
+ * @param {string} [props.title="Error"] - Optional custom title
+ * @param {Object} [props.sx={}] - Optional additional styles
+ */
+const ErrorDisplay = ({ error, title = "Error", sx = {} }) => {
+  return (
+    <Box
+      sx={{
+        display: "flex",
+        flexDirection: "column",
+        alignItems: "center",
+        justifyContent: "center",
+        p: 4,
+        gap: 2,
+        ...sx,
+      }}
+    >
+      <SentimentVeryDissatisfiedIcon
+        sx={{ fontSize: 60, color: "warning.main" }}
+      />
+      <Typography variant="h6" color="warning">
+        {title}
+      </Typography>
+      <Typography
+        variant="body1"
+        align="center"
+        color="text.secondary"
+        sx={{ maxWidth: "80%", lineHeight: 1.5 }}
+      >
+        {error}
+      </Typography>
+    </Box>
+  );
+};
+
+export default ErrorDisplay;
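For reference, a minimal usage sketch of the new ErrorDisplay component. The page name MyPage and the title string are hypothetical illustrations, not part of this commit; the real wiring appears in EvaluationDisplayPage.jsx further down.

// Hypothetical consumer (not in this commit) showing how ErrorDisplay is rendered
// when an error string is present. title is optional and defaults to "Error";
// the sx prop can override the centered layout.
import React from "react";
import ErrorDisplay from "../components/common/ErrorDisplay";

function MyPage({ error }) {
  if (!error) return null;
  return <ErrorDisplay error={error} title="Something went wrong" />;
}

export default MyPage;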
frontend/src/components/shared/AuthContainer.js
DELETED
@@ -1,192 +0,0 @@
-import React, { useEffect } from "react";
-import {
-  Box,
-  Typography,
-  Button,
-  Chip,
-  Stack,
-  Paper,
-  CircularProgress,
-  useTheme,
-  useMediaQuery,
-} from "@mui/material";
-import HFLogo from "../Logo/HFLogo";
-import { useAuth } from "../../hooks/useAuth";
-import LogoutIcon from "@mui/icons-material/Logout";
-import { useNavigate } from "react-router-dom";
-
-function AuthContainer({ actionText = "DO_ACTION", onSuccess }) {
-  const { isAuthenticated, user, login, logout, loading } = useAuth();
-  const navigate = useNavigate();
-  const theme = useTheme();
-  const isMobile = useMediaQuery(theme.breakpoints.down("sm"));
-
-  // Trigger onSuccess callback when user is authenticated
-  useEffect(() => {
-    if (isAuthenticated && onSuccess) {
-      // Add a small delay to ensure UI is updated properly
-      setTimeout(() => {
-        console.log("User is authenticated, calling onSuccess callback");
-        onSuccess();
-      }, 100);
-    }
-  }, [isAuthenticated, onSuccess]);
-
-  // Check localStorage manually as a fallback
-  useEffect(() => {
-    if (!isAuthenticated && !loading && onSuccess) {
-      const storedAuth = localStorage.getItem("hf_oauth");
-      if (storedAuth) {
-        console.log(
-          "Found auth data in localStorage but isAuthenticated is false, forcing onSuccess"
-        );
-        onSuccess();
-      }
-    }
-  }, [isAuthenticated, loading, onSuccess]);
-
-  const handleLogout = () => {
-    if (isAuthenticated && logout) {
-      logout();
-      navigate("/", { replace: true });
-      window.location.reload();
-    }
-  };
-
-  if (loading) {
-    return (
-      <Paper
-        elevation={0}
-        sx={{
-          p: 3,
-          mb: 4,
-          border: "1px solid",
-          borderColor: "grey.300",
-          display: "flex",
-          flexDirection: "column",
-          alignItems: "center",
-          gap: 2,
-        }}
-      >
-        <CircularProgress size={24} />
-      </Paper>
-    );
-  }
-
-  if (!isAuthenticated) {
-    return (
-      <Paper
-        elevation={0}
-        sx={{
-          p: 3,
-          mb: 4,
-          border: "1px solid",
-          borderColor: "grey.300",
-          display: "flex",
-          flexDirection: "column",
-          alignItems: "center",
-          gap: 2,
-        }}
-      >
-        <Typography variant="h6" align="center">
-          Login to {actionText}
-        </Typography>
-        <Typography
-          variant="body2"
-          color="text.secondary"
-          align="center"
-          sx={{
-            px: isMobile ? 2 : 0,
-          }}
-        >
-          You need to be logged in with your Hugging Face account to{" "}
-          {actionText.toLowerCase()}
-        </Typography>
-        <Button
-          variant="contained"
-          onClick={login}
-          startIcon={
-            <Box
-              sx={{
-                width: 20,
-                height: 20,
-                display: "flex",
-                alignItems: "center",
-              }}
-            >
-              <HFLogo />
-            </Box>
-          }
-          sx={{
-            textTransform: "none",
-            fontWeight: 600,
-            py: 1,
-            px: 2,
-            width: isMobile ? "100%" : "auto",
-          }}
-        >
-          Sign in with Hugging Face
-        </Button>
-      </Paper>
-    );
-  }
-
-  return (
-    <Paper
-      elevation={0}
-      sx={{ p: 2, border: "1px solid", borderColor: "grey.300", mb: 4 }}
-    >
-      <Stack
-        direction={isMobile ? "column" : "row"}
-        spacing={2}
-        alignItems={isMobile ? "stretch" : "center"}
-        justifyContent="space-between"
-      >
-        <Stack
-          direction={isMobile ? "column" : "row"}
-          spacing={1}
-          alignItems={isMobile ? "stretch" : "center"}
-          sx={{ width: "100%" }}
-        >
-          <Typography
-            variant="body1"
-            align={isMobile ? "center" : "left"}
-            sx={{ mb: isMobile ? 1 : 0 }}
-          >
-            Connected as <strong>{user?.username}</strong>
-          </Typography>
-          <Chip
-            label={`Ready to ${actionText}`}
-            color="success"
-            size="small"
-            variant="outlined"
-            sx={{
-              width: isMobile ? "100%" : "auto",
-              height: isMobile ? 32 : 24,
-              "& .MuiChip-label": {
-                px: isMobile ? 2 : 1,
-              },
-            }}
-          />
-        </Stack>
-        <Button
-          variant="contained"
-          onClick={handleLogout}
-          endIcon={<LogoutIcon />}
-          color="primary"
-          sx={{
-            minWidth: 120,
-            height: 36,
-            textTransform: "none",
-            fontSize: "0.9375rem",
-            width: isMobile ? "100%" : "auto",
-          }}
-        >
-          Logout
-        </Button>
-      </Stack>
-    </Paper>
-  );
-}
-
-export default AuthContainer;
frontend/src/components/shared/CodeBlock.js
DELETED
@@ -1,37 +0,0 @@
-import React from 'react';
-import { Box, IconButton } from '@mui/material';
-import ContentCopyIcon from '@mui/icons-material/ContentCopy';
-
-const CodeBlock = ({ code }) => (
-  <Box sx={{ position: 'relative' }}>
-    <IconButton
-      onClick={() => navigator.clipboard.writeText(code)}
-      sx={{
-        position: 'absolute',
-        top: 8,
-        right: 8,
-        color: 'grey.500',
-        '&:hover': { color: 'grey.300' },
-      }}
-    >
-      <ContentCopyIcon fontSize="small" />
-    </IconButton>
-    <Box
-      sx={{
-        backgroundColor: 'grey.900',
-        color: 'grey.100',
-        p: 2,
-        borderRadius: 1,
-        fontFamily: 'monospace',
-        fontSize: '0.875rem',
-        overflowX: 'auto',
-        textAlign: 'left',
-        whiteSpace: 'pre',
-      }}
-    >
-      {code}
-    </Box>
-  </Box>
-);
-
-export default CodeBlock;
frontend/src/components/shared/FilterTag.js
DELETED
@@ -1,139 +0,0 @@
-import React from "react";
-import { Chip } from "@mui/material";
-import { useTheme } from "@mui/material/styles";
-import { alpha } from "@mui/material/styles";
-import CheckBoxOutlineBlankIcon from "@mui/icons-material/CheckBoxOutlineBlank";
-import CheckBoxOutlinedIcon from "@mui/icons-material/CheckBoxOutlined";
-
-const FilterTag = ({
-  label,
-  checked,
-  onChange,
-  count,
-  isHideFilter = false,
-  totalCount = 0,
-  variant = "tag",
-  showCheckbox = false,
-  stacked = false,
-  sx = {},
-}) => {
-  const theme = useTheme();
-
-  const formatCount = (count) => {
-    if (count === undefined) return "";
-    return `${count}`;
-  };
-
-  const mainLabel = label;
-  const countLabel = count !== undefined ? formatCount(count) : "";
-
-  return (
-    <Chip
-      icon={
-        showCheckbox ? (
-          checked ? (
-            <CheckBoxOutlinedIcon
-              sx={{
-                fontSize: "1.1rem",
-                ml: 0.8,
-                color: checked
-                  ? variant === "secondary"
-                    ? "secondary.main"
-                    : "primary.main"
-                  : "text.secondary",
-              }}
-            />
-          ) : (
-            <CheckBoxOutlineBlankIcon
-              sx={{
-                fontSize: "1.1rem",
-                ml: 0.8,
-                color: "text.secondary",
-              }}
-            />
-          )
-        ) : null
-      }
-      label={
-        <span>
-          {mainLabel}
-          {countLabel && (
-            <>
-              <span
-                style={{
-                  display: "inline-block",
-                  width: "3px",
-                  height: "3px",
-                  borderRadius: "50%",
-                  backgroundColor: "currentColor",
-                  opacity: 0.2,
-                  margin: "0 4px",
-                  verticalAlign: "middle",
-                }}
-              />
-              <span style={{ opacity: 0.5 }}>{countLabel}</span>
-            </>
-          )}
-        </span>
-      }
-      onClick={onChange}
-      variant="outlined"
-      color={
-        checked
-          ? variant === "secondary"
-            ? "secondary"
-            : "primary"
-          : "default"
-      }
-      size="small"
-      data-checked={checked}
-      sx={{
-        height: "32px",
-        fontWeight: 600,
-        opacity: checked ? 1 : 0.8,
-        borderRadius: "5px",
-        borderWidth: "1px",
-        borderStyle: "solid",
-        cursor: "pointer",
-        pl: showCheckbox ? 0.5 : 0,
-        mr: 0.5,
-        mb: 0.5,
-        transition: "opacity 0.2s ease, border-color 0.2s ease",
-        "& .MuiChip-label": {
-          px: 0.75,
-          pl: showCheckbox ? 0.6 : 0.75,
-        },
-        "& .MuiChip-icon": {
-          mr: 0.5,
-          pl: 0.2,
-        },
-        "&:hover": {
-          opacity: 1,
-          backgroundColor: checked
-            ? alpha(
-                theme.palette[variant === "secondary" ? "secondary" : "primary"]
-                  .main,
-                theme.palette.mode === "light" ? 0.08 : 0.16
-              )
-            : "action.hover",
-          borderWidth: "1px",
-        },
-        backgroundColor: checked
-          ? alpha(
-              theme.palette[variant === "secondary" ? "secondary" : "primary"]
-                .main,
-              theme.palette.mode === "light" ? 0.08 : 0.16
-            )
-          : "background.paper",
-        borderColor: checked
-          ? variant === "secondary"
-            ? "secondary.main"
-            : "primary.main"
-          : "divider",
-        ...sx,
-      }}
-    />
-  );
-};
-
-export default FilterTag;
frontend/src/components/shared/InfoIconWithTooltip.js
DELETED
@@ -1,87 +0,0 @@
-import React from "react";
-import { Box, Tooltip, Portal, Backdrop } from "@mui/material";
-import InfoOutlinedIcon from "@mui/icons-material/InfoOutlined";
-
-const InfoIconWithTooltip = ({ tooltip, iconProps = {}, sx = {} }) => {
-  const [open, setOpen] = React.useState(false);
-
-  return (
-    <>
-      <Tooltip
-        title={tooltip}
-        arrow
-        placement="top"
-        open={open}
-        onOpen={() => setOpen(true)}
-        onClose={() => setOpen(false)}
-        componentsProps={{
-          tooltip: {
-            sx: {
-              bgcolor: "rgba(33, 33, 33, 0.95)",
-              padding: "12px 16px",
-              maxWidth: "none !important",
-              width: "auto",
-              minWidth: "200px",
-              fontSize: "0.875rem",
-              lineHeight: 1.5,
-              position: "relative",
-              zIndex: 1501,
-              "& .MuiTooltip-arrow": {
-                color: "rgba(33, 33, 33, 0.95)",
-              },
-            },
-          },
-          popper: {
-            sx: {
-              zIndex: 1501,
-              maxWidth: "min(600px, 90vw) !important",
-              '&[data-popper-placement*="bottom"] .MuiTooltip-tooltip': {
-                marginTop: "10px",
-              },
-              '&[data-popper-placement*="top"] .MuiTooltip-tooltip': {
-                marginBottom: "10px",
-              },
-            },
-          },
-        }}
-      >
-        <Box
-          component="span"
-          sx={{
-            opacity: 0.5,
-            display: "flex",
-            alignItems: "center",
-            cursor: "help",
-            "&:hover": { opacity: 0.8 },
-            position: "relative",
-            zIndex: open ? 1502 : "auto",
-            ...sx,
-          }}
-        >
-          <InfoOutlinedIcon
-            sx={{
-              fontSize: "1rem",
-              ...iconProps.sx,
-            }}
-            {...iconProps}
-          />
-        </Box>
-      </Tooltip>
-      {open && (
-        <Portal>
-          <Backdrop
-            open={true}
-            sx={{
-              zIndex: 1500,
-              backgroundColor: "rgba(0, 0, 0, 0.5)",
-              transition: "opacity 0.2s ease",
-              pointerEvents: "none",
-            }}
-          />
-        </Portal>
-      )}
-    </>
-  );
-};
-
-export default InfoIconWithTooltip;
frontend/src/components/shared/PageHeader.js
DELETED
@@ -1,29 +0,0 @@
-import React from "react";
-import { Box, Typography } from "@mui/material";
-
-const PageHeader = ({ title, subtitle }) => {
-  return (
-    <Box
-      sx={{
-        display: "flex",
-        flexDirection: "column",
-        alignItems: "center",
-        textAlign: "center",
-        mb: 6,
-        mt: 6,
-        gap: 2,
-      }}
-    >
-      <Typography fontWeight="bold" variant="h3" component="h1">
-        {title}
-      </Typography>
-      {subtitle && (
-        <Typography variant="h6" color="text.secondary">
-          {subtitle}
-        </Typography>
-      )}
-    </Box>
-  );
-};
-
-export default PageHeader;
frontend/src/pages/BenchmarkDisplayPage.jsx
CHANGED
@@ -2,7 +2,7 @@ import React, { useState, useEffect } from "react";
 import { Box, CircularProgress } from "@mui/material";
 import { useNavigate, useSearchParams, Navigate } from "react-router-dom";
 import Intro from "../components/Intro";
-import
+import Display from "../components/Benchmark/Display";
 import API_CONFIG from "../config/api";
 import { useThemeMode } from "../hooks/useThemeMode";
 import getTheme from "../config/theme";
@@ -138,7 +138,7 @@ function BenchmarkDisplayPage() {
           bgcolor: "background.paper",
         }}
       >
-        <
+        <Display
          onStartEvaluation={handleStartEvaluation}
          sessionId={sessionId}
          datasetUrl={datasetUrl}
frontend/src/pages/BenchmarkEvaluationPage.jsx
CHANGED
@@ -2,7 +2,7 @@ import React, { useState, useEffect } from "react";
 import { Box, CircularProgress } from "@mui/material";
 import { useNavigate, useSearchParams, Navigate } from "react-router-dom";
 import Intro from "../components/Intro";
-import
+import Evaluation from "../components/Evaluation/Evaluation";
 import API_CONFIG from "../config/api";
 
 function BenchmarkEvaluationPage() {
@@ -75,7 +75,7 @@ function BenchmarkEvaluationPage() {
         <CircularProgress size={60} />
       </Box>
     ) : (
-      <
+      <Evaluation
        sessionId={sessionId}
        isDefaultDocument={isDefault}
        onComplete={handleEvaluationComplete}
frontend/src/pages/BenchmarkGenerationPage.jsx
CHANGED
@@ -2,7 +2,7 @@ import React, { useState, useEffect, useRef } from "react";
 import { Box, CircularProgress } from "@mui/material";
 import { useNavigate, useSearchParams, Navigate } from "react-router-dom";
 import Intro from "../components/Intro";
-import
+import Generator from "../components/Benchmark/Generator";
 
 function BenchmarkGenerationPage() {
   const navigate = useNavigate();
@@ -36,7 +36,7 @@ function BenchmarkGenerationPage() {
   return (
     <>
       <Intro />
-      <
+      <Generator
        sessionId={sessionId}
        isDefaultDocument={isDefault}
        onComplete={handleGenerationComplete}
frontend/src/pages/EvaluationDisplayPage.jsx
CHANGED
@@ -1,11 +1,12 @@
 import React, { useState, useEffect } from "react";
-import { Box, CircularProgress
+import { Box, CircularProgress } from "@mui/material";
 import { useSearchParams, Navigate } from "react-router-dom";
 import Intro from "../components/Intro";
-import
+import Display from "../components/Evaluation/Display";
 import { useThemeMode } from "../hooks/useThemeMode";
 import getTheme from "../config/theme";
 import API_CONFIG from "../config/api";
+import ErrorDisplay from "../components/common/ErrorDisplay";
 
 function EvaluationDisplayPage() {
   const [searchParams] = useSearchParams();
@@ -132,9 +133,7 @@ function EvaluationDisplayPage() {
         <CircularProgress size={60} />
       </Box>
     ) : error ? (
-      <
-        {error}
-      </Alert>
+      <ErrorDisplay error={error} title="Error" />
     ) : (
       <Box
         sx={{
@@ -144,10 +143,7 @@ function EvaluationDisplayPage() {
          bgcolor: "background.paper",
        }}
      >
-        <
-          sessionId={sessionId}
-          results={evaluationResults}
-        />
+        <Display sessionId={sessionId} results={evaluationResults} />
      </Box>
    )}
  </>
frontend/src/pages/HomePage.jsx
CHANGED
@@ -2,7 +2,7 @@ import React from "react";
 import { Box } from "@mui/material";
 import { useNavigate } from "react-router-dom";
 import Intro from "../components/Intro";
-import
+import CreateForm from "../components/Benchmark/CreateForm";
 import { useThemeMode } from "../hooks/useThemeMode";
 import getTheme from "../config/theme";
 
@@ -30,7 +30,7 @@ function HomePage() {
         bgcolor: "background.paper",
       }}
     >
-      <
+      <CreateForm onStartGeneration={handleStartGeneration} />
      </Box>
    </>
  );