Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
block >1MB files | translate comments to English
Browse files
- backend/routes/benchmark.py +16 -16
- backend/tasks/evaluation_task.py +12 -12
- backend/tests/run_bench.py +1 -1
- backend/tests/run_lighteval.py +3 -3
- frontend/src/App.js +14 -14
- frontend/src/components/BenchmarkCreateForm.jsx +23 -0
- frontend/src/components/BenchmarkDisplay.jsx +5 -20
- frontend/src/pages/BenchmarkDisplayPage.jsx +11 -16
- frontend/src/pages/BenchmarkEvaluationPage.jsx +6 -10
- test_import.py +2 -2
backend/routes/benchmark.py
CHANGED
@@ -27,9 +27,9 @@ async def generate_benchmark(data: Dict[str, Any]):
|
|
27 |
"""
|
28 |
session_id = data.get("session_id")
|
29 |
|
30 |
-
#
|
31 |
-
print(f"DEBUG: Session ID
|
32 |
-
print(f"DEBUG:
|
33 |
|
34 |
if not session_id or session_id not in router.session_files:
|
35 |
return {"error": "Invalid or missing session ID"}
|
@@ -112,9 +112,9 @@ class UnifiedBenchmarkTask:
|
|
112 |
Args:
|
113 |
message: Log message to add
|
114 |
"""
|
115 |
-
if message not in self.logs: #
|
116 |
self.logs.append(message)
|
117 |
-
#
|
118 |
self.logs = self.logs.copy()
|
119 |
print(f"[{self.session_uid}] {message}")
|
120 |
|
@@ -143,7 +143,7 @@ class UnifiedBenchmarkTask:
|
|
143 |
Args:
|
144 |
file_path: Path to the uploaded file
|
145 |
"""
|
146 |
-
#
|
147 |
import threading
|
148 |
thread = threading.Thread(target=self._run_process, args=(file_path,))
|
149 |
thread.daemon = True
|
@@ -157,43 +157,43 @@ class UnifiedBenchmarkTask:
|
|
157 |
file_path: Path to the uploaded file
|
158 |
"""
|
159 |
try:
|
160 |
-
#
|
161 |
self._add_log("[INFO] Starting configuration process")
|
162 |
self.config_task = CreateBenchConfigTask(session_uid=self.session_uid)
|
163 |
|
164 |
-
#
|
165 |
config_path = self.config_task.run(file_path=file_path)
|
166 |
|
167 |
-
#
|
168 |
config_logs = self.config_task.get_logs()
|
169 |
for log in config_logs:
|
170 |
self._add_log(log)
|
171 |
|
172 |
-
#
|
173 |
if "[SUCCESS] Stage completed: config_generation" not in self.logs:
|
174 |
self._add_log("[SUCCESS] Stage completed: configuration")
|
175 |
|
176 |
-
#
|
177 |
self._add_log("[INFO] Starting benchmark process")
|
178 |
self.bench_task = CreateBenchTask(session_uid=self.session_uid, config_path=config_path)
|
179 |
|
180 |
-
#
|
181 |
self.bench_task.run()
|
182 |
|
183 |
-
#
|
184 |
while not self.bench_task.is_task_completed():
|
185 |
-
#
|
186 |
bench_logs = self.bench_task.get_logs()
|
187 |
for log in bench_logs:
|
188 |
self._add_log(log)
|
189 |
time.sleep(1)
|
190 |
|
191 |
-
#
|
192 |
final_logs = self.bench_task.get_logs()
|
193 |
for log in final_logs:
|
194 |
self._add_log(log)
|
195 |
|
196 |
-
#
|
197 |
self.is_completed = True
|
198 |
self._add_log("[SUCCESS] Benchmark process completed successfully")
|
199 |
|
|
|
27 |
"""
|
28 |
session_id = data.get("session_id")
|
29 |
|
30 |
+
# Debug to check session_files and received session_id
|
31 |
+
print(f"DEBUG: Session ID received: {session_id}")
|
32 |
+
print(f"DEBUG: Available session files: {list(router.session_files.keys())}")
|
33 |
|
34 |
if not session_id or session_id not in router.session_files:
|
35 |
return {"error": "Invalid or missing session ID"}
|
|
|
112 |
Args:
|
113 |
message: Log message to add
|
114 |
"""
|
115 |
+
if message not in self.logs: # Avoid duplicates
|
116 |
self.logs.append(message)
|
117 |
+
# Force a copy to avoid reference problems
|
118 |
self.logs = self.logs.copy()
|
119 |
print(f"[{self.session_uid}] {message}")
|
120 |
|
|
|
143 |
Args:
|
144 |
file_path: Path to the uploaded file
|
145 |
"""
|
146 |
+
# Start in a separate thread to avoid blocking
|
147 |
import threading
|
148 |
thread = threading.Thread(target=self._run_process, args=(file_path,))
|
149 |
thread.daemon = True
|
|
|
157 |
file_path: Path to the uploaded file
|
158 |
"""
|
159 |
try:
|
160 |
+
# Step 1: Configuration
|
161 |
self._add_log("[INFO] Starting configuration process")
|
162 |
self.config_task = CreateBenchConfigTask(session_uid=self.session_uid)
|
163 |
|
164 |
+
# Execute the configuration task
|
165 |
config_path = self.config_task.run(file_path=file_path)
|
166 |
|
167 |
+
# Get configuration logs
|
168 |
config_logs = self.config_task.get_logs()
|
169 |
for log in config_logs:
|
170 |
self._add_log(log)
|
171 |
|
172 |
+
# Mark configuration step as completed
|
173 |
if "[SUCCESS] Stage completed: config_generation" not in self.logs:
|
174 |
self._add_log("[SUCCESS] Stage completed: configuration")
|
175 |
|
176 |
+
# Step 2: Benchmark
|
177 |
self._add_log("[INFO] Starting benchmark process")
|
178 |
self.bench_task = CreateBenchTask(session_uid=self.session_uid, config_path=config_path)
|
179 |
|
180 |
+
# Run the benchmark task
|
181 |
self.bench_task.run()
|
182 |
|
183 |
+
# Wait for the benchmark task to complete
|
184 |
while not self.bench_task.is_task_completed():
|
185 |
+
# Get new logs and add them
|
186 |
bench_logs = self.bench_task.get_logs()
|
187 |
for log in bench_logs:
|
188 |
self._add_log(log)
|
189 |
time.sleep(1)
|
190 |
|
191 |
+
# Get final logs
|
192 |
final_logs = self.bench_task.get_logs()
|
193 |
for log in final_logs:
|
194 |
self._add_log(log)
|
195 |
|
196 |
+
# Mark as completed
|
197 |
self.is_completed = True
|
198 |
self._add_log("[SUCCESS] Benchmark process completed successfully")
|
199 |
|
backend/tasks/evaluation_task.py
CHANGED
@@ -47,28 +47,28 @@ class EvaluationTask:
|
|
47 |
|
48 |
def clean_old_results(self) -> None:
|
49 |
"""
|
50 |
-
|
51 |
"""
|
52 |
-
print(f"[{datetime.now().strftime('%H:%M:%S')}]
|
53 |
|
54 |
-
#
|
55 |
results_dir = Path(f"uploaded_files/{self.session_uid}/lighteval_results")
|
56 |
|
57 |
-
#
|
58 |
if results_dir.exists():
|
59 |
-
print(f"[{datetime.now().strftime('%H:%M:%S')}]
|
60 |
shutil.rmtree(results_dir)
|
61 |
-
print(f"[{datetime.now().strftime('%H:%M:%S')}]
|
62 |
else:
|
63 |
-
print(f"[{datetime.now().strftime('%H:%M:%S')}]
|
64 |
|
65 |
-
#
|
66 |
if os.path.exists("data/lighteval_results"):
|
67 |
-
print(f"[{datetime.now().strftime('%H:%M:%S')}]
|
68 |
try:
|
69 |
shutil.rmtree("data/lighteval_results", ignore_errors=True)
|
70 |
except Exception as e:
|
71 |
-
print(f"[{datetime.now().strftime('%H:%M:%S')}]
|
72 |
|
73 |
def _save_results_to_hub(self) -> None:
|
74 |
"""
|
@@ -224,9 +224,9 @@ TASKS_TABLE = [yourbench]
|
|
224 |
Run the evaluation task asynchronously
|
225 |
|
226 |
Args:
|
227 |
-
clean_first: If True, clean old results before starting (
|
228 |
"""
|
229 |
-
#
|
230 |
self.clean_old_results()
|
231 |
|
232 |
# Start global timer
|
|
|
47 |
|
48 |
def clean_old_results(self) -> None:
|
49 |
"""
|
50 |
+
Clean old evaluation results to avoid confusion
|
51 |
"""
|
52 |
+
print(f"[{datetime.now().strftime('%H:%M:%S')}] Checking and cleaning old results...")
|
53 |
|
54 |
+
# Path to LightEval results
|
55 |
results_dir = Path(f"uploaded_files/{self.session_uid}/lighteval_results")
|
56 |
|
57 |
+
# Delete if exists
|
58 |
if results_dir.exists():
|
59 |
+
print(f"[{datetime.now().strftime('%H:%M:%S')}] Deleting old LightEval results")
|
60 |
shutil.rmtree(results_dir)
|
61 |
+
print(f"[{datetime.now().strftime('%H:%M:%S')}] Cleaning complete")
|
62 |
else:
|
63 |
+
print(f"[{datetime.now().strftime('%H:%M:%S')}] No old results found")
|
64 |
|
65 |
+
# Also check for intermediate lighteval results
|
66 |
if os.path.exists("data/lighteval_results"):
|
67 |
+
print(f"[{datetime.now().strftime('%H:%M:%S')}] Cleaning intermediate results")
|
68 |
try:
|
69 |
shutil.rmtree("data/lighteval_results", ignore_errors=True)
|
70 |
except Exception as e:
|
71 |
+
print(f"[{datetime.now().strftime('%H:%M:%S')}] Error cleaning intermediate results: {str(e)}")
|
72 |
|
73 |
def _save_results_to_hub(self) -> None:
|
74 |
"""
|
|
|
224 |
Run the evaluation task asynchronously
|
225 |
|
226 |
Args:
|
227 |
+
clean_first: If True, clean old results before starting (default: True)
|
228 |
"""
|
229 |
+
# Systematically clean old results before starting
|
230 |
self.clean_old_results()
|
231 |
|
232 |
# Start global timer
|
backend/tests/run_bench.py
CHANGED
@@ -20,4 +20,4 @@ subprocess.run(command, env=os.environ)
|
|
20 |
|
21 |
# Calculate and print execution time
|
22 |
execution_time = time.time() - start_time
|
23 |
-
print(f"\
|
|
|
20 |
|
21 |
# Calculate and print execution time
|
22 |
execution_time = time.time() - start_time
|
23 |
+
print(f"\nExecution time: {execution_time:.2f} seconds")
|
backend/tests/run_lighteval.py
CHANGED
@@ -22,7 +22,7 @@ yourbench = create_yourbench_task("yourbench/yourbench_fbfe278f-70c8-4579-9447-8
|
|
22 |
TASKS_TABLE = [yourbench]
|
23 |
""")
|
24 |
|
25 |
-
#
|
26 |
output_dir = f"data/lighteval_results_strict_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
27 |
|
28 |
# LightEval command
|
@@ -48,8 +48,8 @@ subprocess.run(cmd_args, env=os.environ)
|
|
48 |
|
49 |
# Calculate and print execution time
|
50 |
execution_time = time.time() - start_time
|
51 |
-
print(f"\
|
52 |
-
print(f"
|
53 |
|
54 |
# Clean up
|
55 |
os.unlink(temp_file_path)
|
|
|
22 |
TASKS_TABLE = [yourbench]
|
23 |
""")
|
24 |
|
25 |
+
# Create an output folder with timestamp to avoid overwriting previous results
|
26 |
output_dir = f"data/lighteval_results_strict_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
27 |
|
28 |
# LightEval command
|
|
|
48 |
|
49 |
# Calculate and print execution time
|
50 |
execution_time = time.time() - start_time
|
51 |
+
print(f"\nExecution time: {execution_time:.2f} seconds")
|
52 |
+
print(f"Results saved in: {output_dir}")
|
53 |
|
54 |
# Clean up
|
55 |
os.unlink(temp_file_path)
|
frontend/src/App.js
CHANGED
@@ -17,12 +17,12 @@ import BenchmarkDisplayPage from "./pages/BenchmarkDisplayPage";
|
|
17 |
import BenchmarkEvaluationPage from "./pages/BenchmarkEvaluationPage";
|
18 |
import EvaluationDisplayPage from "./pages/EvaluationDisplayPage";
|
19 |
|
20 |
-
//
|
21 |
const syncURLWithParent = () => {
|
22 |
-
//
|
23 |
if (window.parent !== window) {
|
24 |
try {
|
25 |
-
//
|
26 |
window.parent.postMessage(
|
27 |
{
|
28 |
hash: window.location.hash,
|
@@ -30,7 +30,7 @@ const syncURLWithParent = () => {
|
|
30 |
"https://huggingface.co"
|
31 |
);
|
32 |
|
33 |
-
//
|
34 |
console.log("Synced hash with parent:", window.location.hash);
|
35 |
} catch (error) {
|
36 |
console.error("Error syncing URL with parent:", error);
|
@@ -42,36 +42,36 @@ function App() {
|
|
42 |
const { mode } = useThemeMode();
|
43 |
const theme = getTheme(mode);
|
44 |
|
45 |
-
//
|
46 |
useEffect(() => {
|
47 |
-
//
|
48 |
const handleHashChange = () => {
|
49 |
syncURLWithParent();
|
50 |
};
|
51 |
|
52 |
-
//
|
53 |
const handleParentMessage = (event) => {
|
54 |
-
//
|
55 |
if (event.origin === "https://huggingface.co") {
|
56 |
-
//
|
57 |
if (event.data.hash && event.data.hash !== window.location.hash) {
|
58 |
console.log("Received hash from parent:", event.data.hash);
|
59 |
-
//
|
60 |
window.location.hash = event.data.hash;
|
61 |
}
|
62 |
}
|
63 |
};
|
64 |
|
65 |
-
//
|
66 |
syncURLWithParent();
|
67 |
|
68 |
-
//
|
69 |
window.addEventListener("hashchange", handleHashChange);
|
70 |
|
71 |
-
//
|
72 |
window.addEventListener("message", handleParentMessage);
|
73 |
|
74 |
-
//
|
75 |
return () => {
|
76 |
window.removeEventListener("hashchange", handleHashChange);
|
77 |
window.removeEventListener("message", handleParentMessage);
|
|
|
17 |
import BenchmarkEvaluationPage from "./pages/BenchmarkEvaluationPage";
|
18 |
import EvaluationDisplayPage from "./pages/EvaluationDisplayPage";
|
19 |
|
20 |
+
// Function to synchronize URL hash with parent Hugging Face page
|
21 |
const syncURLWithParent = () => {
|
22 |
+
// This function is only necessary in a Hugging Face Spaces environment
|
23 |
if (window.parent !== window) {
|
24 |
try {
|
25 |
+
// Send the current hash to the parent page (Hugging Face)
|
26 |
window.parent.postMessage(
|
27 |
{
|
28 |
hash: window.location.hash,
|
|
|
30 |
"https://huggingface.co"
|
31 |
);
|
32 |
|
33 |
+
// Log for debugging
|
34 |
console.log("Synced hash with parent:", window.location.hash);
|
35 |
} catch (error) {
|
36 |
console.error("Error syncing URL with parent:", error);
|
|
|
42 |
const { mode } = useThemeMode();
|
43 |
const theme = getTheme(mode);
|
44 |
|
45 |
+
// Effect to monitor hash changes and synchronize them
|
46 |
useEffect(() => {
|
47 |
+
// Event handler function for hash changes
|
48 |
const handleHashChange = () => {
|
49 |
syncURLWithParent();
|
50 |
};
|
51 |
|
52 |
+
// Function to handle messages received from the parent page
|
53 |
const handleParentMessage = (event) => {
|
54 |
+
// Verify that the message comes from Hugging Face
|
55 |
if (event.origin === "https://huggingface.co") {
|
56 |
+
// If the message contains a hash and it's different from the current hash
|
57 |
if (event.data.hash && event.data.hash !== window.location.hash) {
|
58 |
console.log("Received hash from parent:", event.data.hash);
|
59 |
+
// Update the URL hash without reloading the page
|
60 |
window.location.hash = event.data.hash;
|
61 |
}
|
62 |
}
|
63 |
};
|
64 |
|
65 |
+
// Synchronize on initial load
|
66 |
syncURLWithParent();
|
67 |
|
68 |
+
// Listen for hash changes
|
69 |
window.addEventListener("hashchange", handleHashChange);
|
70 |
|
71 |
+
// Listen for messages from the parent page
|
72 |
window.addEventListener("message", handleParentMessage);
|
73 |
|
74 |
+
// Cleanup
|
75 |
return () => {
|
76 |
window.removeEventListener("hashchange", handleHashChange);
|
77 |
window.removeEventListener("message", handleParentMessage);
|
frontend/src/components/BenchmarkCreateForm.jsx
CHANGED
@@ -96,6 +96,17 @@ function BenchmarkCreateForm({ onStartGeneration }) {
|
|
96 |
success: false,
|
97 |
message: "Only PDF, TXT, HTML and MD files are accepted",
|
98 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
return;
|
100 |
}
|
101 |
|
@@ -152,6 +163,7 @@ function BenchmarkCreateForm({ onStartGeneration }) {
|
|
152 |
const file = e.dataTransfer.files[0];
|
153 |
if (!file) {
|
154 |
setUploadStatus({ success: false, message: "No file detected" });
|
|
|
155 |
return;
|
156 |
}
|
157 |
|
@@ -166,6 +178,17 @@ function BenchmarkCreateForm({ onStartGeneration }) {
|
|
166 |
success: false,
|
167 |
message: "Only PDF, TXT, HTML and MD files are accepted",
|
168 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
return;
|
170 |
}
|
171 |
|
|
|
96 |
success: false,
|
97 |
message: "Only PDF, TXT, HTML and MD files are accepted",
|
98 |
});
|
99 |
+
setOpenSnackbar(true);
|
100 |
+
return;
|
101 |
+
}
|
102 |
+
|
103 |
+
// Check file size limit (1MB = 1048576 bytes)
|
104 |
+
if (file.size > 1048576) {
|
105 |
+
setUploadStatus({
|
106 |
+
success: false,
|
107 |
+
message: "File size exceeds the 1MB limit",
|
108 |
+
});
|
109 |
+
setOpenSnackbar(true);
|
110 |
return;
|
111 |
}
|
112 |
|
|
|
163 |
const file = e.dataTransfer.files[0];
|
164 |
if (!file) {
|
165 |
setUploadStatus({ success: false, message: "No file detected" });
|
166 |
+
setOpenSnackbar(true);
|
167 |
return;
|
168 |
}
|
169 |
|
|
|
178 |
success: false,
|
179 |
message: "Only PDF, TXT, HTML and MD files are accepted",
|
180 |
});
|
181 |
+
setOpenSnackbar(true);
|
182 |
+
return;
|
183 |
+
}
|
184 |
+
|
185 |
+
// Check file size limit (1MB = 1048576 bytes)
|
186 |
+
if (file.size > 1048576) {
|
187 |
+
setUploadStatus({
|
188 |
+
success: false,
|
189 |
+
message: "File size exceeds the 1MB limit",
|
190 |
+
});
|
191 |
+
setOpenSnackbar(true);
|
192 |
return;
|
193 |
}
|
194 |
|
frontend/src/components/BenchmarkDisplay.jsx
CHANGED
@@ -10,8 +10,6 @@ import {
|
|
10 |
Link,
|
11 |
CircularProgress,
|
12 |
Tooltip,
|
13 |
-
useTheme,
|
14 |
-
useMediaQuery,
|
15 |
} from "@mui/material";
|
16 |
import PlayArrowIcon from "@mui/icons-material/PlayArrow";
|
17 |
import AssessmentIcon from "@mui/icons-material/Assessment";
|
@@ -40,9 +38,7 @@ const BenchmarkDisplay = ({
|
|
40 |
}) => {
|
41 |
const [isDownloading, setIsDownloading] = useState(false);
|
42 |
const { mode } = useThemeMode();
|
43 |
-
const theme =
|
44 |
-
const isMobile = useMediaQuery(theme.breakpoints.down("sm"));
|
45 |
-
const themeColors = getTheme(mode);
|
46 |
|
47 |
// Default questions if none provided
|
48 |
const questions =
|
@@ -99,10 +95,8 @@ const BenchmarkDisplay = ({
|
|
99 |
sx={{
|
100 |
mb: 4,
|
101 |
display: "flex",
|
102 |
-
flexDirection: isMobile ? "column" : "row",
|
103 |
justifyContent: "space-between",
|
104 |
-
alignItems:
|
105 |
-
gap: isMobile ? 2 : 0,
|
106 |
}}
|
107 |
>
|
108 |
<Box sx={{ display: "flex", alignItems: "center" }}>
|
@@ -110,14 +104,7 @@ const BenchmarkDisplay = ({
|
|
110 |
<Typography variant="h6">Benchmark Created Successfully</Typography>
|
111 |
</Box>
|
112 |
|
113 |
-
<Box
|
114 |
-
sx={{
|
115 |
-
display: "flex",
|
116 |
-
gap: 2,
|
117 |
-
width: isMobile ? "100%" : "auto",
|
118 |
-
flexDirection: isMobile ? "column" : "row",
|
119 |
-
}}
|
120 |
-
>
|
121 |
<Tooltip title="Download the complete benchmark">
|
122 |
<Button
|
123 |
variant="outlined"
|
@@ -131,7 +118,6 @@ const BenchmarkDisplay = ({
|
|
131 |
}
|
132 |
onClick={handleDownloadClick}
|
133 |
disabled={isDownloading || !sessionId}
|
134 |
-
fullWidth={isMobile}
|
135 |
>
|
136 |
{isDownloading ? "Downloading..." : "Download Benchmark"}
|
137 |
</Button>
|
@@ -142,7 +128,6 @@ const BenchmarkDisplay = ({
|
|
142 |
color="primary"
|
143 |
startIcon={<AssessmentIcon />}
|
144 |
onClick={handleEvaluationClick}
|
145 |
-
fullWidth={isMobile}
|
146 |
>
|
147 |
Start Evaluation
|
148 |
</Button>
|
@@ -160,8 +145,8 @@ const BenchmarkDisplay = ({
|
|
160 |
variant="outlined"
|
161 |
sx={{
|
162 |
mb: 2,
|
163 |
-
backgroundColor:
|
164 |
-
border: `1px solid ${
|
165 |
}}
|
166 |
>
|
167 |
<CardContent>
|
|
|
10 |
Link,
|
11 |
CircularProgress,
|
12 |
Tooltip,
|
|
|
|
|
13 |
} from "@mui/material";
|
14 |
import PlayArrowIcon from "@mui/icons-material/PlayArrow";
|
15 |
import AssessmentIcon from "@mui/icons-material/Assessment";
|
|
|
38 |
}) => {
|
39 |
const [isDownloading, setIsDownloading] = useState(false);
|
40 |
const { mode } = useThemeMode();
|
41 |
+
const theme = getTheme(mode);
|
|
|
|
|
42 |
|
43 |
// Default questions if none provided
|
44 |
const questions =
|
|
|
95 |
sx={{
|
96 |
mb: 4,
|
97 |
display: "flex",
|
|
|
98 |
justifyContent: "space-between",
|
99 |
+
alignItems: "center",
|
|
|
100 |
}}
|
101 |
>
|
102 |
<Box sx={{ display: "flex", alignItems: "center" }}>
|
|
|
104 |
<Typography variant="h6">Benchmark Created Successfully</Typography>
|
105 |
</Box>
|
106 |
|
107 |
+
<Box sx={{ display: "flex", gap: 2 }}>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
<Tooltip title="Download the complete benchmark">
|
109 |
<Button
|
110 |
variant="outlined"
|
|
|
118 |
}
|
119 |
onClick={handleDownloadClick}
|
120 |
disabled={isDownloading || !sessionId}
|
|
|
121 |
>
|
122 |
{isDownloading ? "Downloading..." : "Download Benchmark"}
|
123 |
</Button>
|
|
|
128 |
color="primary"
|
129 |
startIcon={<AssessmentIcon />}
|
130 |
onClick={handleEvaluationClick}
|
|
|
131 |
>
|
132 |
Start Evaluation
|
133 |
</Button>
|
|
|
145 |
variant="outlined"
|
146 |
sx={{
|
147 |
mb: 2,
|
148 |
+
backgroundColor: theme.palette.background.subtle,
|
149 |
+
border: `1px solid ${theme.palette.divider}`,
|
150 |
}}
|
151 |
>
|
152 |
<CardContent>
|
frontend/src/pages/BenchmarkDisplayPage.jsx
CHANGED
@@ -22,7 +22,7 @@ function BenchmarkDisplayPage() {
|
|
22 |
console.log("BenchmarkDisplayPage useEffect - sessionId:", sessionId);
|
23 |
|
24 |
if (!sessionId) {
|
25 |
-
console.log("
|
26 |
setIsValidSession(false);
|
27 |
return;
|
28 |
}
|
@@ -30,39 +30,34 @@ function BenchmarkDisplayPage() {
|
|
30 |
setIsLoading(true);
|
31 |
|
32 |
const fetchBenchmarkQuestions = async () => {
|
33 |
-
console.log(
|
34 |
-
"Tentative de récupération des questions pour la session:",
|
35 |
-
sessionId
|
36 |
-
);
|
37 |
try {
|
38 |
const apiUrl = `${API_CONFIG.BASE_URL}/benchmark-questions/${sessionId}`;
|
39 |
-
console.log("
|
40 |
|
41 |
const response = await fetch(apiUrl);
|
42 |
-
console.log("
|
43 |
|
44 |
if (!response.ok) {
|
45 |
if (response.status === 404) {
|
46 |
-
console.error("Session
|
47 |
setIsValidSession(false);
|
48 |
return;
|
49 |
} else {
|
50 |
-
console.error(`
|
51 |
setIsLoading(false);
|
52 |
return;
|
53 |
}
|
54 |
}
|
55 |
|
56 |
const data = await response.json();
|
57 |
-
console.log("
|
58 |
|
59 |
if (data.success && data.questions && data.questions.length > 0) {
|
60 |
-
console.log("Questions
|
61 |
setBenchmarkQuestions(data.questions);
|
62 |
} else {
|
63 |
-
console.warn(
|
64 |
-
"Échec du chargement des questions, utilisation des valeurs par défaut"
|
65 |
-
);
|
66 |
}
|
67 |
|
68 |
if (data.dataset_url) {
|
@@ -70,10 +65,10 @@ function BenchmarkDisplayPage() {
|
|
70 |
} else {
|
71 |
const url = `https://huggingface.co/datasets/yourbench/yourbench_${sessionId}`;
|
72 |
setDatasetUrl(url);
|
73 |
-
console.log("URL
|
74 |
}
|
75 |
} catch (error) {
|
76 |
-
console.error("
|
77 |
setIsValidSession(false);
|
78 |
} finally {
|
79 |
setIsLoading(false);
|
|
|
22 |
console.log("BenchmarkDisplayPage useEffect - sessionId:", sessionId);
|
23 |
|
24 |
if (!sessionId) {
|
25 |
+
console.log("Missing session ID, redirecting to home");
|
26 |
setIsValidSession(false);
|
27 |
return;
|
28 |
}
|
|
|
30 |
setIsLoading(true);
|
31 |
|
32 |
const fetchBenchmarkQuestions = async () => {
|
33 |
+
console.log("Attempting to fetch questions for session:", sessionId);
|
|
|
|
|
|
|
34 |
try {
|
35 |
const apiUrl = `${API_CONFIG.BASE_URL}/benchmark-questions/${sessionId}`;
|
36 |
+
console.log("API call:", apiUrl);
|
37 |
|
38 |
const response = await fetch(apiUrl);
|
39 |
+
console.log("API response received:", response.status);
|
40 |
|
41 |
if (!response.ok) {
|
42 |
if (response.status === 404) {
|
43 |
+
console.error("Session not found");
|
44 |
setIsValidSession(false);
|
45 |
return;
|
46 |
} else {
|
47 |
+
console.error(`Server error: ${response.status}`);
|
48 |
setIsLoading(false);
|
49 |
return;
|
50 |
}
|
51 |
}
|
52 |
|
53 |
const data = await response.json();
|
54 |
+
console.log("API data:", data);
|
55 |
|
56 |
if (data.success && data.questions && data.questions.length > 0) {
|
57 |
+
console.log("Questions loaded successfully:", data.questions);
|
58 |
setBenchmarkQuestions(data.questions);
|
59 |
} else {
|
60 |
+
console.warn("Failed to load questions, using default values");
|
|
|
|
|
61 |
}
|
62 |
|
63 |
if (data.dataset_url) {
|
|
|
65 |
} else {
|
66 |
const url = `https://huggingface.co/datasets/yourbench/yourbench_${sessionId}`;
|
67 |
setDatasetUrl(url);
|
68 |
+
console.log("Dataset URL generated:", url);
|
69 |
}
|
70 |
} catch (error) {
|
71 |
+
console.error("Error retrieving questions:", error);
|
72 |
setIsValidSession(false);
|
73 |
} finally {
|
74 |
setIsLoading(false);
|
frontend/src/pages/BenchmarkEvaluationPage.jsx
CHANGED
@@ -17,14 +17,12 @@ function BenchmarkEvaluationPage() {
|
|
17 |
|
18 |
useEffect(() => {
|
19 |
if (!sessionId) {
|
20 |
-
console.log(
|
21 |
-
"Session ID manquante pour l'évaluation, redirection vers l'accueil"
|
22 |
-
);
|
23 |
setIsValidSession(false);
|
24 |
return;
|
25 |
}
|
26 |
|
27 |
-
//
|
28 |
if (isDefault) {
|
29 |
setIsLoading(false);
|
30 |
return;
|
@@ -37,13 +35,11 @@ function BenchmarkEvaluationPage() {
|
|
37 |
);
|
38 |
|
39 |
if (!response.ok) {
|
40 |
-
console.error(
|
41 |
-
`Session invalide ou erreur serveur: ${response.status}`
|
42 |
-
);
|
43 |
setIsValidSession(false);
|
44 |
}
|
45 |
} catch (error) {
|
46 |
-
console.error("
|
47 |
setIsValidSession(false);
|
48 |
} finally {
|
49 |
setIsLoading(false);
|
@@ -54,8 +50,8 @@ function BenchmarkEvaluationPage() {
|
|
54 |
}, [sessionId, isDefault]);
|
55 |
|
56 |
const handleEvaluationComplete = (result) => {
|
57 |
-
console.log("
|
58 |
-
//
|
59 |
};
|
60 |
|
61 |
if (!isValidSession) {
|
|
|
17 |
|
18 |
useEffect(() => {
|
19 |
if (!sessionId) {
|
20 |
+
console.log("Missing session ID for evaluation, redirecting to home");
|
|
|
|
|
21 |
setIsValidSession(false);
|
22 |
return;
|
23 |
}
|
24 |
|
25 |
+
// If it's a precalculated document, consider it valid directly
|
26 |
if (isDefault) {
|
27 |
setIsLoading(false);
|
28 |
return;
|
|
|
35 |
);
|
36 |
|
37 |
if (!response.ok) {
|
38 |
+
console.error(`Invalid session or server error: ${response.status}`);
|
|
|
|
|
39 |
setIsValidSession(false);
|
40 |
}
|
41 |
} catch (error) {
|
42 |
+
console.error("Error checking session:", error);
|
43 |
setIsValidSession(false);
|
44 |
} finally {
|
45 |
setIsLoading(false);
|
|
|
50 |
}, [sessionId, isDefault]);
|
51 |
|
52 |
const handleEvaluationComplete = (result) => {
|
53 |
+
console.log("Evaluation completed:", result);
|
54 |
+
// Redirection is handled by the BenchmarkEvaluation component
|
55 |
};
|
56 |
|
57 |
if (!isValidSession) {
|
test_import.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
try:
|
2 |
import lighteval_task
|
3 |
-
print("lighteval_task
|
4 |
except ImportError as e:
|
5 |
-
print(f"
|
|
|
1 |
try:
|
2 |
import lighteval_task
|
3 |
+
print("lighteval_task imported successfully!")
|
4 |
except ImportError as e:
|
5 |
+
print(f"Error: {e}")
|