// demo/frontend/src/components/BenchmarkEvaluation.jsx
// Last commit 4fb52f5 by tfrere: "add moder provider switching to eval"
// File size: 12.1 kB
import React, { useState, useEffect, useRef } from "react";
import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
import { useNavigate, useSearchParams } from "react-router-dom";
import API_CONFIG from "../config/api";
// How long the simulated evaluation lasts for precalculated documents, in milliseconds
const SIMULATION_DURATION = 70 * 1000; // 70 seconds
// Delay between two progress messages, in milliseconds, for standard vs precalculated documents
const MESSAGE_CHANGE_INTERVAL = {
  DEFAULT: 25 * 1000, // 25 seconds for standard documents
  PRECALCULATED: 25 * 1000, // 25 seconds for precalculated documents
};
// Progress messages shown while the evaluation runs, in display order.
// `step` is the 1-based position of the message and `totalSteps` the number of messages.
const STARTING_MESSAGES = [
  "Initializing evaluation environment...",
  "Finding available model providers...",
  "Starting evaluation process...",
  "Evaluating models...",
  "Storing evaluation results...",
].map((message, position, allMessages) => ({
  message,
  step: position + 1,
  totalSteps: allMessages.length,
}));
const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
const [searchParams] = useSearchParams();
const isDefault =
isDefaultDocument ||
["the-bitter-lesson", "hurricane-faq", "pokemon-guide"].includes(sessionId);
const [evaluationComplete, setEvaluationComplete] = useState(false);
const [error, setError] = useState(null);
const [elapsedTime, setElapsedTime] = useState(0);
const [startingMessageIndex, setStartingMessageIndex] = useState(0);
const [evaluationStarted, setEvaluationStarted] = useState(false);
const timerIntervalRef = useRef(null);
const startTimeRef = useRef(null);
const startingMessageIntervalRef = useRef(null);
const pollingIntervalRef = useRef(null);
const simulationTimeoutRef = useRef(null);
const navigate = useNavigate();
// Add effect to handle automatic redirection when evaluation is complete
useEffect(() => {
if (evaluationComplete) {
navigate(`/evaluation-display?session=${sessionId}`);
}
}, [evaluationComplete, sessionId, navigate]);
// Add effect to handle starting messages
useEffect(() => {
startingMessageIntervalRef.current = setInterval(
() => {
setStartingMessageIndex((prev) => {
if (prev < STARTING_MESSAGES.length - 1) {
return prev + 1;
}
return prev;
});
},
isDefault
? MESSAGE_CHANGE_INTERVAL.PRECALCULATED
: MESSAGE_CHANGE_INTERVAL.DEFAULT
);
return () => {
if (startingMessageIntervalRef.current) {
clearInterval(startingMessageIntervalRef.current);
}
};
}, [isDefault]);
// Start evaluation when component mounts
useEffect(() => {
// Set start time
startTimeRef.current = Date.now();
// Start timer
timerIntervalRef.current = setInterval(() => {
const timeElapsed = Math.floor(
(Date.now() - startTimeRef.current) / 1000
);
setElapsedTime(timeElapsed);
}, 1000);
// Gestionnaire pour détecter quand la page redevient visible
const handleVisibilityChange = () => {
if (
document.visibilityState === "visible" &&
!isDefault &&
!evaluationComplete &&
evaluationStarted // Vérifier si l'évaluation a déjà commencé
) {
console.log("Page became visible, checking evaluation status...");
// Force une nouvelle requête pour récupérer l'état d'évaluation
const checkEvaluationStatus = async () => {
try {
const logsResponse = await fetch(
`${API_CONFIG.BASE_URL}/evaluation-logs/${sessionId}`
);
if (logsResponse.ok) {
const logsResult = await logsResponse.json();
if (logsResult.is_completed) {
// Mettre fin à l'évaluation si elle est terminée
setEvaluationComplete(true);
// Avancer à la dernière étape des messages
setStartingMessageIndex(STARTING_MESSAGES.length - 1);
// Nettoyer les intervalles
if (pollingIntervalRef.current) {
clearInterval(pollingIntervalRef.current);
}
if (startingMessageIntervalRef.current) {
clearInterval(startingMessageIntervalRef.current);
}
} else {
// Si l'évaluation est toujours en cours, mettre à jour l'étape actuelle
// basée sur le temps écoulé
const progress = Math.min(
Math.floor(
(Date.now() - startTimeRef.current) /
MESSAGE_CHANGE_INTERVAL.DEFAULT
),
STARTING_MESSAGES.length - 1
);
setStartingMessageIndex(progress);
}
}
} catch (error) {
console.error("Error checking evaluation status:", error);
}
};
checkEvaluationStatus();
}
};
// Ajouter l'écouteur pour le changement de visibilité
document.addEventListener("visibilitychange", handleVisibilityChange);
if (isDefault) {
simulateEvaluation();
} else {
// Démarrer l'évaluation seulement si elle n'a pas déjà été lancée
if (!evaluationStarted) {
startEvaluation();
}
}
// Clean up intervals on unmount
return () => {
if (pollingIntervalRef.current) {
clearInterval(pollingIntervalRef.current);
}
if (timerIntervalRef.current) {
clearInterval(timerIntervalRef.current);
}
if (simulationTimeoutRef.current) {
clearTimeout(simulationTimeoutRef.current);
}
document.removeEventListener("visibilitychange", handleVisibilityChange);
};
}, [isDefault, sessionId, evaluationComplete, evaluationStarted]);
// Simulate the evaluation process for pre-calculated documents
const simulateEvaluation = () => {
// Complete after 20 seconds
simulationTimeoutRef.current = setTimeout(() => {
setEvaluationComplete(true);
if (startingMessageIntervalRef.current) {
clearInterval(startingMessageIntervalRef.current);
}
setStartingMessageIndex(STARTING_MESSAGES.length - 1); // Set to last message
}, SIMULATION_DURATION);
};
// Format elapsed time as HH:MM:SS
const formatElapsedTime = () => {
const hours = Math.floor(elapsedTime / 3600);
const minutes = Math.floor((elapsedTime % 3600) / 60);
const seconds = elapsedTime % 60;
return [
hours.toString().padStart(2, "0"),
minutes.toString().padStart(2, "0"),
seconds.toString().padStart(2, "0"),
].join(":");
};
// Start benchmark evaluation
const startEvaluation = async () => {
if (!sessionId) {
setError("Missing session ID");
return;
}
// Marquer que l'évaluation a commencé
setEvaluationStarted(true);
try {
// Call API to start evaluation
const response = await fetch(
`${API_CONFIG.BASE_URL}/evaluate-benchmark`,
{
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
session_id: sessionId,
}),
}
);
const result = await response.json();
if (response.ok) {
// Set up polling to check completion
pollingIntervalRef.current = setInterval(async () => {
try {
const logsResponse = await fetch(
`${API_CONFIG.BASE_URL}/evaluation-logs/${sessionId}`
);
if (logsResponse.ok) {
const logsResult = await logsResponse.json();
// Vérifier si l'évaluation est terminée
if (logsResult.is_completed) {
setEvaluationComplete(true);
// Avancer à la dernière étape du message
setStartingMessageIndex(STARTING_MESSAGES.length - 1);
// Arrêter les intervalles
clearInterval(pollingIntervalRef.current);
if (startingMessageIntervalRef.current) {
clearInterval(startingMessageIntervalRef.current);
}
} else {
// Si l'évaluation est toujours en cours, estimer la progression
// en fonction du temps écoulé
const elapsedSinceStart = Date.now() - startTimeRef.current;
// Estimer la progression (en supposant qu'une évaluation prend environ 80 secondes)
const estimatedTotalTime = 80000; // 80 secondes
const estimatedProgress = Math.min(
elapsedSinceStart / estimatedTotalTime,
1
);
// Calculer l'étape estimée (0 à STARTING_MESSAGES.length - 1)
const estimatedStepIndex = Math.min(
Math.floor(estimatedProgress * STARTING_MESSAGES.length),
STARTING_MESSAGES.length - 1
);
// Mettre à jour l'index des messages de démarrage si nécessaire
if (estimatedStepIndex > startingMessageIndex) {
setStartingMessageIndex(estimatedStepIndex);
}
}
}
} catch (error) {
console.log("Error polling logs:", error);
// Ne pas arrêter le polling en cas d'erreurs réseau temporaires
}
}, 2000);
} else {
setError(result.error || "Benchmark evaluation failed");
}
} catch (error) {
console.error("Error starting evaluation:", error);
setError("Error connecting to server");
}
};
return (
<Paper
elevation={3}
sx={{
p: 4,
mt: 3,
mb: 3,
display: "flex",
flexDirection: "column",
alignItems: "center",
justifyContent: "center",
minHeight: 200,
position: "relative",
}}
>
{/* Temps estimé */}
<Box
sx={{
position: "absolute",
top: 12,
right: 12,
backgroundColor: "rgba(0, 0, 0, 0.04)",
borderRadius: "4px",
px: 1,
py: 0.5,
display: "inline-flex",
alignItems: "center",
}}
>
<Typography
variant="caption"
sx={{
fontSize: "0.675rem",
color: "text.secondary",
fontWeight: 500,
}}
>
Estimated time ~ 1min 30s
</Typography>
</Box>
{error ? (
<Alert severity="error" sx={{ width: "100%" }}>
{error}
</Alert>
) : (
<>
{evaluationComplete ? (
<Alert severity="success" sx={{ width: "100%", mb: 3 }}>
Evaluation completed successfully!
</Alert>
) : (
<>
<CircularProgress size={60} sx={{ mb: 2 }} />
<Typography variant="h6" component="div" gutterBottom>
Benchmark evaluation...
</Typography>
{/* Step progress indicator */}
<Typography variant="body1" color="text.secondary">
{`${STARTING_MESSAGES[startingMessageIndex].message} (${STARTING_MESSAGES[startingMessageIndex].step}/${STARTING_MESSAGES[startingMessageIndex].totalSteps})`}
</Typography>
{/* Timer display */}
<Box
sx={{
display: "flex",
alignItems: "center",
mt: 1,
color: "text.secondary",
opacity: 0.5,
}}
>
<Typography variant="body2">{formatElapsedTime()}</Typography>
</Box>
</>
)}
</>
)}
</Paper>
);
};
export default BenchmarkEvaluation;