Spaces:

yourbench
/

demo

Running on CPU Upgrade

App Files Files Community

demo / frontend /src /components /BenchmarkEvaluation.jsx

tfrere

add model provider switching to eval

4fb52f5 4 months ago

raw

history blame

12.1 kB

	import React, { useState, useEffect, useRef } from "react";
	import { Box, Typography, CircularProgress, Alert, Paper } from "@mui/material";
	import { useNavigate, useSearchParams } from "react-router-dom";
	import API_CONFIG from "../config/api";

	// How long the simulated run lasts, in milliseconds, for precalculated documents.
	const SIMULATION_DURATION = 70 * 1000; // 70 seconds

	// Delay, in milliseconds, between two progress-message changes.
	// One entry for standard documents and one for precalculated documents.
	const MESSAGE_CHANGE_INTERVAL = {
	  DEFAULT: 25 * 1000, // standard documents: 25 seconds
	  PRECALCULATED: 25 * 1000, // precalculated documents: 25 seconds
	};

	// Ordered progress messages shown during an evaluation. Each entry carries
	// its 1-based step number and the total number of steps.
	const STARTING_MESSAGES = [
	  "Initializing evaluation environment...",
	  "Finding available model providers...",
	  "Starting evaluation process...",
	  "Evaluating models...",
	  "Storing evaluation results...",
	].map((message, position, allMessages) => ({
	  message,
	  step: position + 1,
	  totalSteps: allMessages.length,
	}));

	const BenchmarkEvaluation = ({ sessionId, isDefaultDocument, onComplete }) => {
	const [searchParams] = useSearchParams();
	const isDefault =
	isDefaultDocument \|\|
	["the-bitter-lesson", "hurricane-faq", "pokemon-guide"].includes(sessionId);
	const [evaluationComplete, setEvaluationComplete] = useState(false);
	const [error, setError] = useState(null);
	const [elapsedTime, setElapsedTime] = useState(0);
	const [startingMessageIndex, setStartingMessageIndex] = useState(0);
	const [evaluationStarted, setEvaluationStarted] = useState(false);

	const timerIntervalRef = useRef(null);
	const startTimeRef = useRef(null);
	const startingMessageIntervalRef = useRef(null);
	const pollingIntervalRef = useRef(null);
	const simulationTimeoutRef = useRef(null);

	const navigate = useNavigate();

	// Add effect to handle automatic redirection when evaluation is complete
	useEffect(() => {
	if (evaluationComplete) {
	navigate(`/evaluation-display?session=${sessionId}`);
	}
	}, [evaluationComplete, sessionId, navigate]);

	// Add effect to handle starting messages
	useEffect(() => {
	startingMessageIntervalRef.current = setInterval(
	() => {
	setStartingMessageIndex((prev) => {
	if (prev < STARTING_MESSAGES.length - 1) {
	return prev + 1;
	}
	return prev;
	});
	},
	isDefault
	? MESSAGE_CHANGE_INTERVAL.PRECALCULATED
	: MESSAGE_CHANGE_INTERVAL.DEFAULT
	);

	return () => {
	if (startingMessageIntervalRef.current) {
	clearInterval(startingMessageIntervalRef.current);
	}
	};
	}, [isDefault]);

	// Start evaluation when component mounts
	useEffect(() => {
	// Set start time
	startTimeRef.current = Date.now();

	// Start timer
	timerIntervalRef.current = setInterval(() => {
	const timeElapsed = Math.floor(
	(Date.now() - startTimeRef.current) / 1000
	);
	setElapsedTime(timeElapsed);
	}, 1000);

	// Gestionnaire pour détecter quand la page redevient visible
	const handleVisibilityChange = () => {
	if (
	document.visibilityState === "visible" &&
	!isDefault &&
	!evaluationComplete &&
	evaluationStarted // Vérifier si l'évaluation a déjà commencé
	) {
	console.log("Page became visible, checking evaluation status...");
	// Force une nouvelle requête pour récupérer l'état d'évaluation
	const checkEvaluationStatus = async () => {
	try {
	const logsResponse = await fetch(
	`${API_CONFIG.BASE_URL}/evaluation-logs/${sessionId}`
	);

	if (logsResponse.ok) {
	const logsResult = await logsResponse.json();
	if (logsResult.is_completed) {
	// Mettre fin à l'évaluation si elle est terminée
	setEvaluationComplete(true);

	// Avancer à la dernière étape des messages
	setStartingMessageIndex(STARTING_MESSAGES.length - 1);

	// Nettoyer les intervalles
	if (pollingIntervalRef.current) {
	clearInterval(pollingIntervalRef.current);
	}
	if (startingMessageIntervalRef.current) {
	clearInterval(startingMessageIntervalRef.current);
	}
	} else {
	// Si l'évaluation est toujours en cours, mettre à jour l'étape actuelle
	// basée sur le temps écoulé
	const progress = Math.min(
	Math.floor(
	(Date.now() - startTimeRef.current) /
	MESSAGE_CHANGE_INTERVAL.DEFAULT
	),
	STARTING_MESSAGES.length - 1
	);
	setStartingMessageIndex(progress);
	}
	}
	} catch (error) {
	console.error("Error checking evaluation status:", error);
	}
	};

	checkEvaluationStatus();
	}
	};

	// Ajouter l'écouteur pour le changement de visibilité
	document.addEventListener("visibilitychange", handleVisibilityChange);

	if (isDefault) {
	simulateEvaluation();
	} else {
	// Démarrer l'évaluation seulement si elle n'a pas déjà été lancée
	if (!evaluationStarted) {
	startEvaluation();
	}
	}

	// Clean up intervals on unmount
	return () => {
	if (pollingIntervalRef.current) {
	clearInterval(pollingIntervalRef.current);
	}
	if (timerIntervalRef.current) {
	clearInterval(timerIntervalRef.current);
	}
	if (simulationTimeoutRef.current) {
	clearTimeout(simulationTimeoutRef.current);
	}
	document.removeEventListener("visibilitychange", handleVisibilityChange);
	};
	}, [isDefault, sessionId, evaluationComplete, evaluationStarted]);

	// Simulate the evaluation process for pre-calculated documents
	const simulateEvaluation = () => {
	// Complete after 20 seconds
	simulationTimeoutRef.current = setTimeout(() => {
	setEvaluationComplete(true);

	if (startingMessageIntervalRef.current) {
	clearInterval(startingMessageIntervalRef.current);
	}

	setStartingMessageIndex(STARTING_MESSAGES.length - 1); // Set to last message
	}, SIMULATION_DURATION);
	};

	// Format elapsed time as HH:MM:SS
	const formatElapsedTime = () => {
	const hours = Math.floor(elapsedTime / 3600);
	const minutes = Math.floor((elapsedTime % 3600) / 60);
	const seconds = elapsedTime % 60;

	return [
	hours.toString().padStart(2, "0"),
	minutes.toString().padStart(2, "0"),
	seconds.toString().padStart(2, "0"),
	].join(":");
	};

	// Start benchmark evaluation
	const startEvaluation = async () => {
	if (!sessionId) {
	setError("Missing session ID");
	return;
	}

	// Marquer que l'évaluation a commencé
	setEvaluationStarted(true);

	try {
	// Call API to start evaluation
	const response = await fetch(
	`${API_CONFIG.BASE_URL}/evaluate-benchmark`,
	{
	method: "POST",
	headers: {
	"Content-Type": "application/json",
	},
	body: JSON.stringify({
	session_id: sessionId,
	}),
	}
	);

	const result = await response.json();

	if (response.ok) {
	// Set up polling to check completion
	pollingIntervalRef.current = setInterval(async () => {
	try {
	const logsResponse = await fetch(
	`${API_CONFIG.BASE_URL}/evaluation-logs/${sessionId}`
	);

	if (logsResponse.ok) {
	const logsResult = await logsResponse.json();

	// Vérifier si l'évaluation est terminée
	if (logsResult.is_completed) {
	setEvaluationComplete(true);

	// Avancer à la dernière étape du message
	setStartingMessageIndex(STARTING_MESSAGES.length - 1);

	// Arrêter les intervalles
	clearInterval(pollingIntervalRef.current);
	if (startingMessageIntervalRef.current) {
	clearInterval(startingMessageIntervalRef.current);
	}
	} else {
	// Si l'évaluation est toujours en cours, estimer la progression
	// en fonction du temps écoulé
	const elapsedSinceStart = Date.now() - startTimeRef.current;

	// Estimer la progression (en supposant qu'une évaluation prend environ 80 secondes)
	const estimatedTotalTime = 80000; // 80 secondes
	const estimatedProgress = Math.min(
	elapsedSinceStart / estimatedTotalTime,
	1
	);

	// Calculer l'étape estimée (0 à STARTING_MESSAGES.length - 1)
	const estimatedStepIndex = Math.min(
	Math.floor(estimatedProgress * STARTING_MESSAGES.length),
	STARTING_MESSAGES.length - 1
	);

	// Mettre à jour l'index des messages de démarrage si nécessaire
	if (estimatedStepIndex > startingMessageIndex) {
	setStartingMessageIndex(estimatedStepIndex);
	}
	}
	}
	} catch (error) {
	console.log("Error polling logs:", error);
	// Ne pas arrêter le polling en cas d'erreurs réseau temporaires
	}
	}, 2000);
	} else {
	setError(result.error \|\| "Benchmark evaluation failed");
	}
	} catch (error) {
	console.error("Error starting evaluation:", error);
	setError("Error connecting to server");
	}
	};

	return (
	<Paper
	elevation={3}
	sx={{
	p: 4,
	mt: 3,
	mb: 3,
	display: "flex",
	flexDirection: "column",
	alignItems: "center",
	justifyContent: "center",
	minHeight: 200,
	position: "relative",
	}}
	>
	{/* Temps estimé */}
	<Box
	sx={{
	position: "absolute",
	top: 12,
	right: 12,
	backgroundColor: "rgba(0, 0, 0, 0.04)",
	borderRadius: "4px",
	px: 1,
	py: 0.5,
	display: "inline-flex",
	alignItems: "center",
	}}
	>
	<Typography
	variant="caption"
	sx={{
	fontSize: "0.675rem",
	color: "text.secondary",
	fontWeight: 500,
	}}
	>
	Estimated time ~ 1min 30s
	</Typography>
	</Box>

	{error ? (
	<Alert severity="error" sx={{ width: "100%" }}>
	{error}
	</Alert>
	) : (
	<>
	{evaluationComplete ? (
	<Alert severity="success" sx={{ width: "100%", mb: 3 }}>
	Evaluation completed successfully!
	</Alert>
	) : (
	<>
	<CircularProgress size={60} sx={{ mb: 2 }} />
	<Typography variant="h6" component="div" gutterBottom>
	Benchmark evaluation...
	</Typography>

	{/* Step progress indicator */}
	<Typography variant="body1" color="text.secondary">
	{`${STARTING_MESSAGES[startingMessageIndex].message} (${STARTING_MESSAGES[startingMessageIndex].step}/${STARTING_MESSAGES[startingMessageIndex].totalSteps})`}
	</Typography>

	{/* Timer display */}
	<Box
	sx={{
	display: "flex",
	alignItems: "center",
	mt: 1,
	color: "text.secondary",
	opacity: 0.5,
	}}
	>
	<Typography variant="body2">{formatElapsedTime()}</Typography>
	</Box>
	</>
	)}
	</>
	)}
	</Paper>
	);
	};

	export default BenchmarkEvaluation;