import React, { useState, useEffect, useRef } from "react"; import { Box, Typography, CircularProgress, Alert, Paper, Divider, Button, } from "@mui/material"; import AccessTimeIcon from "@mui/icons-material/AccessTime"; import LogDisplay from "./LogDisplay"; import { useNavigate } from "react-router-dom"; // Evaluation steps const EVALUATION_STEPS = [ "preparation", "model_evaluation", "results_compilation", ]; // Friendly step names for display const STEP_LABELS = { preparation: "Preparation", model_evaluation: "Model Evaluation", results_compilation: "Results Compilation", }; /** * Component to handle benchmark evaluation and display logs * * @param {Object} props - Component props * @param {string} props.sessionId - Session ID of the benchmark to evaluate * @param {Function} props.onComplete - Function to call when evaluation is complete * @returns {JSX.Element} Benchmark evaluation component */ const BenchmarkEvaluation = ({ sessionId, onComplete }) => { const [evaluating, setEvaluating] = useState(false); const [evaluationComplete, setEvaluationComplete] = useState(false); const [evaluationLogs, setEvaluationLogs] = useState([]); const [error, setError] = useState(null); const [currentPhase, setCurrentPhase] = useState("initializing"); const [completedSteps, setCompletedSteps] = useState([]); const [activeStep, setActiveStep] = useState(0); const [elapsedTime, setElapsedTime] = useState(0); const pollingIntervalRef = useRef(null); const timerIntervalRef = useRef(null); const startTimeRef = useRef(null); const navigate = useNavigate(); // Start evaluation when component mounts useEffect(() => { // Set start time startTimeRef.current = Date.now(); // Start timer timerIntervalRef.current = setInterval(() => { const timeElapsed = Math.floor( (Date.now() - startTimeRef.current) / 1000 ); setElapsedTime(timeElapsed); }, 1000); startEvaluation(); // Clean up intervals on unmount return () => { if (pollingIntervalRef.current) { clearInterval(pollingIntervalRef.current); } if (timerIntervalRef.current) { clearInterval(timerIntervalRef.current); } }; }, []); // Determine current phase and completed steps from logs useEffect(() => { if (evaluationLogs.length === 0) return; // Check all logs for completed steps const newCompletedSteps = [...completedSteps]; let newActiveStep = activeStep; evaluationLogs.forEach((log) => { // Detect completed steps (format: [SUCCESS] Stage completed: step_name) const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/); if (match && match[1]) { const completedStep = match[1].trim(); if ( EVALUATION_STEPS.includes(completedStep) && !newCompletedSteps.includes(completedStep) ) { newCompletedSteps.push(completedStep); // Set active step to index of next step const stepIndex = EVALUATION_STEPS.indexOf(completedStep); if (stepIndex >= 0 && stepIndex + 1 > newActiveStep) { newActiveStep = stepIndex + 1; if (newActiveStep >= EVALUATION_STEPS.length) { newActiveStep = EVALUATION_STEPS.length; } } } } }); // Update state if there are new completed steps if (newCompletedSteps.length > completedSteps.length) { setCompletedSteps(newCompletedSteps); setActiveStep(newActiveStep); } // Check recent logs to determine current phase const recentLogs = evaluationLogs.slice(-10); // Detect completion conditions const isComplete = recentLogs.some((log) => log.includes("[SUCCESS] Evaluation completed") ) || completedSteps.includes("results_compilation") || newCompletedSteps.includes("results_compilation"); if (isComplete) { setCurrentPhase("complete"); setEvaluationComplete(true); // Stop polling when evaluation is complete if (pollingIntervalRef.current) { clearInterval(pollingIntervalRef.current); } if (timerIntervalRef.current) { clearInterval(timerIntervalRef.current); } // Notify parent component that evaluation is complete if (onComplete) { onComplete({ success: true, sessionId, logs: evaluationLogs, }); } } else if (recentLogs.some((log) => log.includes("Comparing models"))) { setCurrentPhase("compiling_results"); } else if (recentLogs.some((log) => log.includes("Starting evaluations"))) { setCurrentPhase("evaluating"); } else if (recentLogs.some((log) => log.includes("Initialization"))) { setCurrentPhase("preparing"); } }, [evaluationLogs, completedSteps, activeStep, sessionId, onComplete]); // Format elapsed time as HH:MM:SS const formatElapsedTime = () => { const hours = Math.floor(elapsedTime / 3600); const minutes = Math.floor((elapsedTime % 3600) / 60); const seconds = elapsedTime % 60; return [ hours.toString().padStart(2, "0"), minutes.toString().padStart(2, "0"), seconds.toString().padStart(2, "0"), ].join(":"); }; // Start benchmark evaluation const startEvaluation = async () => { if (!sessionId) { setError("Missing session ID"); return; } setEvaluating(true); setEvaluationLogs([]); setError(null); setCurrentPhase("initializing"); setCompletedSteps([]); setActiveStep(0); try { // Call API to start evaluation const response = await fetch("http://localhost:3001/evaluate-benchmark", { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ session_id: sessionId, }), }); const result = await response.json(); if (response.ok) { setEvaluationLogs(result.logs || []); // Set up polling to retrieve more logs pollingIntervalRef.current = setInterval(async () => { // Check if we're already done if (evaluationComplete) { clearInterval(pollingIntervalRef.current); return; } try { // Call API to get latest logs const logsResponse = await fetch( `http://localhost:3001/evaluation-logs/${sessionId}` ); if (logsResponse.ok) { const logsResult = await logsResponse.json(); // Update logs if there are new ones if ( logsResult.logs && logsResult.logs.length > evaluationLogs.length ) { setEvaluationLogs(logsResult.logs); } // Check if evaluation is complete if (logsResult.is_completed) { setEvaluationComplete(true); clearInterval(pollingIntervalRef.current); } } } catch (error) { console.log("Error polling logs:", error); // Don't stop polling on network errors } }, 2000); // Poll every 2 seconds } else { // Handle error setEvaluationLogs([`Error: ${result.error || "Unknown error"}`]); setError(result.error || "Benchmark evaluation failed"); } } catch (error) { console.error("Error starting evaluation:", error); setEvaluationLogs([`Error: ${error.message || "Unknown error"}`]); setError("Error connecting to server"); } finally { setEvaluating(false); } }; // Get title based on current phase const getPhaseTitle = () => { switch (currentPhase) { case "initializing": return "Preparing evaluation..."; case "preparing": return "Preparing models..."; case "evaluating": return "Evaluating models..."; case "compiling_results": return "Compiling results..."; case "complete": return "Evaluation completed successfully!"; default: return "Processing..."; } }; // Get current step info for display const getCurrentStepInfo = () => { const totalSteps = EVALUATION_STEPS.length; const currentStepIndex = activeStep; // If no active step yet if (currentStepIndex === 0 && completedSteps.length === 0) { return `Starting... (0%)`; } // If all steps completed if (currentStepIndex >= totalSteps) { return `Completed (100%)`; } // Calculate percentage const percentage = Math.round((currentStepIndex / totalSteps) * 100); // Get current step name const currentStepName = STEP_LABELS[EVALUATION_STEPS[currentStepIndex]] || "Processing"; return `${currentStepName} (${percentage}%)`; }; // Function to navigate to results page const viewResults = () => { navigate(`/evaluation-display?session=${sessionId}`); }; return ( {error ? ( {error} ) : ( <> {evaluationComplete ? ( <> Evaluation completed successfully! ) : ( <> {getPhaseTitle()} {/* Step progress indicator */} {getCurrentStepInfo()} {/* Timer display */} {formatElapsedTime()} )} )} {/* Use the LogDisplay component for logs */} ); }; export default BenchmarkEvaluation;