import React, { useState, useEffect, useRef } from "react";
import {
  Box,
  Typography,
  CircularProgress,
  Alert,
  Paper,
  Divider,
  Button,
} from "@mui/material";
import AccessTimeIcon from "@mui/icons-material/AccessTime";
import LogDisplay from "./LogDisplay";
import { useNavigate } from "react-router-dom";

// Evaluation steps
const EVALUATION_STEPS = [
  "preparation",
  "model_evaluation",
  "results_compilation",
];

// Friendly step names for display
const STEP_LABELS = {
  preparation: "Preparation",
  model_evaluation: "Model Evaluation",
  results_compilation: "Results Compilation",
};

/**
 * Component to handle benchmark evaluation and display logs
 *
 * @param {Object} props - Component props
 * @param {string} props.sessionId - Session ID of the benchmark to evaluate
 * @param {Function} props.onComplete - Function to call when evaluation is complete
 * @returns {JSX.Element} Benchmark evaluation component
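 *
 * @example
 * // Usage sketch; the parent wiring and `session.id` below are illustrative,
 * // not part of this file. The onComplete payload shape matches what this
 * // component passes: { success, sessionId, logs }.
 * <BenchmarkEvaluation
 *   sessionId={session.id}
 *   onComplete={({ success, sessionId, logs }) => console.log("Evaluation finished", success)}
 * />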
 */
const BenchmarkEvaluation = ({ sessionId, onComplete }) => {
  const [evaluating, setEvaluating] = useState(false);
  const [evaluationComplete, setEvaluationComplete] = useState(false);
  const [evaluationLogs, setEvaluationLogs] = useState([]);
  const [error, setError] = useState(null);
  const [currentPhase, setCurrentPhase] = useState("initializing");
  const [completedSteps, setCompletedSteps] = useState([]);
  const [activeStep, setActiveStep] = useState(0);
  const [elapsedTime, setElapsedTime] = useState(0);

  const pollingIntervalRef = useRef(null);
  const timerIntervalRef = useRef(null);
  const startTimeRef = useRef(null);

  const navigate = useNavigate();

  // Start evaluation when component mounts
  useEffect(() => {
    // Set start time
    startTimeRef.current = Date.now();

    // Start timer
    timerIntervalRef.current = setInterval(() => {
      const timeElapsed = Math.floor(
        (Date.now() - startTimeRef.current) / 1000
      );
      setElapsedTime(timeElapsed);
    }, 1000);

    startEvaluation();

    // Clean up intervals on unmount
    return () => {
      if (pollingIntervalRef.current) {
        clearInterval(pollingIntervalRef.current);
      }
      if (timerIntervalRef.current) {
        clearInterval(timerIntervalRef.current);
      }
    };
  }, []);

  // Determine current phase and completed steps from logs
  useEffect(() => {
    if (evaluationLogs.length === 0) return;

    // Check all logs for completed steps
    const newCompletedSteps = [...completedSteps];
    let newActiveStep = activeStep;

    evaluationLogs.forEach((log) => {
      // Detect completed steps (format: [SUCCESS] Stage completed: step_name)
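      // e.g. "[SUCCESS] Stage completed: model_evaluation"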
      const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
      if (match && match[1]) {
        const completedStep = match[1].trim();
        if (
          EVALUATION_STEPS.includes(completedStep) &&
          !newCompletedSteps.includes(completedStep)
        ) {
          newCompletedSteps.push(completedStep);
          // Advance the active step past the completed one, clamped to the
          // total number of steps
          const stepIndex = EVALUATION_STEPS.indexOf(completedStep);
          if (stepIndex >= 0 && stepIndex + 1 > newActiveStep) {
            newActiveStep = Math.min(stepIndex + 1, EVALUATION_STEPS.length);
          }
        }
      }
    });

    // Update state if there are new completed steps
    if (newCompletedSteps.length > completedSteps.length) {
      setCompletedSteps(newCompletedSteps);
      setActiveStep(newActiveStep);
    }

    // Check recent logs to determine current phase
    const recentLogs = evaluationLogs.slice(-10);

    // Detect completion conditions
    const isComplete =
      recentLogs.some((log) =>
        log.includes("[SUCCESS] Evaluation completed")
      ) ||
      completedSteps.includes("results_compilation") ||
      newCompletedSteps.includes("results_compilation");

    if (isComplete) {
      setCurrentPhase("complete");
      setEvaluationComplete(true);

      // Stop polling when evaluation is complete
      if (pollingIntervalRef.current) {
        clearInterval(pollingIntervalRef.current);
      }
      if (timerIntervalRef.current) {
        clearInterval(timerIntervalRef.current);
      }

      // Notify parent component that evaluation is complete
      if (onComplete) {
        onComplete({
          success: true,
          sessionId,
          logs: evaluationLogs,
        });
      }
    } else if (recentLogs.some((log) => log.includes("Comparing models"))) {
      setCurrentPhase("compiling_results");
    } else if (
      recentLogs.some((log) => log.includes("Starting evaluations"))
    ) {
      setCurrentPhase("evaluating");
    } else if (recentLogs.some((log) => log.includes("Initialization"))) {
      setCurrentPhase("preparing");
    }
  }, [evaluationLogs, completedSteps, activeStep, sessionId, onComplete]);

  // Format elapsed time as HH:MM:SS
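  // e.g. an elapsedTime of 3725 seconds renders as "01:02:05"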
  const formatElapsedTime = () => {
    const hours = Math.floor(elapsedTime / 3600);
    const minutes = Math.floor((elapsedTime % 3600) / 60);
    const seconds = elapsedTime % 60;
    return [
      hours.toString().padStart(2, "0"),
      minutes.toString().padStart(2, "0"),
      seconds.toString().padStart(2, "0"),
    ].join(":");
  };

  // Start benchmark evaluation
  const startEvaluation = async () => {
    if (!sessionId) {
      setError("Missing session ID");
      return;
    }

    setEvaluating(true);
    setEvaluationLogs([]);
    setError(null);
    setCurrentPhase("initializing");
    setCompletedSteps([]);
    setActiveStep(0);

    try {
      // Call API to start evaluation
      const response = await fetch("http://localhost:3001/evaluate-benchmark", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          session_id: sessionId,
        }),
      });

      const result = await response.json();

      if (response.ok) {
        setEvaluationLogs(result.logs || []);

        // Set up polling to retrieve more logs
        pollingIntervalRef.current = setInterval(async () => {
          // NOTE: this callback captures the initial value of
          // `evaluationComplete` (stale closure), so this early return rarely
          // fires; completion is also detected via `is_completed` below and by
          // the log-analysis effect, both of which clear this interval.
          if (evaluationComplete) {
            clearInterval(pollingIntervalRef.current);
            return;
          }

          try {
            // Call API to get latest logs
            const logsResponse = await fetch(
              `http://localhost:3001/evaluation-logs/${sessionId}`
            );

            if (logsResponse.ok) {
              const logsResult = await logsResponse.json();

              // Update logs only when the server returned new ones; use a
              // functional update so the length comparison sees the latest
              // state instead of the array captured when polling started
              if (logsResult.logs) {
                setEvaluationLogs((prevLogs) =>
                  logsResult.logs.length > prevLogs.length
                    ? logsResult.logs
                    : prevLogs
                );
              }

              // Check if evaluation is complete
              if (logsResult.is_completed) {
                setEvaluationComplete(true);
                clearInterval(pollingIntervalRef.current);
              }
            }
          } catch (error) {
            console.log("Error polling logs:", error);
            // Don't stop polling on network errors
          }
        }, 2000); // Poll every 2 seconds
      } else {
        // Handle error
        setEvaluationLogs([`Error: ${result.error || "Unknown error"}`]);
        setError(result.error || "Benchmark evaluation failed");
      }
    } catch (error) {
      console.error("Error starting evaluation:", error);
      setEvaluationLogs([`Error: ${error.message || "Unknown error"}`]);
      setError("Error connecting to server");
    } finally {
      setEvaluating(false);
    }
  };

  // Get title based on current phase
  const getPhaseTitle = () => {
    switch (currentPhase) {
      case "initializing":
        return "Preparing evaluation...";
      case "preparing":
        return "Preparing models...";
      case "evaluating":
        return "Evaluating models...";
      case "compiling_results":
        return "Compiling results...";
      case "complete":
        return "Evaluation completed successfully!";
      default:
        return "Processing...";
    }
  };

  // Get current step info for display
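  // e.g. with "preparation" completed (activeStep = 1) this returns "Model Evaluation (33%)"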
  const getCurrentStepInfo = () => {
    const totalSteps = EVALUATION_STEPS.length;
    const currentStepIndex = activeStep;

    // If no active step yet
    if (currentStepIndex === 0 && completedSteps.length === 0) {
      return `Starting... (0%)`;
    }

    // If all steps completed
    if (currentStepIndex >= totalSteps) {
      return `Completed (100%)`;
    }

    // Calculate percentage
    const percentage = Math.round((currentStepIndex / totalSteps) * 100);

    // Get current step name
    const currentStepName =
      STEP_LABELS[EVALUATION_STEPS[currentStepIndex]] || "Processing";

    return `${currentStepName} (${percentage}%)`;
  };

  // Function to navigate to results page
  const viewResults = () => {
    navigate(`/evaluation-display?session=${sessionId}`);
  };

  return (
    <Paper
      elevation={3}
      sx={{
        p: 4,
        mt: 3,
        mb: 3,
        display: "flex",
        flexDirection: "column",
        alignItems: "center",
        justifyContent: "center",
        minHeight: 200,
      }}
    >
      {error ? (
        <Alert severity="error" sx={{ width: "100%" }}>
          {error}
        </Alert>
      ) : (
        <>
          {evaluationComplete ? (
            <>
              <Alert severity="success" sx={{ width: "100%", mb: 3 }}>
                Evaluation completed successfully!
              </Alert>
              <Button
                variant="contained"
                color="primary"
                onClick={viewResults}
                sx={{ mb: 3 }}
              >
                View Results Leaderboard
              </Button>
            </>
          ) : (
            <>
              <CircularProgress size={60} sx={{ mb: 2 }} />
              <Typography variant="h6" component="div" gutterBottom>
                {getPhaseTitle()}
              </Typography>
              {/* Step progress indicator */}
              <Typography variant="body1" color="text.secondary">
                {getCurrentStepInfo()}
              </Typography>
              {/* Timer display */}
              <Box
                sx={{
                  display: "flex",
                  alignItems: "center",
                  mt: 1,
                  color: "text.secondary",
                  opacity: 0.5,
                }}
              >
                {/* Clock icon for the elapsed-time readout (uses the imported AccessTimeIcon) */}
                <AccessTimeIcon fontSize="small" sx={{ mr: 0.5 }} />
                <Typography variant="body2">{formatElapsedTime()}</Typography>
              </Box>
            </>
          )}
        </>
      )}

      {/* Use the LogDisplay component for logs */}
      <LogDisplay logs={evaluationLogs} height={300} />
    </Paper>
  );
};

export default BenchmarkEvaluation;