// demo/frontend/src/components/BenchmarkEvaluation.jsx
import React, { useState, useEffect, useRef } from "react";
import {
Box,
Typography,
CircularProgress,
Alert,
Paper,
Divider,
Button,
} from "@mui/material";
import AccessTimeIcon from "@mui/icons-material/AccessTime";
import LogDisplay from "./LogDisplay";
import { useNavigate } from "react-router-dom";
// Evaluation steps
const EVALUATION_STEPS = [
"preparation",
"model_evaluation",
"results_compilation",
];
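// These identifiers are expected to match the stage names the backend
// emits in its logs (parsed below from "[SUCCESS] Stage completed: <step>").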
// Friendly step names for display
const STEP_LABELS = {
preparation: "Preparation",
model_evaluation: "Model Evaluation",
results_compilation: "Results Compilation",
};
/**
* Component to handle benchmark evaluation and display logs
*
* @param {Object} props - Component props
* @param {string} props.sessionId - Session ID of the benchmark to evaluate
* @param {Function} props.onComplete - Function to call when evaluation is complete
* @returns {JSX.Element} Benchmark evaluation component
*/
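// Example usage (hypothetical parent component, shown for illustration only):
//   <BenchmarkEvaluation
//     sessionId={session.id}
//     onComplete={({ success, sessionId, logs }) => handleDone(sessionId)}
//   />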
const BenchmarkEvaluation = ({ sessionId, onComplete }) => {
const [evaluating, setEvaluating] = useState(false);
const [evaluationComplete, setEvaluationComplete] = useState(false);
const [evaluationLogs, setEvaluationLogs] = useState([]);
const [error, setError] = useState(null);
const [currentPhase, setCurrentPhase] = useState("initializing");
const [completedSteps, setCompletedSteps] = useState([]);
const [activeStep, setActiveStep] = useState(0);
const [elapsedTime, setElapsedTime] = useState(0);
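  // Interval handles and the start timestamp live in refs so they persist
  // across re-renders without triggering them.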
const pollingIntervalRef = useRef(null);
const timerIntervalRef = useRef(null);
const startTimeRef = useRef(null);
const navigate = useNavigate();
// Start evaluation when component mounts
useEffect(() => {
// Set start time
startTimeRef.current = Date.now();
// Start timer
timerIntervalRef.current = setInterval(() => {
const timeElapsed = Math.floor(
(Date.now() - startTimeRef.current) / 1000
);
setElapsedTime(timeElapsed);
}, 1000);
startEvaluation();
// Clean up intervals on unmount
return () => {
if (pollingIntervalRef.current) {
clearInterval(pollingIntervalRef.current);
}
if (timerIntervalRef.current) {
clearInterval(timerIntervalRef.current);
}
};
}, []);
// Determine current phase and completed steps from logs
useEffect(() => {
if (evaluationLogs.length === 0) return;
// Check all logs for completed steps
const newCompletedSteps = [...completedSteps];
let newActiveStep = activeStep;
evaluationLogs.forEach((log) => {
// Detect completed steps (format: [SUCCESS] Stage completed: step_name)
const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
if (match && match[1]) {
const completedStep = match[1].trim();
if (
EVALUATION_STEPS.includes(completedStep) &&
!newCompletedSteps.includes(completedStep)
) {
newCompletedSteps.push(completedStep);
          // Advance the active step to the one after the completed step,
          // never moving it backwards. stepIndex is always >= 0 here since
          // membership was checked above, and stepIndex + 1 can never exceed
          // EVALUATION_STEPS.length, so no extra clamp is needed.
          const stepIndex = EVALUATION_STEPS.indexOf(completedStep);
          newActiveStep = Math.max(newActiveStep, stepIndex + 1);
}
}
});
// Update state if there are new completed steps
if (newCompletedSteps.length > completedSteps.length) {
setCompletedSteps(newCompletedSteps);
setActiveStep(newActiveStep);
}
// Check recent logs to determine current phase
const recentLogs = evaluationLogs.slice(-10);
// Detect completion conditions
const isComplete =
recentLogs.some((log) =>
log.includes("[SUCCESS] Evaluation completed")
) ||
completedSteps.includes("results_compilation") ||
newCompletedSteps.includes("results_compilation");
if (isComplete) {
setCurrentPhase("complete");
setEvaluationComplete(true);
// Stop polling when evaluation is complete
if (pollingIntervalRef.current) {
clearInterval(pollingIntervalRef.current);
}
if (timerIntervalRef.current) {
clearInterval(timerIntervalRef.current);
}
// Notify parent component that evaluation is complete
if (onComplete) {
onComplete({
success: true,
sessionId,
logs: evaluationLogs,
});
}
} else if (recentLogs.some((log) => log.includes("Comparing models"))) {
setCurrentPhase("compiling_results");
} else if (recentLogs.some((log) => log.includes("Starting evaluations"))) {
setCurrentPhase("evaluating");
} else if (recentLogs.some((log) => log.includes("Initialization"))) {
setCurrentPhase("preparing");
}
}, [evaluationLogs, completedSteps, activeStep, sessionId, onComplete]);
// Format elapsed time as HH:MM:SS
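  // e.g. elapsedTime = 3725 -> "01:02:05"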
const formatElapsedTime = () => {
const hours = Math.floor(elapsedTime / 3600);
const minutes = Math.floor((elapsedTime % 3600) / 60);
const seconds = elapsedTime % 60;
return [
hours.toString().padStart(2, "0"),
minutes.toString().padStart(2, "0"),
seconds.toString().padStart(2, "0"),
].join(":");
};
// Start benchmark evaluation
const startEvaluation = async () => {
if (!sessionId) {
setError("Missing session ID");
return;
}
setEvaluating(true);
setEvaluationLogs([]);
setError(null);
setCurrentPhase("initializing");
setCompletedSteps([]);
setActiveStep(0);
try {
// Call API to start evaluation
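      // (the demo backend is assumed to be reachable at localhost:3001)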
const response = await fetch("http://localhost:3001/evaluate-benchmark", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
session_id: sessionId,
}),
});
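      // Expected response shape, inferred from the usage below:
      // { logs?: string[], error?: string }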
const result = await response.json();
if (response.ok) {
setEvaluationLogs(result.logs || []);
// Set up polling to retrieve more logs
pollingIntervalRef.current = setInterval(async () => {
          // NOTE: state read inside this callback is captured when the
          // interval is created (a stale closure), so completion is detected
          // from the poll response below and by the log-watching effect,
          // both of which clear this interval.
try {
// Call API to get latest logs
const logsResponse = await fetch(
`http://localhost:3001/evaluation-logs/${sessionId}`
);
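            // Expected response shape, inferred from the usage below:
            // { logs: string[], is_completed: boolean }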
if (logsResponse.ok) {
const logsResult = await logsResponse.json();
              // Update logs only if the server returned more than we have;
              // the functional update compares against the latest state
              // rather than the stale closure value.
              if (logsResult.logs) {
                setEvaluationLogs((prevLogs) =>
                  logsResult.logs.length > prevLogs.length
                    ? logsResult.logs
                    : prevLogs
                );
              }
// Check if evaluation is complete
if (logsResult.is_completed) {
setEvaluationComplete(true);
clearInterval(pollingIntervalRef.current);
}
}
} catch (error) {
console.log("Error polling logs:", error);
// Don't stop polling on network errors
}
}, 2000); // Poll every 2 seconds
} else {
// Handle error
setEvaluationLogs([`Error: ${result.error || "Unknown error"}`]);
setError(result.error || "Benchmark evaluation failed");
}
} catch (error) {
console.error("Error starting evaluation:", error);
setEvaluationLogs([`Error: ${error.message || "Unknown error"}`]);
setError("Error connecting to server");
} finally {
setEvaluating(false);
}
};
// Get title based on current phase
const getPhaseTitle = () => {
switch (currentPhase) {
case "initializing":
return "Preparing evaluation...";
case "preparing":
return "Preparing models...";
case "evaluating":
return "Evaluating models...";
case "compiling_results":
return "Compiling results...";
case "complete":
return "Evaluation completed successfully!";
default:
return "Processing...";
}
};
// Get current step info for display
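  // e.g. activeStep = 1 -> "Model Evaluation (33%)"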
const getCurrentStepInfo = () => {
const totalSteps = EVALUATION_STEPS.length;
const currentStepIndex = activeStep;
// If no active step yet
if (currentStepIndex === 0 && completedSteps.length === 0) {
      return "Starting... (0%)";
}
// If all steps completed
if (currentStepIndex >= totalSteps) {
      return "Completed (100%)";
}
// Calculate percentage
const percentage = Math.round((currentStepIndex / totalSteps) * 100);
// Get current step name
const currentStepName =
STEP_LABELS[EVALUATION_STEPS[currentStepIndex]] || "Processing";
return `${currentStepName} (${percentage}%)`;
};
// Function to navigate to results page
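  // (assumes an /evaluation-display route is registered in the app's router)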
const viewResults = () => {
navigate(`/evaluation-display?session=${sessionId}`);
};
return (
<Paper
elevation={3}
sx={{
p: 4,
mt: 3,
mb: 3,
display: "flex",
flexDirection: "column",
alignItems: "center",
justifyContent: "center",
minHeight: 200,
}}
>
{error ? (
<Alert severity="error" sx={{ width: "100%" }}>
{error}
</Alert>
) : (
<>
{evaluationComplete ? (
<>
<Alert severity="success" sx={{ width: "100%", mb: 3 }}>
Evaluation completed successfully!
</Alert>
<Button
variant="contained"
color="primary"
onClick={viewResults}
sx={{ mb: 3 }}
>
View Results Leaderboard
</Button>
</>
) : (
<>
<CircularProgress size={60} sx={{ mb: 2 }} />
<Typography variant="h6" component="div" gutterBottom>
{getPhaseTitle()}
</Typography>
{/* Step progress indicator */}
<Typography variant="body1" color="text.secondary">
{getCurrentStepInfo()}
</Typography>
{/* Timer display */}
<Box
sx={{
display: "flex",
alignItems: "center",
mt: 1,
color: "text.secondary",
opacity: 0.5,
}}
>
<Typography variant="body2">{formatElapsedTime()}</Typography>
</Box>
</>
)}
</>
)}
{/* Use the LogDisplay component for logs */}
<LogDisplay logs={evaluationLogs} height={300} />
</Paper>
);
};
export default BenchmarkEvaluation;