// demo/frontend/src/components/BenchmarkEvaluation.jsx
import React, { useState, useEffect, useRef } from "react";
import {
Box,
Typography,
CircularProgress,
Alert,
Paper,
Divider,
Button,
} from "@mui/material";
import AccessTimeIcon from "@mui/icons-material/AccessTime";
import LogDisplay from "./LogDisplay";
import { useNavigate } from "react-router-dom";
// Evaluation steps
const EVALUATION_STEPS = [
"preparation",
"model_evaluation",
"results_compilation",
];
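// These identifiers are expected to match the stage names the backend
// emits in its logs (parsed below from "[SUCCESS] Stage completed: <step>").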
// Friendly step names for display
const STEP_LABELS = {
preparation: "Preparation",
model_evaluation: "Model Evaluation",
results_compilation: "Results Compilation",
};
/**
* Component to handle benchmark evaluation and display logs
*
* @param {Object} props - Component props
* @param {string} props.sessionId - Session ID of the benchmark to evaluate
* @param {Function} props.onComplete - Function to call when evaluation is complete
* @returns {JSX.Element} Benchmark evaluation component
*/
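// Example usage (hypothetical parent component, shown for illustration only):
//   <BenchmarkEvaluation
//     sessionId={session.id}
//     onComplete={({ success, sessionId, logs }) => handleDone(sessionId)}
//   />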
const BenchmarkEvaluation = ({ sessionId, onComplete }) => {
const [evaluating, setEvaluating] = useState(false);
const [evaluationComplete, setEvaluationComplete] = useState(false);
const [evaluationLogs, setEvaluationLogs] = useState([]);
const [error, setError] = useState(null);
const [currentPhase, setCurrentPhase] = useState("initializing");
const [completedSteps, setCompletedSteps] = useState([]);
const [activeStep, setActiveStep] = useState(0);
const [elapsedTime, setElapsedTime] = useState(0);
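  // Interval handles and the start timestamp live in refs so they persist
  // across re-renders without triggering them.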
const pollingIntervalRef = useRef(null);
const timerIntervalRef = useRef(null);
const startTimeRef = useRef(null);
const navigate = useNavigate();
// Start evaluation when component mounts
useEffect(() => {
// Set start time
startTimeRef.current = Date.now();
// Start timer
timerIntervalRef.current = setInterval(() => {
const timeElapsed = Math.floor(
(Date.now() - startTimeRef.current) / 1000
);
setElapsedTime(timeElapsed);
}, 1000);
startEvaluation();
// Clean up intervals on unmount
return () => {
if (pollingIntervalRef.current) {
clearInterval(pollingIntervalRef.current);
}
if (timerIntervalRef.current) {
clearInterval(timerIntervalRef.current);
}
};
}, []);
// Determine current phase and completed steps from logs
useEffect(() => {
if (evaluationLogs.length === 0) return;
// Check all logs for completed steps
const newCompletedSteps = [...completedSteps];
let newActiveStep = activeStep;
evaluationLogs.forEach((log) => {
// Detect completed steps (format: [SUCCESS] Stage completed: step_name)
const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
if (match && match[1]) {
const completedStep = match[1].trim();
if (
EVALUATION_STEPS.includes(completedStep) &&
!newCompletedSteps.includes(completedStep)
) {
newCompletedSteps.push(completedStep);
          // Advance the active step to the one after the completed step,
          // never moving it backwards. stepIndex is always >= 0 here since
          // membership was checked above, and stepIndex + 1 can never exceed
          // EVALUATION_STEPS.length, so no extra clamp is needed.
          const stepIndex = EVALUATION_STEPS.indexOf(completedStep);
          newActiveStep = Math.max(newActiveStep, stepIndex + 1);
}
}
});
// Update state if there are new completed steps
if (newCompletedSteps.length > completedSteps.length) {
setCompletedSteps(newCompletedSteps);
setActiveStep(newActiveStep);
}
// Check recent logs to determine current phase
const recentLogs = evaluationLogs.slice(-10);
// Detect completion conditions
const isComplete =
recentLogs.some((log) =>
log.includes("[SUCCESS] Evaluation completed")
) ||
completedSteps.includes("results_compilation") ||
newCompletedSteps.includes("results_compilation");
if (isComplete) {
setCurrentPhase("complete");
setEvaluationComplete(true);
// Stop polling when evaluation is complete
if (pollingIntervalRef.current) {
clearInterval(pollingIntervalRef.current);
}
if (timerIntervalRef.current) {
clearInterval(timerIntervalRef.current);
}
// Notify parent component that evaluation is complete
if (onComplete) {
onComplete({
success: true,
sessionId,
logs: evaluationLogs,
});
}
} else if (recentLogs.some((log) => log.includes("Comparing models"))) {
setCurrentPhase("compiling_results");
} else if (recentLogs.some((log) => log.includes("Starting evaluations"))) {
setCurrentPhase("evaluating");
} else if (recentLogs.some((log) => log.includes("Initialization"))) {
setCurrentPhase("preparing");
}
}, [evaluationLogs, completedSteps, activeStep, sessionId, onComplete]);
// Format elapsed time as HH:MM:SS
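  // e.g. elapsedTime = 3725 -> "01:02:05"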
const formatElapsedTime = () => {
const hours = Math.floor(elapsedTime / 3600);
const minutes = Math.floor((elapsedTime % 3600) / 60);
const seconds = elapsedTime % 60;
return [
hours.toString().padStart(2, "0"),
minutes.toString().padStart(2, "0"),
seconds.toString().padStart(2, "0"),
].join(":");
};
// Start benchmark evaluation
const startEvaluation = async () => {
if (!sessionId) {
setError("Missing session ID");
return;
}
setEvaluating(true);
setEvaluationLogs([]);
setError(null);
setCurrentPhase("initializing");
setCompletedSteps([]);
setActiveStep(0);
try {
// Call API to start evaluation
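      // (the demo backend is assumed to be reachable at localhost:3001)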
const response = await fetch("http://localhost:3001/evaluate-benchmark", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
session_id: sessionId,
}),
});
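      // Expected response shape, inferred from the usage below:
      // { logs?: string[], error?: string }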
const result = await response.json();
if (response.ok) {
setEvaluationLogs(result.logs || []);
// Set up polling to retrieve more logs
pollingIntervalRef.current = setInterval(async () => {
          // NOTE: state read inside this callback is captured when the
          // interval is created (a stale closure), so completion is detected
          // from the poll response below and by the log-watching effect,
          // both of which clear this interval.
try {
// Call API to get latest logs
const logsResponse = await fetch(
`http://localhost:3001/evaluation-logs/${sessionId}`
);
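            // Expected response shape, inferred from the usage below:
            // { logs: string[], is_completed: boolean }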
if (logsResponse.ok) {
const logsResult = await logsResponse.json();
              // Update logs only if the server returned more than we have;
              // the functional update compares against the latest state
              // rather than the stale closure value.
              if (logsResult.logs) {
                setEvaluationLogs((prevLogs) =>
                  logsResult.logs.length > prevLogs.length
                    ? logsResult.logs
                    : prevLogs
                );
              }
// Check if evaluation is complete
if (logsResult.is_completed) {
setEvaluationComplete(true);
clearInterval(pollingIntervalRef.current);
}
}
} catch (error) {
console.log("Error polling logs:", error);
// Don't stop polling on network errors
}
}, 2000); // Poll every 2 seconds
} else {
// Handle error
setEvaluationLogs([`Error: ${result.error || "Unknown error"}`]);
setError(result.error || "Benchmark evaluation failed");
}
} catch (error) {
console.error("Error starting evaluation:", error);
setEvaluationLogs([`Error: ${error.message || "Unknown error"}`]);
setError("Error connecting to server");
} finally {
setEvaluating(false);
}
};
// Get title based on current phase
const getPhaseTitle = () => {
switch (currentPhase) {
case "initializing":
return "Preparing evaluation...";
case "preparing":
return "Preparing models...";
case "evaluating":
return "Evaluating models...";
case "compiling_results":
return "Compiling results...";
case "complete":
return "Evaluation completed successfully!";
default:
return "Processing...";
}
};
// Get current step info for display
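  // e.g. activeStep = 1 -> "Model Evaluation (33%)"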
const getCurrentStepInfo = () => {
const totalSteps = EVALUATION_STEPS.length;
const currentStepIndex = activeStep;
// If no active step yet
if (currentStepIndex === 0 && completedSteps.length === 0) {
      return "Starting... (0%)";
}
// If all steps completed
if (currentStepIndex >= totalSteps) {
      return "Completed (100%)";
}
// Calculate percentage
const percentage = Math.round((currentStepIndex / totalSteps) * 100);
// Get current step name
const currentStepName =
STEP_LABELS[EVALUATION_STEPS[currentStepIndex]] || "Processing";
return `${currentStepName} (${percentage}%)`;
};
// Function to navigate to results page
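  // (assumes an /evaluation-display route is registered in the app's router)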
const viewResults = () => {
navigate(`/evaluation-display?session=${sessionId}`);
};
return (
<Paper
elevation={3}
sx={{
p: 4,
mt: 3,
mb: 3,
display: "flex",
flexDirection: "column",
alignItems: "center",
justifyContent: "center",
minHeight: 200,
}}
>
{error ? (
<Alert severity="error" sx={{ width: "100%" }}>
{error}
</Alert>
) : (
<>
{evaluationComplete ? (
<>
<Alert severity="success" sx={{ width: "100%", mb: 3 }}>
Evaluation completed successfully!
</Alert>
<Button
variant="contained"
color="primary"
onClick={viewResults}
sx={{ mb: 3 }}
>
View Results Leaderboard
</Button>
</>
) : (
<>
<CircularProgress size={60} sx={{ mb: 2 }} />
<Typography variant="h6" component="div" gutterBottom>
{getPhaseTitle()}
</Typography>
{/* Step progress indicator */}
<Typography variant="body1" color="text.secondary">
{getCurrentStepInfo()}
</Typography>
{/* Timer display */}
<Box
sx={{
display: "flex",
alignItems: "center",
mt: 1,
color: "text.secondary",
opacity: 0.5,
}}
>
<Typography variant="body2">{formatElapsedTime()}</Typography>
</Box>
</>
)}
</>
)}
{/* Use the LogDisplay component for logs */}
<LogDisplay logs={evaluationLogs} height={300} />
</Paper>
);
};
export default BenchmarkEvaluation;