Spaces:

yourbench
/

demo

Running on CPU Upgrade

File size: 10,890 Bytes

970eef1

import React, { useState, useEffect, useRef } from "react";
import {
  Box,
  Typography,
  CircularProgress,
  Alert,
  Paper,
  Divider,
  Button,
} from "@mui/material";
import AccessTimeIcon from "@mui/icons-material/AccessTime";
import LogDisplay from "./LogDisplay";
import { useNavigate } from "react-router-dom";

// Display label for each evaluation stage, keyed by the stage identifier that
// appears in the backend logs. Key order is the pipeline order.
const STEP_LABELS = {
  preparation: "Preparation",
  model_evaluation: "Model Evaluation",
  results_compilation: "Results Compilation",
};

// Ordered stage identifiers, derived from the label table so the two
// structures cannot drift apart (string keys keep their insertion order).
const EVALUATION_STEPS = Object.keys(STEP_LABELS);

/**
 * Component to handle benchmark evaluation and display logs
 *
 * @param {Object} props - Component props
 * @param {string} props.sessionId - Session ID of the benchmark to evaluate
 * @param {Function} props.onComplete - Function to call when evaluation is complete
 * @returns {JSX.Element} Benchmark evaluation component
 */
const BenchmarkEvaluation = ({ sessionId, onComplete }) => {
  const [evaluating, setEvaluating] = useState(false);
  const [evaluationComplete, setEvaluationComplete] = useState(false);
  const [evaluationLogs, setEvaluationLogs] = useState([]);
  const [error, setError] = useState(null);
  const [currentPhase, setCurrentPhase] = useState("initializing");
  const [completedSteps, setCompletedSteps] = useState([]);
  const [activeStep, setActiveStep] = useState(0);
  const [elapsedTime, setElapsedTime] = useState(0);

  const pollingIntervalRef = useRef(null);
  const timerIntervalRef = useRef(null);
  const startTimeRef = useRef(null);

  const navigate = useNavigate();

  // Start evaluation when component mounts
  useEffect(() => {
    // Set start time
    startTimeRef.current = Date.now();

    // Start timer
    timerIntervalRef.current = setInterval(() => {
      const timeElapsed = Math.floor(
        (Date.now() - startTimeRef.current) / 1000
      );
      setElapsedTime(timeElapsed);
    }, 1000);

    startEvaluation();

    // Clean up intervals on unmount
    return () => {
      if (pollingIntervalRef.current) {
        clearInterval(pollingIntervalRef.current);
      }
      if (timerIntervalRef.current) {
        clearInterval(timerIntervalRef.current);
      }
    };
  }, []);

  // Determine current phase and completed steps from logs
  useEffect(() => {
    if (evaluationLogs.length === 0) return;

    // Check all logs for completed steps
    const newCompletedSteps = [...completedSteps];
    let newActiveStep = activeStep;

    evaluationLogs.forEach((log) => {
      // Detect completed steps (format: [SUCCESS] Stage completed: step_name)
      const match = log.match(/\[SUCCESS\] Stage completed: (\w+)/);
      if (match && match[1]) {
        const completedStep = match[1].trim();
        if (
          EVALUATION_STEPS.includes(completedStep) &&
          !newCompletedSteps.includes(completedStep)
        ) {
          newCompletedSteps.push(completedStep);
          // Set active step to index of next step
          const stepIndex = EVALUATION_STEPS.indexOf(completedStep);
          if (stepIndex >= 0 && stepIndex + 1 > newActiveStep) {
            newActiveStep = stepIndex + 1;
            if (newActiveStep >= EVALUATION_STEPS.length) {
              newActiveStep = EVALUATION_STEPS.length;
            }
          }
        }
      }
    });

    // Update state if there are new completed steps
    if (newCompletedSteps.length > completedSteps.length) {
      setCompletedSteps(newCompletedSteps);
      setActiveStep(newActiveStep);
    }

    // Check recent logs to determine current phase
    const recentLogs = evaluationLogs.slice(-10);

    // Detect completion conditions
    const isComplete =
      recentLogs.some((log) =>
        log.includes("[SUCCESS] Evaluation completed")
      ) ||
      completedSteps.includes("results_compilation") ||
      newCompletedSteps.includes("results_compilation");

    if (isComplete) {
      setCurrentPhase("complete");
      setEvaluationComplete(true);
      // Stop polling when evaluation is complete
      if (pollingIntervalRef.current) {
        clearInterval(pollingIntervalRef.current);
      }
      if (timerIntervalRef.current) {
        clearInterval(timerIntervalRef.current);
      }
      // Notify parent component that evaluation is complete
      if (onComplete) {
        onComplete({
          success: true,
          sessionId,
          logs: evaluationLogs,
        });
      }
    } else if (recentLogs.some((log) => log.includes("Comparing models"))) {
      setCurrentPhase("compiling_results");
    } else if (recentLogs.some((log) => log.includes("Starting evaluations"))) {
      setCurrentPhase("evaluating");
    } else if (recentLogs.some((log) => log.includes("Initialization"))) {
      setCurrentPhase("preparing");
    }
  }, [evaluationLogs, completedSteps, activeStep, sessionId, onComplete]);

  // Format elapsed time as HH:MM:SS
  const formatElapsedTime = () => {
    const hours = Math.floor(elapsedTime / 3600);
    const minutes = Math.floor((elapsedTime % 3600) / 60);
    const seconds = elapsedTime % 60;

    return [
      hours.toString().padStart(2, "0"),
      minutes.toString().padStart(2, "0"),
      seconds.toString().padStart(2, "0"),
    ].join(":");
  };

  // Start benchmark evaluation
  const startEvaluation = async () => {
    if (!sessionId) {
      setError("Missing session ID");
      return;
    }

    setEvaluating(true);
    setEvaluationLogs([]);
    setError(null);
    setCurrentPhase("initializing");
    setCompletedSteps([]);
    setActiveStep(0);

    try {
      // Call API to start evaluation
      const response = await fetch("http://localhost:3001/evaluate-benchmark", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          session_id: sessionId,
        }),
      });

      const result = await response.json();

      if (response.ok) {
        setEvaluationLogs(result.logs || []);

        // Set up polling to retrieve more logs
        pollingIntervalRef.current = setInterval(async () => {
          // Check if we're already done
          if (evaluationComplete) {
            clearInterval(pollingIntervalRef.current);
            return;
          }

          try {
            // Call API to get latest logs
            const logsResponse = await fetch(
              `http://localhost:3001/evaluation-logs/${sessionId}`
            );

            if (logsResponse.ok) {
              const logsResult = await logsResponse.json();

              // Update logs if there are new ones
              if (
                logsResult.logs &&
                logsResult.logs.length > evaluationLogs.length
              ) {
                setEvaluationLogs(logsResult.logs);
              }

              // Check if evaluation is complete
              if (logsResult.is_completed) {
                setEvaluationComplete(true);
                clearInterval(pollingIntervalRef.current);
              }
            }
          } catch (error) {
            console.log("Error polling logs:", error);
            // Don't stop polling on network errors
          }
        }, 2000); // Poll every 2 seconds
      } else {
        // Handle error
        setEvaluationLogs([`Error: ${result.error || "Unknown error"}`]);
        setError(result.error || "Benchmark evaluation failed");
      }
    } catch (error) {
      console.error("Error starting evaluation:", error);
      setEvaluationLogs([`Error: ${error.message || "Unknown error"}`]);
      setError("Error connecting to server");
    } finally {
      setEvaluating(false);
    }
  };

  // Get title based on current phase
  const getPhaseTitle = () => {
    switch (currentPhase) {
      case "initializing":
        return "Preparing evaluation...";
      case "preparing":
        return "Preparing models...";
      case "evaluating":
        return "Evaluating models...";
      case "compiling_results":
        return "Compiling results...";
      case "complete":
        return "Evaluation completed successfully!";
      default:
        return "Processing...";
    }
  };

  // Get current step info for display
  const getCurrentStepInfo = () => {
    const totalSteps = EVALUATION_STEPS.length;
    const currentStepIndex = activeStep;

    // If no active step yet
    if (currentStepIndex === 0 && completedSteps.length === 0) {
      return `Starting... (0%)`;
    }

    // If all steps completed
    if (currentStepIndex >= totalSteps) {
      return `Completed (100%)`;
    }

    // Calculate percentage
    const percentage = Math.round((currentStepIndex / totalSteps) * 100);

    // Get current step name
    const currentStepName =
      STEP_LABELS[EVALUATION_STEPS[currentStepIndex]] || "Processing";

    return `${currentStepName} (${percentage}%)`;
  };

  // Function to navigate to results page
  const viewResults = () => {
    navigate(`/evaluation-display?session=${sessionId}`);
  };

  return (
    <Paper
      elevation={3}
      sx={{
        p: 4,
        mt: 3,
        mb: 3,
        display: "flex",
        flexDirection: "column",
        alignItems: "center",
        justifyContent: "center",
        minHeight: 200,
      }}
    >
      {error ? (
        <Alert severity="error" sx={{ width: "100%" }}>
          {error}
        </Alert>
      ) : (
        <>
          {evaluationComplete ? (
            <>
              <Alert severity="success" sx={{ width: "100%", mb: 3 }}>
                Evaluation completed successfully!
              </Alert>
              <Button
                variant="contained"
                color="primary"
                onClick={viewResults}
                sx={{ mb: 3 }}
              >
                View Results Leaderboard
              </Button>
            </>
          ) : (
            <>
              <CircularProgress size={60} sx={{ mb: 2 }} />
              <Typography variant="h6" component="div" gutterBottom>
                {getPhaseTitle()}
              </Typography>

              {/* Step progress indicator */}
              <Typography variant="body1" color="text.secondary">
                {getCurrentStepInfo()}
              </Typography>

              {/* Timer display */}
              <Box
                sx={{
                  display: "flex",
                  alignItems: "center",
                  mt: 1,
                  color: "text.secondary",
                  opacity: 0.5,
                }}
              >
                <Typography variant="body2">{formatElapsedTime()}</Typography>
              </Box>
            </>
          )}
        </>
      )}

      {/* Use the LogDisplay component for logs */}
      <LogDisplay logs={evaluationLogs} height={300} />
    </Paper>
  );
};

export default BenchmarkEvaluation;