Spaces:

yourbench
/

demo

Running on CPU Upgrade

File size: 5,611 Bytes

970eef1

import React, { useState, useEffect } from "react";
import {
  Box,
  Paper,
  Typography,
  Table,
  TableBody,
  TableCell,
  TableContainer,
  TableHead,
  TableRow,
  Alert,
  LinearProgress,
  Card,
  CardContent,
  Link,
} from "@mui/material";
import OpenInNewIcon from "@mui/icons-material/OpenInNew";

const EvaluationDisplay = ({ sessionId }) => {
  const [results, setResults] = useState(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState(null);

  useEffect(() => {
    const fetchEvaluationResults = async () => {
      if (!sessionId) {
        setError("No session ID provided");
        setLoading(false);
        return;
      }

      try {
        // Fetch evaluation results from the API
        const response = await fetch(
          `http://localhost:3001/evaluation-results/${sessionId}`
        );

        if (!response.ok) {
          throw new Error(`Failed to fetch results: ${response.status}`);
        }

        const data = await response.json();

        if (!data.success) {
          throw new Error(data.message || "Failed to fetch evaluation results");
        }

        setResults(data.results);
      } catch (err) {
        console.error("Error fetching evaluation results:", err);
        setError(err.message);
      } finally {
        setLoading(false);
      }
    };

    fetchEvaluationResults();
  }, [sessionId]);

  // Format accuracy as percentage
  const formatAccuracy = (value) => {
    return `${(value * 100).toFixed(2)}%`;
  };

  // Format evaluation time
  const formatTime = (seconds) => {
    return `${seconds.toFixed(2)}s`;
  };

  if (loading) {
    return (
      <Box sx={{ width: "100%", mt: 4, mb: 4 }}>
        <Typography variant="h5" gutterBottom>
          Loading Evaluation Results...
        </Typography>
        <LinearProgress />
      </Box>
    );
  }

  if (error) {
    return (
      <Alert severity="error" sx={{ mt: 4, mb: 4 }}>
        {error}
      </Alert>
    );
  }

  if (
    !results ||
    !results.models_comparison ||
    results.models_comparison.length === 0
  ) {
    return (
      <Alert severity="info" sx={{ mt: 4, mb: 4 }}>
        No evaluation results found for this benchmark.
      </Alert>
    );
  }

  return (
    <Box sx={{ mt: 4, mb: 6 }}>
      <Typography variant="h4" gutterBottom>
        Evaluation Results
      </Typography>

      <TableContainer
        component={Paper}
        sx={{
          border: "1px solid rgba(224, 224, 224, 1)",
          boxShadow: "0 2px 4px rgba(0,0,0,0.05)",
        }}
      >
        <Table sx={{ minWidth: 650 }}>
          <TableHead>
            <TableRow>
              <TableCell>Rank</TableCell>
              <TableCell>Model</TableCell>
              <TableCell>Provider</TableCell>
              <TableCell align="center">Accuracy</TableCell>
              <TableCell align="center">Std Error</TableCell>
              <TableCell align="center">Eval Time</TableCell>
              <TableCell align="center">Status</TableCell>
            </TableRow>
          </TableHead>
          <TableBody>
            {results.models_comparison.map((model, index) => (
              <TableRow
                key={`${model.model_name}-${model.provider}`}
                sx={{
                  "&:last-child td, &:last-child th": { border: 0 },
                  backgroundColor: model.success
                    ? "inherit"
                    : "rgba(0, 0, 0, 0.04)",
                }}
              >
                <TableCell>{index + 1}</TableCell>
                <TableCell component="th" scope="row">
                  <Link
                    href={`https://huggingface.co/${model.model_name}`}
                    target="_blank"
                    rel="noopener noreferrer"
                    sx={{
                      textDecoration: "none",
                      "&:hover": {
                        textDecoration: "underline",
                      },
                      display: "flex",
                      alignItems: "center",
                    }}
                  >
                    {model.model_name}
                    <OpenInNewIcon sx={{ ml: 0.5, fontSize: 16 }} />
                  </Link>
                </TableCell>
                <TableCell>{model.provider}</TableCell>
                <TableCell align="center">
                  {model.success ? formatAccuracy(model.accuracy) : "-"}
                </TableCell>
                <TableCell align="center">
                  {model.success ? formatAccuracy(model.accuracy_stderr) : "-"}
                </TableCell>
                <TableCell align="center">
                  {model.success ? formatTime(model.evaluation_time) : "-"}
                </TableCell>
                <TableCell align="center">
                  {model.success ? (
                    <span style={{ color: "green" }}>✓ Success</span>
                  ) : (
                    <span style={{ color: "red" }}>✗ Failed</span>
                  )}
                </TableCell>
              </TableRow>
            ))}
          </TableBody>
        </Table>
      </TableContainer>

      <Box sx={{ mt: 4, textAlign: "center" }}>
        <Typography variant="body2" color="textSecondary">
          Need larger evaluation?{" "}
          <Link
            href="https://huggingface.co/spaces/yourbench/yourbench"
            target="_blank"
            rel="noopener noreferrer"
          >
            Go to this page
          </Link>
        </Typography>
      </Box>
    </Box>
  );
};

export default EvaluationDisplay;