import React, { useState, useEffect } from "react"; import { Box, Paper, Typography, Table, TableBody, TableCell, TableContainer, TableHead, TableRow, Alert, LinearProgress, Card, CardContent, Link, } from "@mui/material"; import OpenInNewIcon from "@mui/icons-material/OpenInNew"; const EvaluationDisplay = ({ sessionId }) => { const [results, setResults] = useState(null); const [loading, setLoading] = useState(true); const [error, setError] = useState(null); useEffect(() => { const fetchEvaluationResults = async () => { if (!sessionId) { setError("No session ID provided"); setLoading(false); return; } try { // Fetch evaluation results from the API const response = await fetch( `http://localhost:3001/evaluation-results/${sessionId}` ); if (!response.ok) { throw new Error(`Failed to fetch results: ${response.status}`); } const data = await response.json(); if (!data.success) { throw new Error(data.message || "Failed to fetch evaluation results"); } setResults(data.results); } catch (err) { console.error("Error fetching evaluation results:", err); setError(err.message); } finally { setLoading(false); } }; fetchEvaluationResults(); }, [sessionId]); // Format accuracy as percentage const formatAccuracy = (value) => { return `${(value * 100).toFixed(2)}%`; }; // Format evaluation time const formatTime = (seconds) => { return `${seconds.toFixed(2)}s`; }; if (loading) { return ( Loading Evaluation Results... ); } if (error) { return ( {error} ); } if ( !results || !results.models_comparison || results.models_comparison.length === 0 ) { return ( No evaluation results found for this benchmark. ); } return ( Evaluation Results Rank Model Provider Accuracy Std Error Eval Time Status {results.models_comparison.map((model, index) => ( {index + 1} {model.model_name} {model.provider} {model.success ? formatAccuracy(model.accuracy) : "-"} {model.success ? formatAccuracy(model.accuracy_stderr) : "-"} {model.success ? formatTime(model.evaluation_time) : "-"} {model.success ? ( ✓ Success ) : ( ✗ Failed )} ))}
Need larger evaluation?{" "} Go to this page
); }; export default EvaluationDisplay;