import React, { useState, useEffect } from "react";
import {
Box,
Paper,
Typography,
Table,
TableBody,
TableCell,
TableContainer,
TableHead,
TableRow,
Alert,
LinearProgress,
Card,
CardContent,
Link,
} from "@mui/material";
import OpenInNewIcon from "@mui/icons-material/OpenInNew";
const EvaluationDisplay = ({ sessionId }) => {
const [results, setResults] = useState(null);
const [loading, setLoading] = useState(true);
const [error, setError] = useState(null);
useEffect(() => {
const fetchEvaluationResults = async () => {
if (!sessionId) {
setError("No session ID provided");
setLoading(false);
return;
}
try {
// Fetch evaluation results from the API
const response = await fetch(
`http://localhost:3001/evaluation-results/${sessionId}`
);
if (!response.ok) {
throw new Error(`Failed to fetch results: ${response.status}`);
}
const data = await response.json();
if (!data.success) {
throw new Error(data.message || "Failed to fetch evaluation results");
}
setResults(data.results);
} catch (err) {
console.error("Error fetching evaluation results:", err);
setError(err.message);
} finally {
setLoading(false);
}
};
fetchEvaluationResults();
}, [sessionId]);
// Format accuracy as percentage
const formatAccuracy = (value) => {
return `${(value * 100).toFixed(2)}%`;
};
// Format evaluation time
const formatTime = (seconds) => {
return `${seconds.toFixed(2)}s`;
};
if (loading) {
return (
Loading Evaluation Results...
);
}
if (error) {
return (
{error}
);
}
if (
!results ||
!results.models_comparison ||
results.models_comparison.length === 0
) {
return (
No evaluation results found for this benchmark.
);
}
return (
Evaluation Results
Rank
Model
Provider
Accuracy
Std Error
Eval Time
Status
{results.models_comparison.map((model, index) => (
{index + 1}
{model.model_name}
{model.provider}
{model.success ? formatAccuracy(model.accuracy) : "-"}
{model.success ? formatAccuracy(model.accuracy_stderr) : "-"}
{model.success ? formatTime(model.evaluation_time) : "-"}
{model.success ? (
✓ Success
) : (
✗ Failed
)}
))}
Need larger evaluation?{" "}
Go to this page
);
};
export default EvaluationDisplay;