Spaces:

OpenEvals
/

find-a-leaderboard

Running

App Files Community

find-a-leaderboard/client/src/pages/HowToSubmitPage/HowToSubmitPage.jsx

tfrere

add hallucination to how to submit | fix show arena only behaviour

de3d81e 4 months ago

raw

history blame

24.2 kB

	import React from "react";
	import {
	Box,
	Typography,
	Paper,
	Stack,
	Divider,
	alpha,
	Link,
	Grid,
	InputLabel,
	Tooltip,
	IconButton,
	} from "@mui/material";
	import InfoOutlinedIcon from "@mui/icons-material/InfoOutlined";
	import PageHeader from "../../components/PageHeader/PageHeader";

	const StepNumber = ({ number }) => (
	<Box
	sx={{
	width: 32,
	height: 32,
	borderRadius: "50%",
	display: "flex",
	alignItems: "center",
	justifyContent: "center",
	border: "1px solid",
	borderColor: "primary.main",
	color: "primary.main",
	fontSize: "0.875rem",
	fontWeight: 600,
	flexShrink: 0,
	bgcolor: "transparent",
	}}
	>
	{number}
	</Box>
	);

	const Section = ({ title, children }) => (
	<Paper
	elevation={0}
	sx={{
	border: "1px solid",
	borderColor: "divider",
	borderRadius: 1,
	overflow: "hidden",
	mb: 3,
	}}
	>
	<Box
	sx={{
	px: 3,
	py: 2,
	borderBottom: "1px solid",
	borderColor: "divider",
	bgcolor: (theme) =>
	theme.palette.mode === "dark"
	? alpha(theme.palette.background.paper, 0.5)
	: "grey.50",
	}}
	>
	<Typography variant="h6" sx={{ fontWeight: 600, color: "text.primary" }}>
	{title}
	</Typography>
	</Box>
	<Box sx={{ p: 3, bgcolor: "background.paper" }}>{children}</Box>
	</Paper>
	);

	const Tag = ({ children }) => (
	<Box
	component="span"
	sx={{
	display: "inline-block",
	px: 1.5,
	py: 0.5,
	bgcolor: (theme) => alpha(theme.palette.primary.main, 0.1),
	color: "primary.main",
	borderRadius: 1,
	fontSize: "0.875rem",
	fontWeight: 600,
	mr: 1,
	mb: 1,
	}}
	>
	{children}
	</Box>
	);

	const TagCard = ({ title, description, tags, explanations }) => (
	<Paper
	elevation={1}
	sx={{
	p: 3,
	height: "100%",
	display: "flex",
	flexDirection: "column",
	borderRadius: 2,
	border: "1px solid",
	borderColor: "grey.200",
	}}
	>
	<Typography variant="h6" sx={{ fontWeight: 600, mb: 2 }}>
	{title}
	</Typography>
	{description && (
	<Typography variant="body2" sx={{ mb: 2, color: "text.secondary" }}>
	{description}
	</Typography>
	)}
	<Box sx={{ flex: 1 }}>
	{tags.map((tag, index) => (
	<Box key={index} sx={{ mb: 2 }}>
	<Tag>{tag}</Tag>
	{explanations && explanations[index] && (
	<Typography
	variant="body2"
	sx={{
	color: "text.secondary",
	mt: 1,
	display: "block",
	}}
	dangerouslySetInnerHTML={{ __html: explanations[index] }}
	/>
	)}
	</Box>
	))}
	</Box>
	</Paper>
	);

	const CodeBlock = ({ children }) => (
	<Box
	sx={{
	backgroundColor: (theme) =>
	alpha(
	theme.palette.primary.main,
	theme.palette.mode === "dark" ? 0.15 : 0.05
	),
	px: 2,
	py: 4,
	borderRadius: 1,
	fontFamily: "monospace",
	mb: 2,
	position: "relative",
	"& .key": {
	color: (theme) => theme.palette.primary.main,
	},
	"& .value": {
	color: (theme) =>
	theme.palette.mode === "dark"
	? theme.palette.success.light
	: theme.palette.success.dark,
	},
	"& .comment": {
	color: (theme) => theme.palette.text.secondary,
	},
	"& .punctuation": {
	color: (theme) => theme.palette.text.primary,
	},
	}}
	>
	<InputLabel
	sx={{
	position: "absolute",
	right: 8,
	top: 8,
	fontSize: "0.75rem",
	color: "text.secondary",
	fontFamily: "monospace",
	bgcolor: "background.paper",
	px: 1,
	py: 0.5,
	borderRadius: 1,
	border: "1px solid",
	borderColor: "divider",
	zIndex: 1,
	}}
	>
	README.md
	</InputLabel>
	{children}
	</Box>
	);

	// Emoji returned when a tag has no prefix, an unknown prefix, or an unknown name.
	const TAG_EMOJI_FALLBACK = "🏷️";

	// Emoji lookup keyed first by tag prefix (text before ":") and then by tag
	// name (text after ":"). Built once at module load instead of on every call.
	// Keys must match the tag text exactly, so multi-word names are quoted
	// ("vibe check", "your own language"); the older camelCase keys are kept as
	// aliases so any caller that relied on them keeps working.
	const TAG_EMOJI_MAP = {
	  submission: {
	    automatic: "🤖",
	    semiautomatic: "🔄",
	    manual: "👨‍💻",
	    closed: "🔒",
	  },
	  test: {
	    public: "👀",
	    mix: "🔀",
	    private: "🔐",
	    rolling: "🎲",
	  },
	  judge: {
	    function: "⚙️",
	    model: "🧠",
	    humans: "👥",
	    "vibe check": "✨",
	    vibeCheck: "✨",
	  },
	  modality: {
	    text: "📝",
	    image: "🖼️",
	    audio: "🎵",
	    video: "🎥",
	    // "agent" is the tag name used on the page; "tools" is kept as an alias.
	    agent: "🛠️",
	    tools: "🛠️",
	    artefacts: "🏺",
	    embeddings: "🔤",
	  },
	  eval: {
	    generation: "✨",
	    math: "🔢",
	    code: "💻",
	    reasoning: "🧠",
	    performance: "⚡",
	    safety: "🛡️",
	    hallucination: "🌫️",
	    // The page lists RAG under the "eval" prefix; "task:rag" below is kept too.
	    rag: "🔍",
	  },
	  task: {
	    rag: "🔍",
	  },
	  language: {
	    english: "🇬🇧",
	    french: "🇫🇷",
	    "your own language": "🌍",
	    yourOwnLanguage: "🌍",
	  },
	  domain: {
	    financial: "💰",
	    medical: "⚕️",
	    legal: "⚖️",
	    biology: "🧬",
	    translation: "🔄",
	    chemistry: "🧪",
	    physics: "⚛️",
	    commercial: "🏢",
	  },
	};

	/**
	 * Returns the emoji associated with a `prefix:name` tag.
	 *
	 * Only the first two ":"-separated segments of the tag are considered.
	 *
	 * @param {string} tag - Tag such as "judge:model" or "language:english".
	 * @returns {string} The mapped emoji, or the generic label emoji when the
	 *   tag is not a string, has no ":" separator, or is not in the table.
	 */
	const getTagEmoji = (tag) => {
	  if (typeof tag !== "string") {
	    return TAG_EMOJI_FALLBACK;
	  }

	  const [type, name] = tag.split(":");

	  // Own-property checks keep inherited keys such as "constructor" or
	  // "toString" from being mistaken for table entries.
	  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

	  if (name === undefined || !hasOwn(TAG_EMOJI_MAP, type)) {
	    return TAG_EMOJI_FALLBACK;
	  }

	  const group = TAG_EMOJI_MAP[type];
	  return hasOwn(group, name) ? group[name] : TAG_EMOJI_FALLBACK;
	};

	const TagItem = ({ tag, explanation }) => {
	// Extract the name without prefix
	const name = tag.split(":")[1];
	const emoji = getTagEmoji(tag);

	return (
	<Paper
	elevation={0}
	sx={{
	height: "100%",
	display: "flex",
	flexDirection: "column",
	borderRadius: 2,
	border: "1px solid",
	borderColor: "divider",
	overflow: "hidden",
	}}
	>
	<Box
	sx={{
	bgcolor: (theme) =>
	alpha(
	theme.palette.primary.main,
	theme.palette.mode === "dark" ? 0.15 : 0.05
	),
	py: 2,
	px: 2,
	borderRadius: 0,
	mb: 2,
	position: "relative",
	}}
	>
	<Typography
	variant="h6"
	sx={{
	fontWeight: 700,
	color: "text.primary",
	letterSpacing: "-0.02em",
	pr: 5,
	textTransform: "capitalize",
	}}
	>
	{emoji}    {name}
	</Typography>
	</Box>
	<Box sx={{ px: 2, pb: 2 }}>
	<Typography
	variant="body2"
	sx={{
	color: "text.secondary",
	mb: 2,
	fontSize: "0.75rem",
	}}
	>
	<strong>{tag.split(":")[0]}</strong>:{tag.split(":")[1]}
	</Typography>
	{explanation && (
	<Typography
	variant="body2"
	sx={{
	color: "text.secondary",
	flex: 1,
	}}
	dangerouslySetInnerHTML={{ __html: explanation }}
	/>
	)}
	</Box>
	</Paper>
	);
	};

	const TagSection = ({ title, description, tags, explanations }) => {
	// Determine if this section should have 4 columns
	const shouldHaveFourColumns = [
	"Submission type",
	"Test set status",
	"Judges",
	"Domain",
	].includes(title);

	return (
	<Box sx={{ mb: 8 }}>
	<Typography variant="h6" sx={{ fontWeight: 600, mb: 1 }}>
	{title}
	</Typography>
	{description && (
	<Typography variant="body1" sx={{ mb: 4, color: "text.secondary" }}>
	{description}
	</Typography>
	)}
	<Grid container spacing={2}>
	{tags.map((tag, index) => (
	<Grid
	item
	xs={12}
	sm={6}
	md={shouldHaveFourColumns ? 3 : 4}
	key={index}
	>
	<TagItem
	tag={tag}
	explanation={explanations ? explanations[index] : null}
	/>
	</Grid>
	))}
	</Grid>
	</Box>
	);
	};

	/**
	 * "How to submit ?" page.
	 *
	 * Takes no props and renders, inside a centred container capped at 1200px:
	 *  - a PageHeader with the page title and subtitle;
	 *  - a "Configuration steps" Section with two numbered steps ("Create a
	 *    Space", "Add metadata") next to a CodeBlock showing a sample README.md
	 *    YAML front matter (short_description plus a list of tags);
	 *  - a "What do the tags mean?" Section made of one TagSection per tag
	 *    category (Domain, Modalities, Evaluation categories, Language,
	 *    Submission type, Test set status, Judges), followed by a contact link.
	 *
	 * Each `explanations` array is matched to its `tags` array by position, with
	 * "" for tags that have no explanation. The explanation strings contain
	 * inline HTML (<strong>), which TagItem injects via dangerouslySetInnerHTML.
	 */
	const HowToSubmitPage = () => {
	// Static content only: the component uses no props, state or effects.
	return (
	<Box sx={{ width: "100%", maxWidth: 1200, margin: "0 auto", padding: 4 }}>
	<PageHeader
	title="How to submit ?"
	subtitle={
	<>
	Join the <span style={{ fontWeight: 600 }}>community</span> of{" "}
	<span style={{ fontWeight: 600 }}>"leaderboards on the Hub"</span>
	</>
	}
	/>

	<Section title="Configuration steps">
	<Box
	sx={{
	display: "flex",
	gap: 4,
	flexDirection: { xs: "column", md: "column", lg: "row" },
	}}
	>
	<Stack spacing={4} sx={{ flex: { xs: "1 1 auto", md: "0 0 45%" } }}>
	<Stack spacing={3}>
	<Stack direction="row" spacing={2} alignItems="center">
	<StepNumber number={1} />
	<Typography
	variant="subtitle1"
	sx={{
	fontWeight: 600,
	color: "text.primary",
	letterSpacing: "-0.01em",
	}}
	>
	Create a Space
	</Typography>
	</Stack>
	<Box sx={{ pl: 7 }}>
	<Typography variant="body2" color="text.secondary">
	Your leaderboard must be hosted on a{" "}
	<Link
	href="https://huggingface.co/docs/hub/spaces"
	target="_blank"
	rel="noopener noreferrer"
	>
	Hugging Face Space
	</Link>
	.
	</Typography>
	</Box>
	</Stack>

	<Stack spacing={3}>
	<Stack direction="row" spacing={2} alignItems="center">
	<StepNumber number={2} />
	<Typography
	variant="subtitle1"
	sx={{
	fontWeight: 600,
	color: "text.primary",
	letterSpacing: "-0.01em",
	}}
	>
	Add metadata
	</Typography>
	</Stack>
	<Box sx={{ pl: 7 }}>
	<Typography
	variant="body2"
	color="text.secondary"
	sx={{ mb: 2 }}
	>
	Like{" "}
	<Link
	href="https://huggingface.co/docs/hub/model-cards"
	target="_blank"
	rel="noopener noreferrer"
	>
	model cards
	</Link>
	, your Space's{" "}
	<InputLabel
	sx={{
	display: "inline-flex",
	fontSize: "0.75rem",
	color: "text.secondary",
	fontFamily: "monospace",
	bgcolor: "background.paper",
	px: 1,
	py: 0.5,
	borderRadius: 1,
	border: "1px solid",
	borderColor: "divider",
	mx: 0.5,
	}}
	>
	README.md
	</InputLabel>{" "}
	file should include specific <strong>metadata</strong> in a
	YAML section at the top:
	</Typography>
	<ul
	style={{
	margin: 0,
	paddingLeft: "20px",
	color: "text.secondary",
	}}
	>
	<li>
	<Typography
	variant="body2"
	color="text.secondary"
	sx={{ display: "flex", alignItems: "center", gap: 0.5 }}
	>
	Add either the <strong>leaderboard</strong> or{" "}
	<strong>arena</strong> tag
	<Tooltip
	title={
	<Box sx={{ p: 1, maxWidth: 300 }}>
	<Typography
	variant="subtitle2"
	sx={{
	mb: 1,
	fontWeight: 600,
	color: "text.secondary",
	}}
	>
	Choose between:
	</Typography>
	<Typography
	variant="body2"
	component="div"
	sx={{ mb: 1 }}
	>
	• <strong>arena</strong> - for human evaluations
	<br />
	<Box component="span" sx={{ pl: 2 }}>
	requires <Tag>judge:humans</Tag>
	</Box>
	</Typography>
	<Typography variant="body2" component="div">
	• <strong>leaderboard</strong> - for automated
	evaluations
	<br />
	<Box component="span" sx={{ pl: 2 }}>
	with <Tag>judge:function</Tag> or{" "}
	<Tag>judge:model</Tag>
	</Box>
	</Typography>
	</Box>
	}
	arrow
	placement="right"
	componentsProps={{
	tooltip: {
	sx: {
	bgcolor: "background.paper",
	color: "text.primary",
	"& .MuiTooltip-arrow": {
	color: "background.paper",
	},
	boxShadow: (theme) => theme.shadows[2],
	},
	},
	}}
	>
	<IconButton
	size="small"
	sx={{
	p: 0.5,
	color: "text.secondary",
	"&:hover": {
	color: "primary.main",
	bgcolor: (theme) =>
	alpha(theme.palette.primary.main, 0.1),
	},
	}}
	>
	<InfoOutlinedIcon sx={{ fontSize: "1rem" }} />
	</IconButton>
	</Tooltip>
	</Typography>
	</li>
	<li>
	<Typography variant="body2" color="text.secondary">
	Include a <strong>short_description</strong> field to
	explain the purpose of your evaluation
	</Typography>
	</li>
	<li>
	<Typography variant="body2" color="text.secondary">
	Add <strong>metadata tags</strong> to categorize your
	evaluation (see examples on the right)
	</Typography>
	</li>
	</ul>
	</Box>
	</Stack>
	</Stack>

	<Box sx={{ flex: 1 }}>
	<CodeBlock>
	---
	<br />
	<span className="key">short_description</span>
	<span className="punctuation">:</span>{" "}
	<span className="value">
	Evaluating LLMs on math reasoning tasks
	</span>
	<br />
	<span className="key">tags</span>
	<span className="punctuation">:</span>
	<br />
	<span className="punctuation">  -</span>{" "}
	<span className="value">leaderboard</span>
	<span className="comment">
	#
	Type of leaderboard
	</span>
	<br />
	<span className="punctuation">  -</span>{" "}
	<span className="value">submission:automatic</span>{" "}
	<span className="comment"># How models are submitted</span>
	<br />
	<span className="punctuation">  -</span>{" "}
	<span className="value">test:public</span>{" "}
	<span className="comment">
	# Test set
	visibility
	</span>
	<br />
	<span className="punctuation">  -</span>{" "}
	<span className="value">judge:function</span>{" "}
	<span className="comment">
	# Evaluation method
	</span>
	<br />
	<span className="punctuation">  -</span>{" "}
	<span className="value">modality:text</span>{" "}
	<span className="comment">
	# Input/output type
	</span>
	<br />
	<span className="punctuation">  -</span>{" "}
	<span className="value">language:english</span>{" "}
	<span className="comment">
	# Language coverage
	</span>
	<br />
	<span className="punctuation">  -</span>{" "}
	<span className="value">domain:financial</span>{" "}
	<span className="comment">
	# Specific domain
	</span>
	<br />
	---
	</CodeBlock>
	</Box>
	</Box>
	</Section>

	<Section title="What do the tags mean?">
	<TagSection
	title="Domain"
	description="Indicates the specific domain of the leaderboard"
	tags={[
	"domain:medical",
	"domain:chemistry",
	"domain:physics",
	"domain:biology",
	"domain:financial",
	"domain:legal",
	"domain:commercial",
	"domain:translation",
	]}
	/>

	<TagSection
	title="Modalities"
	description="Can be any (or several) of the following list"
	tags={[
	"modality:text",
	"modality:image",
	"modality:audio",
	"modality:video",
	"modality:agent",
	"modality:artefacts",
	"modality:3d",
	]}
	explanations={[
	"",
	"",
	"",
	"",
	"requires added <strong>tool usage</strong> - mostly for <strong>assistant models</strong> (a bit outside of usual modalities)",
	"the leaderboard concerns itself with <strong>machine learning artefacts</strong> as themselves, for example, quality evaluation of <strong>text embeddings</strong>",
	"",
	]}
	/>

	<TagSection
	title="Evaluation categories"
	description="Can be any (or several) of the following list"
	tags={[
	"eval:generation",
	"eval:math",
	"eval:code",
	"eval:reasoning",
	"eval:performance",
	"eval:safety",
	"eval:hallucination",
	"eval:rag",
	]}
	explanations={[
	"the evaluation looks at <strong>generation capabilities</strong> specifically (can be image generation, text generation, ...)",
	"the evaluation tests <strong>math abilities</strong>",
	"the evaluation tests <strong>coding capabilities</strong>",
	"the evaluation tests <strong>reasoning abilities</strong>",
	"model <strong>performance</strong> (speed, energy consumption, ...)",
	"the evaluation considers <strong>safety</strong>, <strong>toxicity</strong>, <strong>bias</strong>",
	"the evaluation measures the model's tendency to <strong>hallucinate</strong> or generate <strong>false information</strong>",
	"the evaluation tests <strong>RAG</strong> (Retrieval-Augmented Generation) capabilities",
	]}
	/>

	<TagSection
	title="Language"
	description="You can indicate the languages covered by your benchmark like so: language:mylanguage."
	tags={[
	"language:english",
	"language:french",
	"language:your own language",
	]}
	explanations={[
	"",
	"",
	"At the moment, we do not support language codes, please use the language name in English.",
	]}
	/>

	<TagSection
	title="Submission type"
	description="Arenas are not concerned by this category."
	tags={[
	"submission:automatic",
	"submission:semiautomatic",
	"submission:manual",
	"submission:closed",
	]}
	explanations={[
	"users can submit their models as such to the leaderboard, and evaluation is run <strong>automatically</strong> without human intervention",
	"the leaderboard requires the <strong>model owner</strong> to run evaluations on his side and submit the results",
	"the leaderboard requires the <strong>leaderboard owner</strong> to run evaluations for new submissions",
	"the leaderboard <strong>does not accept</strong> submissions at the moment",
	]}
	/>

	<TagSection
	title="Test set status"
	description="Arenas are not concerned by this category."
	tags={["test:public", "test:mix", "test:private", "test:rolling"]}
	explanations={[
	"all the test sets used are <strong>public</strong>, the evaluations are completely <strong>reproducible</strong>",
	"some test sets are <strong>public</strong> and some <strong>private</strong>",
	"all the test sets used are <strong>private</strong>, the evaluations are hard to game",
	"the test sets used <strong>change regularly</strong> through time and evaluation scores are refreshed",
	]}
	/>

	<TagSection
	title="Judges"
	tags={[
	"judge:function",
	"judge:model",
	"judge:humans",
	"judge:vibe check",
	]}
	explanations={[
	"evaluations are run <strong>automatically</strong>, using an evaluation suite such as <strong>lm_eval</strong> or <strong>lighteval</strong>",
	"evaluations are run using a <strong>model as a judge</strong> approach to rate answer",
	"evaluations are <strong>done by humans</strong> to rate answer - <strong>this is an arena</strong>",
	"evaluations are <strong>done manually</strong> by one or several humans",
	]}
	/>

	<Typography
	variant="body2"
	sx={{
	mt: 3,
	color: "text.secondary",
	fontSize: "0.875rem",
	fontStyle: "italic",
	}}
	>
	If you would like to see a tag that is not currently represented,
	please contact{" "}
	<Link
	href="https://huggingface.co/clementine"
	target="_blank"
	rel="noopener noreferrer"
	sx={{
	color: "primary.main",
	textDecoration: "none",
	"&:hover": {
	textDecoration: "underline",
	},
	}}
	>
	Clémentine Fourrier
	</Link>{" "}
	on Hugging Face.
	</Typography>
	</Section>
	</Box>
	);
	};

	export default HowToSubmitPage;