tfrere's picture
tfrere HF Staff
update
c78d2a1
raw
history blame
16.1 kB
import React from "react";
import {
Box,
Typography,
Paper,
Stack,
Divider,
alpha,
Link,
} from "@mui/material";
import PageHeader from "../../components/PageHeader/PageHeader";
const StepNumber = ({ number }) => (
<Box
sx={{
width: 32,
height: 32,
borderRadius: "50%",
display: "flex",
alignItems: "center",
justifyContent: "center",
border: "1px solid",
borderColor: "primary.main",
color: "primary.main",
fontSize: "0.875rem",
fontWeight: 600,
flexShrink: 0,
bgcolor: "transparent",
}}
>
{number}
</Box>
);
const Section = ({ title, children }) => (
<Paper
elevation={0}
sx={{
border: "1px solid",
borderColor: "grey.300",
borderRadius: 1,
overflow: "hidden",
mb: 3,
}}
>
<Box
sx={{
px: 3,
py: 2,
borderBottom: "1px solid",
borderColor: (theme) =>
theme.palette.mode === "dark"
? alpha(theme.palette.divider, 0.1)
: "grey.200",
bgcolor: (theme) =>
theme.palette.mode === "dark"
? alpha(theme.palette.background.paper, 0.5)
: "grey.50",
}}
>
<Typography variant="h6" sx={{ fontWeight: 600, color: "text.primary" }}>
{title}
</Typography>
</Box>
<Box sx={{ p: 3, bgcolor: "background.paper" }}>{children}</Box>
</Paper>
);
const Tag = ({ children }) => (
<Box
component="span"
sx={{
display: "inline-block",
px: 1.5,
py: 0.5,
bgcolor: (theme) => alpha(theme.palette.primary.main, 0.1),
color: "primary.main",
borderRadius: 1,
fontSize: "0.875rem",
fontWeight: 600,
mr: 1,
mb: 1,
}}
>
{children}
</Box>
);
const TagSection = ({ title, description, tags, explanations }) => (
<Box sx={{ mb: 4 }}>
<Typography variant="h6" sx={{ fontWeight: 600, mb: 1 }}>
{title}
</Typography>
{description && (
<Typography variant="body1" sx={{ mb: 2, color: "text.secondary" }}>
{description}
</Typography>
)}
<Stack spacing={1}>
{tags.map((tag, index) => (
<Box key={index}>
<Tag>{tag}</Tag>
{explanations && explanations[index] && (
<Typography
component="span"
variant="body2"
sx={{ color: "text.secondary", ml: 1 }}
dangerouslySetInnerHTML={{ __html: explanations[index] }}
/>
)}
</Box>
))}
</Stack>
</Box>
);
const CodeBlock = ({ children }) => (
<Box
sx={{
backgroundColor: (theme) =>
theme.palette.mode === "dark"
? "rgba(255, 255, 255, 0.05)"
: "rgba(0, 0, 0, 0.03)",
p: 2,
borderRadius: 1,
fontFamily: "monospace",
mb: 2,
"& .key": {
color: (theme) => theme.palette.primary.main,
},
"& .value": {
color: (theme) =>
theme.palette.mode === "dark"
? theme.palette.success.light
: theme.palette.success.dark,
},
"& .comment": {
color: (theme) => theme.palette.text.secondary,
},
"& .punctuation": {
color: (theme) => theme.palette.text.primary,
},
}}
>
{children}
</Box>
);
const HowToSubmitPage = () => {
return (
<Box sx={{ width: "100%", maxWidth: 1200, margin: "0 auto", padding: 4 }}>
<PageHeader
title="How to submit ?"
subtitle={
<>
Join the <span style={{ fontWeight: 600 }}>community</span> of{" "}
<span style={{ fontWeight: 600 }}>"leaderboards on the Hub"</span>
</>
}
/>
<Section title="Configuration steps">
<Stack spacing={4}>
<Typography variant="body1" color="text.secondary">
Your leaderboard must be hosted on a{" "}
<Link
href="https://huggingface.co/docs/hub/spaces"
target="_blank"
rel="noopener noreferrer"
>
Hugging Face Space
</Link>
.
<br />
Like{" "}
<Link
href="https://huggingface.co/docs/hub/model-cards"
target="_blank"
rel="noopener noreferrer"
>
model cards
</Link>
, your Space's <strong>README.md</strong> file should include
specific <strong>metadata</strong> in a YAML section at the top.
</Typography>
<Box
sx={{
display: "flex",
flexDirection: { xs: "column", md: "row" },
gap: 4,
position: "relative",
}}
>
<Box sx={{ flex: 4 }}>
<Stack spacing={4}>
<Stack spacing={3}>
<Stack direction="row" spacing={2} alignItems="center">
<StepNumber number={1} />
<Typography
variant="subtitle1"
sx={{
fontWeight: 600,
color: "text.primary",
letterSpacing: "-0.01em",
}}
>
Define the type
</Typography>
</Stack>
<Box sx={{ pl: 7 }}>
<Typography variant="body2" color="text.secondary">
Add either the <strong>leaderboard</strong> or{" "}
<strong>arena</strong> tag.
</Typography>
</Box>
</Stack>
<Stack spacing={3}>
<Stack direction="row" spacing={2} alignItems="center">
<StepNumber number={2} />
<Typography
variant="subtitle1"
sx={{
fontWeight: 600,
color: "text.primary",
letterSpacing: "-0.01em",
}}
>
Add a description
</Typography>
</Stack>
<Box sx={{ pl: 7 }}>
<Typography variant="body2" color="text.secondary">
Include a <strong>short_description</strong> field to
explain the purpose of your evaluation.
</Typography>
</Box>
</Stack>
<Stack spacing={3}>
<Stack direction="row" spacing={2} alignItems="center">
<StepNumber number={3} />
<Typography
variant="subtitle1"
sx={{
fontWeight: 600,
color: "text.primary",
letterSpacing: "-0.01em",
}}
>
Specify metadata
</Typography>
</Stack>
<Box sx={{ pl: 7 }}>
<Typography variant="body2" color="text.secondary">
Add <strong>metadata tags</strong> to categorize your
evaluation and help users understand its characteristics.
</Typography>
</Box>
</Stack>
</Stack>
</Box>
<Divider
orientation="vertical"
flexItem
sx={{
display: { xs: "none", md: "block" },
}}
/>
<Box sx={{ flex: 5 }}>
<CodeBlock>
---
<br />
<span className="key">short_description</span>
<span className="punctuation">:</span>{" "}
<span className="value">
Evaluating LLMs on math reasoning tasks
</span>
<br />
<span className="key">tags</span>
<span className="punctuation">:</span>
<br />
<span className="punctuation">&nbsp;&nbsp;-</span>{" "}
<span className="value">leaderboard</span>
<span className="comment">
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;#
Type of leaderboard
</span>
<br />
<span className="punctuation">&nbsp;&nbsp;-</span>{" "}
<span className="value">submission:automatic</span>{" "}
<span className="comment"># How models are submitted</span>
<br />
<span className="punctuation">&nbsp;&nbsp;-</span>{" "}
<span className="value">test:public</span>{" "}
<span className="comment">
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;# Test
set visibility
</span>
<br />
<span className="punctuation">&nbsp;&nbsp;-</span>{" "}
<span className="value">judge:function</span>{" "}
<span className="comment">
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;# Evaluation method
</span>
<br />
<span className="punctuation">&nbsp;&nbsp;-</span>{" "}
<span className="value">modality:text</span>{" "}
<span className="comment">
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;# Input/output type
</span>
<br />
<span className="punctuation">&nbsp;&nbsp;-</span>{" "}
<span className="value">language:english</span>{" "}
<span className="comment">
&nbsp;&nbsp;&nbsp;&nbsp;# Language coverage
</span>
<br />
<span className="punctuation">&nbsp;&nbsp;-</span>{" "}
<span className="value">domain:financial</span>{" "}
<span className="comment">
&nbsp;&nbsp;&nbsp;&nbsp;# Specific domain
</span>
<br />
---
</CodeBlock>
</Box>
</Box>
</Stack>
</Section>
<Section title="What do the tags mean?">
<TagSection
title="Submission type"
description="Arenas are not concerned by this category."
tags={[
"submission:automatic",
"submission:semiautomatic",
"submission:manual",
"submission:closed",
]}
explanations={[
"users can submit their models as such to the leaderboard, and evaluation is run <strong>automatically</strong> without human intervention",
"the leaderboard requires the <strong>model owner</strong> to run evaluations on his side and submit the results",
"the leaderboard requires the <strong>leaderboard owner</strong> to run evaluations for new submissions",
"the leaderboard <strong>does not accept</strong> submissions at the moment",
]}
/>
<Divider sx={{ my: 3 }} />
<TagSection
title="Test set status"
description="Arenas are not concerned by this category."
tags={["test:public", "test:mix", "test:private", "test:rolling"]}
explanations={[
"all the test sets used are <strong>public</strong>, the evaluations are completely <strong>reproducible</strong>",
"some test sets are <strong>public</strong> and some <strong>private</strong>",
"all the test sets used are <strong>private</strong>, the evaluations are hard to game",
"the test sets used <strong>change regularly</strong> through time and evaluation scores are refreshed",
]}
/>
<Divider sx={{ my: 3 }} />
<TagSection
title="Judges"
tags={[
"judge:function",
"judge:model",
"judge:humans",
"judge:vibe_check",
]}
explanations={[
"evaluations are run <strong>automatically</strong>, using an evaluation suite such as <strong>lm_eval</strong> or <strong>lighteval</strong>",
"evaluations are run using a <strong>model as a judge</strong> approach to rate answer",
"evaluations are <strong>done by humans</strong> to rate answer - <strong>this is an arena</strong>",
"evaluations are <strong>done manually</strong> by one or several humans",
]}
/>
<Divider sx={{ my: 3 }} />
<TagSection
title="Modalities"
description="Can be any (or several) of the following list:"
tags={[
"modality:text",
"modality:image",
"modality:audio",
"modality:video",
"modality:tools",
"modality:artefacts",
]}
explanations={[
"",
"",
"",
"",
"requires added <strong>tool usage</strong> - mostly for <strong>assistant models</strong> (a bit outside of usual modalities)",
"the leaderboard concerns itself with <strong>machine learning artefacts</strong> as themselves, for example, quality evaluation of <strong>text embeddings</strong> (a bit outside of usual modalities)",
]}
/>
<Divider sx={{ my: 3 }} />
<TagSection
title="Evaluation categories"
description="Can be any (or several) of the following list:"
tags={[
"eval:generation",
"eval:math",
"eval:code",
"eval:performance",
"eval:safety",
"task:rag",
]}
explanations={[
"the evaluation looks at <strong>generation capabilities</strong> specifically (can be image generation, text generation, ...)",
"the evaluation tests <strong>math abilities</strong>",
"the evaluation tests <strong>coding capabilities</strong>",
"model <strong>performance</strong> (speed, energy consumption, ...)",
"the evaluation considers <strong>safety</strong>, <strong>toxicity</strong>, <strong>bias</strong>",
"the evaluation tests <strong>RAG</strong> (Retrieval-Augmented Generation) capabilities",
]}
/>
<Divider sx={{ my: 3 }} />
<TagSection
title="Language"
description="You can indicate the languages covered by your benchmark like so: language:mylanguage. At the moment, we do not support language codes, please use the language name in English."
tags={["language:english", "language:french"]}
/>
<Divider sx={{ my: 3 }} />
<TagSection
title="Domain"
description="Indicates the specific domain of the leaderboard:"
tags={["domain:financial", "domain:medical", "domain:legal"]}
/>
<Typography
variant="body2"
sx={{
mt: 1,
color: "text.secondary",
fontSize: "0.875rem",
fontStyle: "italic",
}}
>
If you would like to see a domain that is not currently represented,
please contact{" "}
<Link
href="https://huggingface.co/clementine"
target="_blank"
rel="noopener noreferrer"
sx={{
color: "primary.main",
textDecoration: "none",
"&:hover": {
textDecoration: "underline",
},
}}
>
Clementine Fourrier
</Link>{" "}
on Hugging Face.
</Typography>
</Section>
</Box>
);
};
export default HowToSubmitPage;