Spaces:
Running
Running
import React from "react"; | |
import { | |
Box, | |
Typography, | |
Paper, | |
Stack, | |
Divider, | |
alpha, | |
Link, | |
} from "@mui/material"; | |
import PageHeader from "../../components/PageHeader/PageHeader"; | |
const StepNumber = ({ number }) => ( | |
<Box | |
sx={{ | |
width: 32, | |
height: 32, | |
borderRadius: "50%", | |
display: "flex", | |
alignItems: "center", | |
justifyContent: "center", | |
border: "1px solid", | |
borderColor: "primary.main", | |
color: "primary.main", | |
fontSize: "0.875rem", | |
fontWeight: 600, | |
flexShrink: 0, | |
bgcolor: "transparent", | |
}} | |
> | |
{number} | |
</Box> | |
); | |
const Section = ({ title, children }) => ( | |
<Paper | |
elevation={0} | |
sx={{ | |
border: "1px solid", | |
borderColor: "grey.300", | |
borderRadius: 1, | |
overflow: "hidden", | |
mb: 3, | |
}} | |
> | |
<Box | |
sx={{ | |
px: 3, | |
py: 2, | |
borderBottom: "1px solid", | |
borderColor: (theme) => | |
theme.palette.mode === "dark" | |
? alpha(theme.palette.divider, 0.1) | |
: "grey.200", | |
bgcolor: (theme) => | |
theme.palette.mode === "dark" | |
? alpha(theme.palette.background.paper, 0.5) | |
: "grey.50", | |
}} | |
> | |
<Typography variant="h6" sx={{ fontWeight: 600, color: "text.primary" }}> | |
{title} | |
</Typography> | |
</Box> | |
<Box sx={{ p: 3, bgcolor: "background.paper" }}>{children}</Box> | |
</Paper> | |
); | |
const Tag = ({ children }) => ( | |
<Box | |
component="span" | |
sx={{ | |
display: "inline-block", | |
px: 1.5, | |
py: 0.5, | |
bgcolor: (theme) => alpha(theme.palette.primary.main, 0.1), | |
color: "primary.main", | |
borderRadius: 1, | |
fontSize: "0.875rem", | |
fontWeight: 600, | |
mr: 1, | |
mb: 1, | |
}} | |
> | |
{children} | |
</Box> | |
); | |
const TagSection = ({ title, description, tags, explanations }) => ( | |
<Box sx={{ mb: 4 }}> | |
<Typography variant="h6" sx={{ fontWeight: 600, mb: 1 }}> | |
{title} | |
</Typography> | |
{description && ( | |
<Typography variant="body1" sx={{ mb: 2, color: "text.secondary" }}> | |
{description} | |
</Typography> | |
)} | |
<Stack spacing={1}> | |
{tags.map((tag, index) => ( | |
<Box key={index}> | |
<Tag>{tag}</Tag> | |
{explanations && explanations[index] && ( | |
<Typography | |
component="span" | |
variant="body2" | |
sx={{ color: "text.secondary", ml: 1 }} | |
dangerouslySetInnerHTML={{ __html: explanations[index] }} | |
/> | |
)} | |
</Box> | |
))} | |
</Stack> | |
</Box> | |
); | |
const CodeBlock = ({ children }) => ( | |
<Box | |
sx={{ | |
backgroundColor: (theme) => | |
theme.palette.mode === "dark" | |
? "rgba(255, 255, 255, 0.05)" | |
: "rgba(0, 0, 0, 0.03)", | |
p: 2, | |
borderRadius: 1, | |
fontFamily: "monospace", | |
mb: 2, | |
"& .key": { | |
color: (theme) => theme.palette.primary.main, | |
}, | |
"& .value": { | |
color: (theme) => | |
theme.palette.mode === "dark" | |
? theme.palette.success.light | |
: theme.palette.success.dark, | |
}, | |
"& .comment": { | |
color: (theme) => theme.palette.text.secondary, | |
}, | |
"& .punctuation": { | |
color: (theme) => theme.palette.text.primary, | |
}, | |
}} | |
> | |
{children} | |
</Box> | |
); | |
const HowToSubmitPage = () => { | |
return ( | |
<Box sx={{ width: "100%", maxWidth: 1200, margin: "0 auto", padding: 4 }}> | |
<PageHeader | |
title="How to submit ?" | |
subtitle={ | |
<> | |
Join the <span style={{ fontWeight: 600 }}>community</span> of{" "} | |
<span style={{ fontWeight: 600 }}>"leaderboards on the Hub"</span> | |
</> | |
} | |
/> | |
<Section title="Configuration steps"> | |
<Stack spacing={4}> | |
<Typography variant="body1" color="text.secondary"> | |
Your leaderboard must be hosted on a{" "} | |
<Link | |
href="https://huggingface.co/docs/hub/spaces" | |
target="_blank" | |
rel="noopener noreferrer" | |
> | |
Hugging Face Space | |
</Link> | |
. | |
<br /> | |
Like{" "} | |
<Link | |
href="https://huggingface.co/docs/hub/model-cards" | |
target="_blank" | |
rel="noopener noreferrer" | |
> | |
model cards | |
</Link> | |
, your Space's <strong>README.md</strong> file should include | |
specific <strong>metadata</strong> in a YAML section at the top. | |
</Typography> | |
<Box | |
sx={{ | |
display: "flex", | |
flexDirection: { xs: "column", md: "row" }, | |
gap: 4, | |
position: "relative", | |
}} | |
> | |
<Box sx={{ flex: 4 }}> | |
<Stack spacing={4}> | |
<Stack spacing={3}> | |
<Stack direction="row" spacing={2} alignItems="center"> | |
<StepNumber number={1} /> | |
<Typography | |
variant="subtitle1" | |
sx={{ | |
fontWeight: 600, | |
color: "text.primary", | |
letterSpacing: "-0.01em", | |
}} | |
> | |
Define the type | |
</Typography> | |
</Stack> | |
<Box sx={{ pl: 7 }}> | |
<Typography variant="body2" color="text.secondary"> | |
Add either the <strong>leaderboard</strong> or{" "} | |
<strong>arena</strong> tag. | |
</Typography> | |
</Box> | |
</Stack> | |
<Stack spacing={3}> | |
<Stack direction="row" spacing={2} alignItems="center"> | |
<StepNumber number={2} /> | |
<Typography | |
variant="subtitle1" | |
sx={{ | |
fontWeight: 600, | |
color: "text.primary", | |
letterSpacing: "-0.01em", | |
}} | |
> | |
Add a description | |
</Typography> | |
</Stack> | |
<Box sx={{ pl: 7 }}> | |
<Typography variant="body2" color="text.secondary"> | |
Include a <strong>short_description</strong> field to | |
explain the purpose of your evaluation. | |
</Typography> | |
</Box> | |
</Stack> | |
<Stack spacing={3}> | |
<Stack direction="row" spacing={2} alignItems="center"> | |
<StepNumber number={3} /> | |
<Typography | |
variant="subtitle1" | |
sx={{ | |
fontWeight: 600, | |
color: "text.primary", | |
letterSpacing: "-0.01em", | |
}} | |
> | |
Specify metadata | |
</Typography> | |
</Stack> | |
<Box sx={{ pl: 7 }}> | |
<Typography variant="body2" color="text.secondary"> | |
Add <strong>metadata tags</strong> to categorize your | |
evaluation and help users understand its characteristics. | |
</Typography> | |
</Box> | |
</Stack> | |
</Stack> | |
</Box> | |
<Divider | |
orientation="vertical" | |
flexItem | |
sx={{ | |
display: { xs: "none", md: "block" }, | |
}} | |
/> | |
<Box sx={{ flex: 5 }}> | |
<CodeBlock> | |
--- | |
<br /> | |
<span className="key">short_description</span> | |
<span className="punctuation">:</span>{" "} | |
<span className="value"> | |
Evaluating LLMs on math reasoning tasks | |
</span> | |
<br /> | |
<span className="key">tags</span> | |
<span className="punctuation">:</span> | |
<br /> | |
<span className="punctuation"> -</span>{" "} | |
<span className="value">leaderboard</span> | |
<span className="comment"> | |
# | |
Type of leaderboard | |
</span> | |
<br /> | |
<span className="punctuation"> -</span>{" "} | |
<span className="value">submission:automatic</span>{" "} | |
<span className="comment"># How models are submitted</span> | |
<br /> | |
<span className="punctuation"> -</span>{" "} | |
<span className="value">test:public</span>{" "} | |
<span className="comment"> | |
# Test | |
set visibility | |
</span> | |
<br /> | |
<span className="punctuation"> -</span>{" "} | |
<span className="value">judge:function</span>{" "} | |
<span className="comment"> | |
# Evaluation method | |
</span> | |
<br /> | |
<span className="punctuation"> -</span>{" "} | |
<span className="value">modality:text</span>{" "} | |
<span className="comment"> | |
# Input/output type | |
</span> | |
<br /> | |
<span className="punctuation"> -</span>{" "} | |
<span className="value">language:english</span>{" "} | |
<span className="comment"> | |
# Language coverage | |
</span> | |
<br /> | |
<span className="punctuation"> -</span>{" "} | |
<span className="value">domain:financial</span>{" "} | |
<span className="comment"> | |
# Specific domain | |
</span> | |
<br /> | |
--- | |
</CodeBlock> | |
</Box> | |
</Box> | |
</Stack> | |
</Section> | |
<Section title="What do the tags mean?"> | |
<TagSection | |
title="Submission type" | |
description="Arenas are not concerned by this category." | |
tags={[ | |
"submission:automatic", | |
"submission:semiautomatic", | |
"submission:manual", | |
"submission:closed", | |
]} | |
explanations={[ | |
"users can submit their models as such to the leaderboard, and evaluation is run <strong>automatically</strong> without human intervention", | |
"the leaderboard requires the <strong>model owner</strong> to run evaluations on his side and submit the results", | |
"the leaderboard requires the <strong>leaderboard owner</strong> to run evaluations for new submissions", | |
"the leaderboard <strong>does not accept</strong> submissions at the moment", | |
]} | |
/> | |
<Divider sx={{ my: 3 }} /> | |
<TagSection | |
title="Test set status" | |
description="Arenas are not concerned by this category." | |
tags={["test:public", "test:mix", "test:private", "test:rolling"]} | |
explanations={[ | |
"all the test sets used are <strong>public</strong>, the evaluations are completely <strong>reproducible</strong>", | |
"some test sets are <strong>public</strong> and some <strong>private</strong>", | |
"all the test sets used are <strong>private</strong>, the evaluations are hard to game", | |
"the test sets used <strong>change regularly</strong> through time and evaluation scores are refreshed", | |
]} | |
/> | |
<Divider sx={{ my: 3 }} /> | |
<TagSection | |
title="Judges" | |
tags={[ | |
"judge:function", | |
"judge:model", | |
"judge:humans", | |
"judge:vibe_check", | |
]} | |
explanations={[ | |
"evaluations are run <strong>automatically</strong>, using an evaluation suite such as <strong>lm_eval</strong> or <strong>lighteval</strong>", | |
"evaluations are run using a <strong>model as a judge</strong> approach to rate answer", | |
"evaluations are <strong>done by humans</strong> to rate answer - <strong>this is an arena</strong>", | |
"evaluations are <strong>done manually</strong> by one or several humans", | |
]} | |
/> | |
<Divider sx={{ my: 3 }} /> | |
<TagSection | |
title="Modalities" | |
description="Can be any (or several) of the following list:" | |
tags={[ | |
"modality:text", | |
"modality:image", | |
"modality:audio", | |
"modality:video", | |
"modality:tools", | |
"modality:artefacts", | |
]} | |
explanations={[ | |
"", | |
"", | |
"", | |
"", | |
"requires added <strong>tool usage</strong> - mostly for <strong>assistant models</strong> (a bit outside of usual modalities)", | |
"the leaderboard concerns itself with <strong>machine learning artefacts</strong> as themselves, for example, quality evaluation of <strong>text embeddings</strong> (a bit outside of usual modalities)", | |
]} | |
/> | |
<Divider sx={{ my: 3 }} /> | |
<TagSection | |
title="Evaluation categories" | |
description="Can be any (or several) of the following list:" | |
tags={[ | |
"eval:generation", | |
"eval:math", | |
"eval:code", | |
"eval:performance", | |
"eval:safety", | |
"task:rag", | |
]} | |
explanations={[ | |
"the evaluation looks at <strong>generation capabilities</strong> specifically (can be image generation, text generation, ...)", | |
"the evaluation tests <strong>math abilities</strong>", | |
"the evaluation tests <strong>coding capabilities</strong>", | |
"model <strong>performance</strong> (speed, energy consumption, ...)", | |
"the evaluation considers <strong>safety</strong>, <strong>toxicity</strong>, <strong>bias</strong>", | |
"the evaluation tests <strong>RAG</strong> (Retrieval-Augmented Generation) capabilities", | |
]} | |
/> | |
<Divider sx={{ my: 3 }} /> | |
<TagSection | |
title="Language" | |
description="You can indicate the languages covered by your benchmark like so: language:mylanguage. At the moment, we do not support language codes, please use the language name in English." | |
tags={["language:english", "language:french"]} | |
/> | |
<Divider sx={{ my: 3 }} /> | |
<TagSection | |
title="Domain" | |
description="Indicates the specific domain of the leaderboard:" | |
tags={["domain:financial", "domain:medical", "domain:legal"]} | |
/> | |
<Typography | |
variant="body2" | |
sx={{ | |
mt: 1, | |
color: "text.secondary", | |
fontSize: "0.875rem", | |
fontStyle: "italic", | |
}} | |
> | |
If you would like to see a domain that is not currently represented, | |
please contact{" "} | |
<Link | |
href="https://huggingface.co/clementine" | |
target="_blank" | |
rel="noopener noreferrer" | |
sx={{ | |
color: "primary.main", | |
textDecoration: "none", | |
"&:hover": { | |
textDecoration: "underline", | |
}, | |
}} | |
> | |
Clementine Fourrier | |
</Link>{" "} | |
on Hugging Face. | |
</Typography> | |
</Section> | |
</Box> | |
); | |
}; | |
export default HowToSubmitPage; | |