// Spaces:
// Running
// Running
import React from "react"; | |
import { | |
Box, | |
Typography, | |
Paper, | |
Stack, | |
Divider, | |
alpha, | |
Link, | |
Grid, | |
InputLabel, | |
Tooltip, | |
IconButton, | |
} from "@mui/material"; | |
import InfoOutlinedIcon from "@mui/icons-material/InfoOutlined"; | |
import PageHeader from "../../components/PageHeader/PageHeader"; | |
const StepNumber = ({ number }) => ( | |
<Box | |
sx={{ | |
width: 32, | |
height: 32, | |
borderRadius: "50%", | |
display: "flex", | |
alignItems: "center", | |
justifyContent: "center", | |
border: "1px solid", | |
borderColor: "primary.main", | |
color: "primary.main", | |
fontSize: "0.875rem", | |
fontWeight: 600, | |
flexShrink: 0, | |
bgcolor: "transparent", | |
}} | |
> | |
{number} | |
</Box> | |
); | |
const Section = ({ title, children }) => ( | |
<Paper | |
elevation={0} | |
sx={{ | |
border: "1px solid", | |
borderColor: "divider", | |
borderRadius: 1, | |
overflow: "hidden", | |
mb: 3, | |
}} | |
> | |
<Box | |
sx={{ | |
px: 3, | |
py: 2, | |
borderBottom: "1px solid", | |
borderColor: "divider", | |
bgcolor: (theme) => | |
theme.palette.mode === "dark" | |
? alpha(theme.palette.background.paper, 0.5) | |
: "grey.50", | |
}} | |
> | |
<Typography variant="h6" sx={{ fontWeight: 600, color: "text.primary" }}> | |
{title} | |
</Typography> | |
</Box> | |
<Box sx={{ p: 3, bgcolor: "background.paper" }}>{children}</Box> | |
</Paper> | |
); | |
const Tag = ({ children }) => ( | |
<Box | |
component="span" | |
sx={{ | |
display: "inline-block", | |
px: 1.5, | |
py: 0.5, | |
bgcolor: (theme) => alpha(theme.palette.primary.main, 0.1), | |
color: "primary.main", | |
borderRadius: 1, | |
fontSize: "0.875rem", | |
fontWeight: 600, | |
mr: 1, | |
mb: 1, | |
}} | |
> | |
{children} | |
</Box> | |
); | |
const TagCard = ({ title, description, tags, explanations }) => ( | |
<Paper | |
elevation={1} | |
sx={{ | |
p: 3, | |
height: "100%", | |
display: "flex", | |
flexDirection: "column", | |
borderRadius: 2, | |
border: "1px solid", | |
borderColor: "grey.200", | |
}} | |
> | |
<Typography variant="h6" sx={{ fontWeight: 600, mb: 2 }}> | |
{title} | |
</Typography> | |
{description && ( | |
<Typography variant="body2" sx={{ mb: 2, color: "text.secondary" }}> | |
{description} | |
</Typography> | |
)} | |
<Box sx={{ flex: 1 }}> | |
{tags.map((tag, index) => ( | |
<Box key={index} sx={{ mb: 2 }}> | |
<Tag>{tag}</Tag> | |
{explanations && explanations[index] && ( | |
<Typography | |
variant="body2" | |
sx={{ | |
color: "text.secondary", | |
mt: 1, | |
display: "block", | |
}} | |
dangerouslySetInnerHTML={{ __html: explanations[index] }} | |
/> | |
)} | |
</Box> | |
))} | |
</Box> | |
</Paper> | |
); | |
const CodeBlock = ({ children }) => ( | |
<Box | |
sx={{ | |
backgroundColor: (theme) => | |
alpha( | |
theme.palette.primary.main, | |
theme.palette.mode === "dark" ? 0.15 : 0.05 | |
), | |
px: 2, | |
py: 4, | |
borderRadius: 1, | |
fontFamily: "monospace", | |
mb: 2, | |
position: "relative", | |
"& .key": { | |
color: (theme) => theme.palette.primary.main, | |
}, | |
"& .value": { | |
color: (theme) => | |
theme.palette.mode === "dark" | |
? theme.palette.success.light | |
: theme.palette.success.dark, | |
}, | |
"& .comment": { | |
color: (theme) => theme.palette.text.secondary, | |
}, | |
"& .punctuation": { | |
color: (theme) => theme.palette.text.primary, | |
}, | |
}} | |
> | |
<InputLabel | |
sx={{ | |
position: "absolute", | |
right: 8, | |
top: 8, | |
fontSize: "0.75rem", | |
color: "text.secondary", | |
fontFamily: "monospace", | |
bgcolor: "background.paper", | |
px: 1, | |
py: 0.5, | |
borderRadius: 1, | |
border: "1px solid", | |
borderColor: "divider", | |
zIndex: 1, | |
}} | |
> | |
README.md | |
</InputLabel> | |
{children} | |
</Box> | |
); | |
/** Emoji returned when a tag has no dedicated entry. */
const DEFAULT_TAG_EMOJI = "🏷️";

/**
 * Emoji lookup keyed first by tag prefix (the part before ":") and then by
 * tag name (the part after ":"). Built once at module load instead of on
 * every call.
 *
 * Some names appear twice on purpose: the camelCase keys are the historical
 * ones, and the spelled-out keys match the tag strings the page really
 * renders ("judge:vibe check", "language:your own language", "eval:rag",
 * "modality:agent", "modality:3d").
 */
const TAG_EMOJIS = {
  submission: {
    automatic: "🤖",
    semiautomatic: "🔄",
    // Man + zero-width joiner + laptop, written as escapes so the joiner
    // cannot be lost when the file is copied around.
    manual: "\u{1F468}\u200D\u{1F4BB}",
    closed: "🔒",
  },
  test: {
    public: "👀",
    mix: "🔀",
    private: "🔐",
    rolling: "🎲",
  },
  judge: {
    function: "⚙️",
    model: "🧠",
    humans: "👥",
    vibeCheck: "✨",
    "vibe check": "✨",
  },
  modality: {
    text: "📝",
    image: "🖼️",
    audio: "🎵",
    video: "🎥",
    tools: "🛠️",
    // "agent" is the tag shown for tool-using models, so it shares the tools emoji.
    agent: "🛠️",
    artefacts: "🏺",
    embeddings: "🔤",
    "3d": "🧊",
  },
  eval: {
    generation: "✨",
    math: "🔢",
    code: "💻",
    reasoning: "🧠",
    performance: "⚡",
    safety: "🛡️",
    hallucination: "🌫️",
    rag: "🔍",
  },
  task: {
    rag: "🔍",
  },
  language: {
    english: "🇬🇧",
    french: "🇫🇷",
    yourOwnLanguage: "🌍",
    "your own language": "🌍",
  },
  domain: {
    financial: "💰",
    medical: "⚕️",
    legal: "⚖️",
    biology: "🧬",
    translation: "🔄",
    chemistry: "🧪",
    physics: "⚛️",
    commercial: "🏢",
  },
};

/**
 * Returns the emoji associated with a `prefix:name` tag.
 *
 * @param {string} tag - Tag such as "judge:model". Only the first two
 *   colon-separated segments are considered.
 * @returns {string} The matching emoji, or the generic label emoji when the
 *   tag is not a string, has no name part, or is not listed.
 */
const getTagEmoji = (tag) => {
  if (typeof tag !== "string") {
    return DEFAULT_TAG_EMOJI;
  }

  const [type, name] = tag.split(":");

  // Own-property checks keep inherited members (e.g. "constructor",
  // "toString") from being returned as if they were emojis.
  const hasOwn = (object, key) =>
    Object.prototype.hasOwnProperty.call(object, key);

  if (!hasOwn(TAG_EMOJIS, type)) {
    return DEFAULT_TAG_EMOJI;
  }
  const group = TAG_EMOJIS[type];
  if (!hasOwn(group, name)) {
    return DEFAULT_TAG_EMOJI;
  }
  return group[name] || DEFAULT_TAG_EMOJI;
};
const TagItem = ({ tag, explanation }) => { | |
// Extract the name without prefix | |
const name = tag.split(":")[1]; | |
const emoji = getTagEmoji(tag); | |
return ( | |
<Paper | |
elevation={0} | |
sx={{ | |
height: "100%", | |
display: "flex", | |
flexDirection: "column", | |
borderRadius: 2, | |
border: "1px solid", | |
borderColor: "divider", | |
overflow: "hidden", | |
}} | |
> | |
<Box | |
sx={{ | |
bgcolor: (theme) => | |
alpha( | |
theme.palette.primary.main, | |
theme.palette.mode === "dark" ? 0.15 : 0.05 | |
), | |
py: 2, | |
px: 2, | |
borderRadius: 0, | |
mb: 2, | |
position: "relative", | |
}} | |
> | |
<Typography | |
variant="h6" | |
sx={{ | |
fontWeight: 700, | |
color: "text.primary", | |
letterSpacing: "-0.02em", | |
pr: 5, | |
textTransform: "capitalize", | |
}} | |
> | |
{emoji} {name} | |
</Typography> | |
</Box> | |
<Box sx={{ px: 2, pb: 2 }}> | |
<Typography | |
variant="body2" | |
sx={{ | |
color: "text.secondary", | |
mb: 2, | |
fontSize: "0.75rem", | |
}} | |
> | |
<strong>{tag.split(":")[0]}</strong>:{tag.split(":")[1]} | |
</Typography> | |
{explanation && ( | |
<Typography | |
variant="body2" | |
sx={{ | |
color: "text.secondary", | |
flex: 1, | |
}} | |
dangerouslySetInnerHTML={{ __html: explanation }} | |
/> | |
)} | |
</Box> | |
</Paper> | |
); | |
}; | |
const TagSection = ({ title, description, tags, explanations }) => { | |
// Determine if this section should have 4 columns | |
const shouldHaveFourColumns = [ | |
"Submission type", | |
"Test set status", | |
"Judges", | |
"Domain", | |
].includes(title); | |
return ( | |
<Box sx={{ mb: 8 }}> | |
<Typography variant="h6" sx={{ fontWeight: 600, mb: 1 }}> | |
{title} | |
</Typography> | |
{description && ( | |
<Typography variant="body1" sx={{ mb: 4, color: "text.secondary" }}> | |
{description} | |
</Typography> | |
)} | |
<Grid container spacing={2}> | |
{tags.map((tag, index) => ( | |
<Grid | |
item | |
xs={12} | |
sm={6} | |
md={shouldHaveFourColumns ? 3 : 4} | |
key={index} | |
> | |
<TagItem | |
tag={tag} | |
explanation={explanations ? explanations[index] : null} | |
/> | |
</Grid> | |
))} | |
</Grid> | |
</Box> | |
); | |
}; | |
const HowToSubmitPage = () => { | |
return ( | |
<Box sx={{ width: "100%", maxWidth: 1200, margin: "0 auto", padding: 4 }}> | |
<PageHeader | |
title="How to submit ?" | |
subtitle={ | |
<> | |
Join the <span style={{ fontWeight: 600 }}>community</span> of{" "} | |
<span style={{ fontWeight: 600 }}>"leaderboards on the Hub"</span> | |
</> | |
} | |
/> | |
<Section title="Configuration steps"> | |
<Box | |
sx={{ | |
display: "flex", | |
gap: 4, | |
flexDirection: { xs: "column", md: "column", lg: "row" }, | |
}} | |
> | |
<Stack spacing={4} sx={{ flex: { xs: "1 1 auto", md: "0 0 45%" } }}> | |
<Stack spacing={3}> | |
<Stack direction="row" spacing={2} alignItems="center"> | |
<StepNumber number={1} /> | |
<Typography | |
variant="subtitle1" | |
sx={{ | |
fontWeight: 600, | |
color: "text.primary", | |
letterSpacing: "-0.01em", | |
}} | |
> | |
Create a Space | |
</Typography> | |
</Stack> | |
<Box sx={{ pl: 7 }}> | |
<Typography variant="body2" color="text.secondary"> | |
Your leaderboard must be hosted on a{" "} | |
<Link | |
href="https://huggingface.co/docs/hub/spaces" | |
target="_blank" | |
rel="noopener noreferrer" | |
> | |
Hugging Face Space | |
</Link> | |
. | |
</Typography> | |
</Box> | |
</Stack> | |
<Stack spacing={3}> | |
<Stack direction="row" spacing={2} alignItems="center"> | |
<StepNumber number={2} /> | |
<Typography | |
variant="subtitle1" | |
sx={{ | |
fontWeight: 600, | |
color: "text.primary", | |
letterSpacing: "-0.01em", | |
}} | |
> | |
Add metadata | |
</Typography> | |
</Stack> | |
<Box sx={{ pl: 7 }}> | |
<Typography | |
variant="body2" | |
color="text.secondary" | |
sx={{ mb: 2 }} | |
> | |
Like{" "} | |
<Link | |
href="https://huggingface.co/docs/hub/model-cards" | |
target="_blank" | |
rel="noopener noreferrer" | |
> | |
model cards | |
</Link> | |
, your Space's{" "} | |
<InputLabel | |
sx={{ | |
display: "inline-flex", | |
fontSize: "0.75rem", | |
color: "text.secondary", | |
fontFamily: "monospace", | |
bgcolor: "background.paper", | |
px: 1, | |
py: 0.5, | |
borderRadius: 1, | |
border: "1px solid", | |
borderColor: "divider", | |
mx: 0.5, | |
}} | |
> | |
README.md | |
</InputLabel>{" "} | |
file should include specific <strong>metadata</strong> in a | |
YAML section at the top: | |
</Typography> | |
<ul | |
style={{ | |
margin: 0, | |
paddingLeft: "20px", | |
color: "text.secondary", | |
}} | |
> | |
<li> | |
<Typography | |
variant="body2" | |
color="text.secondary" | |
sx={{ display: "flex", alignItems: "center", gap: 0.5 }} | |
> | |
Add either the <strong>leaderboard</strong> or{" "} | |
<strong>arena</strong> tag | |
<Tooltip | |
title={ | |
<Box sx={{ p: 1, maxWidth: 300 }}> | |
<Typography | |
variant="subtitle2" | |
sx={{ | |
mb: 1, | |
fontWeight: 600, | |
color: "text.secondary", | |
}} | |
> | |
Choose between: | |
</Typography> | |
<Typography | |
variant="body2" | |
component="div" | |
sx={{ mb: 1 }} | |
> | |
• <strong>arena</strong> - for human evaluations | |
<br /> | |
<Box component="span" sx={{ pl: 2 }}> | |
requires <Tag>judge:humans</Tag> | |
</Box> | |
</Typography> | |
<Typography variant="body2" component="div"> | |
• <strong>leaderboard</strong> - for automated | |
evaluations | |
<br /> | |
<Box component="span" sx={{ pl: 2 }}> | |
with <Tag>judge:function</Tag> or{" "} | |
<Tag>judge:model</Tag> | |
</Box> | |
</Typography> | |
</Box> | |
} | |
arrow | |
placement="right" | |
componentsProps={{ | |
tooltip: { | |
sx: { | |
bgcolor: "background.paper", | |
color: "text.primary", | |
"& .MuiTooltip-arrow": { | |
color: "background.paper", | |
}, | |
boxShadow: (theme) => theme.shadows[2], | |
}, | |
}, | |
}} | |
> | |
<IconButton | |
size="small" | |
sx={{ | |
p: 0.5, | |
color: "text.secondary", | |
"&:hover": { | |
color: "primary.main", | |
bgcolor: (theme) => | |
alpha(theme.palette.primary.main, 0.1), | |
}, | |
}} | |
> | |
<InfoOutlinedIcon sx={{ fontSize: "1rem" }} /> | |
</IconButton> | |
</Tooltip> | |
</Typography> | |
</li> | |
<li> | |
<Typography variant="body2" color="text.secondary"> | |
Include a <strong>short_description</strong> field to | |
explain the purpose of your evaluation | |
</Typography> | |
</li> | |
<li> | |
<Typography variant="body2" color="text.secondary"> | |
Add <strong>metadata tags</strong> to categorize your | |
evaluation (see examples on the right) | |
</Typography> | |
</li> | |
</ul> | |
</Box> | |
</Stack> | |
</Stack> | |
<Box sx={{ flex: 1 }}> | |
<CodeBlock> | |
--- | |
<br /> | |
<span className="key">short_description</span> | |
<span className="punctuation">:</span>{" "} | |
<span className="value"> | |
Evaluating LLMs on math reasoning tasks | |
</span> | |
<br /> | |
<span className="key">tags</span> | |
<span className="punctuation">:</span> | |
<br /> | |
<span className="punctuation"> -</span>{" "} | |
<span className="value">leaderboard</span> | |
<span className="comment"> | |
# | |
Type of leaderboard | |
</span> | |
<br /> | |
<span className="punctuation"> -</span>{" "} | |
<span className="value">submission:automatic</span>{" "} | |
<span className="comment"># How models are submitted</span> | |
<br /> | |
<span className="punctuation"> -</span>{" "} | |
<span className="value">test:public</span>{" "} | |
<span className="comment"> | |
# Test set | |
visibility | |
</span> | |
<br /> | |
<span className="punctuation"> -</span>{" "} | |
<span className="value">judge:function</span>{" "} | |
<span className="comment"> | |
# Evaluation method | |
</span> | |
<br /> | |
<span className="punctuation"> -</span>{" "} | |
<span className="value">modality:text</span>{" "} | |
<span className="comment"> | |
# Input/output type | |
</span> | |
<br /> | |
<span className="punctuation"> -</span>{" "} | |
<span className="value">language:english</span>{" "} | |
<span className="comment"> | |
# Language coverage | |
</span> | |
<br /> | |
<span className="punctuation"> -</span>{" "} | |
<span className="value">domain:financial</span>{" "} | |
<span className="comment"> | |
# Specific domain | |
</span> | |
<br /> | |
--- | |
</CodeBlock> | |
</Box> | |
</Box> | |
</Section> | |
<Section title="What do the tags mean?"> | |
<TagSection | |
title="Domain" | |
description="Indicates the specific domain of the leaderboard" | |
tags={[ | |
"domain:medical", | |
"domain:chemistry", | |
"domain:physics", | |
"domain:biology", | |
"domain:financial", | |
"domain:legal", | |
"domain:commercial", | |
"domain:translation", | |
]} | |
/> | |
<TagSection | |
title="Modalities" | |
description="Can be any (or several) of the following list" | |
tags={[ | |
"modality:text", | |
"modality:image", | |
"modality:audio", | |
"modality:video", | |
"modality:agent", | |
"modality:artefacts", | |
"modality:3d", | |
]} | |
explanations={[ | |
"", | |
"", | |
"", | |
"", | |
"requires added <strong>tool usage</strong> - mostly for <strong>assistant models</strong> (a bit outside of usual modalities)", | |
"the leaderboard concerns itself with <strong>machine learning artefacts</strong> as themselves, for example, quality evaluation of <strong>text embeddings</strong>", | |
"", | |
]} | |
/> | |
<TagSection | |
title="Evaluation categories" | |
description="Can be any (or several) of the following list" | |
tags={[ | |
"eval:generation", | |
"eval:math", | |
"eval:code", | |
"eval:reasoning", | |
"eval:performance", | |
"eval:safety", | |
"eval:hallucination", | |
"eval:rag", | |
]} | |
explanations={[ | |
"the evaluation looks at <strong>generation capabilities</strong> specifically (can be image generation, text generation, ...)", | |
"the evaluation tests <strong>math abilities</strong>", | |
"the evaluation tests <strong>coding capabilities</strong>", | |
"the evaluation tests <strong>reasoning abilities</strong>", | |
"model <strong>performance</strong> (speed, energy consumption, ...)", | |
"the evaluation considers <strong>safety</strong>, <strong>toxicity</strong>, <strong>bias</strong>", | |
"the evaluation measures the model's tendency to <strong>hallucinate</strong> or generate <strong>false information</strong>", | |
"the evaluation tests <strong>RAG</strong> (Retrieval-Augmented Generation) capabilities", | |
]} | |
/> | |
<TagSection | |
title="Language" | |
description="You can indicate the languages covered by your benchmark like so: language:mylanguage." | |
tags={[ | |
"language:english", | |
"language:french", | |
"language:your own language", | |
]} | |
explanations={[ | |
"", | |
"", | |
"At the moment, we do not support language codes, please use the language name in English.", | |
]} | |
/> | |
<TagSection | |
title="Submission type" | |
description="Arenas are not concerned by this category." | |
tags={[ | |
"submission:automatic", | |
"submission:semiautomatic", | |
"submission:manual", | |
"submission:closed", | |
]} | |
explanations={[ | |
"users can submit their models as such to the leaderboard, and evaluation is run <strong>automatically</strong> without human intervention", | |
"the leaderboard requires the <strong>model owner</strong> to run evaluations on his side and submit the results", | |
"the leaderboard requires the <strong>leaderboard owner</strong> to run evaluations for new submissions", | |
"the leaderboard <strong>does not accept</strong> submissions at the moment", | |
]} | |
/> | |
<TagSection | |
title="Test set status" | |
description="Arenas are not concerned by this category." | |
tags={["test:public", "test:mix", "test:private", "test:rolling"]} | |
explanations={[ | |
"all the test sets used are <strong>public</strong>, the evaluations are completely <strong>reproducible</strong>", | |
"some test sets are <strong>public</strong> and some <strong>private</strong>", | |
"all the test sets used are <strong>private</strong>, the evaluations are hard to game", | |
"the test sets used <strong>change regularly</strong> through time and evaluation scores are refreshed", | |
]} | |
/> | |
<TagSection | |
title="Judges" | |
tags={[ | |
"judge:function", | |
"judge:model", | |
"judge:humans", | |
"judge:vibe check", | |
]} | |
explanations={[ | |
"evaluations are run <strong>automatically</strong>, using an evaluation suite such as <strong>lm_eval</strong> or <strong>lighteval</strong>", | |
"evaluations are run using a <strong>model as a judge</strong> approach to rate answer", | |
"evaluations are <strong>done by humans</strong> to rate answer - <strong>this is an arena</strong>", | |
"evaluations are <strong>done manually</strong> by one or several humans", | |
]} | |
/> | |
<Typography | |
variant="body2" | |
sx={{ | |
mt: 3, | |
color: "text.secondary", | |
fontSize: "0.875rem", | |
fontStyle: "italic", | |
}} | |
> | |
If you would like to see a tag that is not currently represented, | |
please contact{" "} | |
<Link | |
href="https://huggingface.co/clementine" | |
target="_blank" | |
rel="noopener noreferrer" | |
sx={{ | |
color: "primary.main", | |
textDecoration: "none", | |
"&:hover": { | |
textDecoration: "underline", | |
}, | |
}} | |
> | |
Clémentine Fourrier | |
</Link>{" "} | |
on Hugging Face. | |
</Typography> | |
</Section> | |
</Box> | |
); | |
}; | |
export default HowToSubmitPage; | |