Spaces:
Running
Running
| import React from "react"; | |
| import { | |
| Box, | |
| Typography, | |
| Paper, | |
| Stack, | |
| Divider, | |
| alpha, | |
| Link, | |
| } from "@mui/material"; | |
| import PageHeader from "../../components/PageHeader/PageHeader"; | |
| const StepNumber = ({ number }) => ( | |
| <Box | |
| sx={{ | |
| width: 32, | |
| height: 32, | |
| borderRadius: "50%", | |
| display: "flex", | |
| alignItems: "center", | |
| justifyContent: "center", | |
| border: "1px solid", | |
| borderColor: "primary.main", | |
| color: "primary.main", | |
| fontSize: "0.875rem", | |
| fontWeight: 600, | |
| flexShrink: 0, | |
| bgcolor: "transparent", | |
| }} | |
| > | |
| {number} | |
| </Box> | |
| ); | |
| const Section = ({ title, children }) => ( | |
| <Paper | |
| elevation={0} | |
| sx={{ | |
| border: "1px solid", | |
| borderColor: "grey.300", | |
| borderRadius: 1, | |
| overflow: "hidden", | |
| mb: 3, | |
| }} | |
| > | |
| <Box | |
| sx={{ | |
| px: 3, | |
| py: 2, | |
| borderBottom: "1px solid", | |
| borderColor: (theme) => | |
| theme.palette.mode === "dark" | |
| ? alpha(theme.palette.divider, 0.1) | |
| : "grey.200", | |
| bgcolor: (theme) => | |
| theme.palette.mode === "dark" | |
| ? alpha(theme.palette.background.paper, 0.5) | |
| : "grey.50", | |
| }} | |
| > | |
| <Typography variant="h6" sx={{ fontWeight: 600, color: "text.primary" }}> | |
| {title} | |
| </Typography> | |
| </Box> | |
| <Box sx={{ p: 3, bgcolor: "background.paper" }}>{children}</Box> | |
| </Paper> | |
| ); | |
| const Tag = ({ children }) => ( | |
| <Box | |
| component="span" | |
| sx={{ | |
| display: "inline-block", | |
| px: 1.5, | |
| py: 0.5, | |
| bgcolor: (theme) => alpha(theme.palette.primary.main, 0.1), | |
| color: "primary.main", | |
| borderRadius: 1, | |
| fontSize: "0.875rem", | |
| fontWeight: 600, | |
| mr: 1, | |
| mb: 1, | |
| }} | |
| > | |
| {children} | |
| </Box> | |
| ); | |
| const TagSection = ({ title, description, tags, explanations }) => ( | |
| <Box sx={{ mb: 4 }}> | |
| <Typography variant="h6" sx={{ fontWeight: 600, mb: 1 }}> | |
| {title} | |
| </Typography> | |
| {description && ( | |
| <Typography variant="body1" sx={{ mb: 2, color: "text.secondary" }}> | |
| {description} | |
| </Typography> | |
| )} | |
| <Stack spacing={1}> | |
| {tags.map((tag, index) => ( | |
| <Box key={index}> | |
| <Tag>{tag}</Tag> | |
| {explanations && explanations[index] && ( | |
| <Typography | |
| component="span" | |
| variant="body2" | |
| sx={{ color: "text.secondary", ml: 1 }} | |
| dangerouslySetInnerHTML={{ __html: explanations[index] }} | |
| /> | |
| )} | |
| </Box> | |
| ))} | |
| </Stack> | |
| </Box> | |
| ); | |
| const CodeBlock = ({ children }) => ( | |
| <Box | |
| sx={{ | |
| backgroundColor: (theme) => | |
| theme.palette.mode === "dark" | |
| ? "rgba(255, 255, 255, 0.05)" | |
| : "rgba(0, 0, 0, 0.03)", | |
| p: 2, | |
| borderRadius: 1, | |
| fontFamily: "monospace", | |
| mb: 2, | |
| "& .key": { | |
| color: (theme) => theme.palette.primary.main, | |
| }, | |
| "& .value": { | |
| color: (theme) => | |
| theme.palette.mode === "dark" | |
| ? theme.palette.success.light | |
| : theme.palette.success.dark, | |
| }, | |
| "& .comment": { | |
| color: (theme) => theme.palette.text.secondary, | |
| }, | |
| "& .punctuation": { | |
| color: (theme) => theme.palette.text.primary, | |
| }, | |
| }} | |
| > | |
| {children} | |
| </Box> | |
| ); | |
| const HowToSubmitPage = () => { | |
| return ( | |
| <Box sx={{ width: "100%", maxWidth: 1200, margin: "0 auto", padding: 4 }}> | |
| <PageHeader | |
| title="How to submit ?" | |
| subtitle={ | |
| <> | |
| Join the <span style={{ fontWeight: 600 }}>community</span> of{" "} | |
| <span style={{ fontWeight: 600 }}>"leaderboards on the Hub"</span> | |
| </> | |
| } | |
| /> | |
| <Section title="Configuration steps"> | |
| <Stack spacing={4}> | |
| <Typography variant="body1" color="text.secondary"> | |
| Your leaderboard must be hosted on a{" "} | |
| <Link | |
| href="https://huggingface.co/docs/hub/spaces" | |
| target="_blank" | |
| rel="noopener noreferrer" | |
| > | |
| Hugging Face Space | |
| </Link> | |
| . | |
| <br /> | |
| Like{" "} | |
| <Link | |
| href="https://huggingface.co/docs/hub/model-cards" | |
| target="_blank" | |
| rel="noopener noreferrer" | |
| > | |
| model cards | |
| </Link> | |
| , your Space's <strong>README.md</strong> file should include | |
| specific <strong>metadata</strong> in a YAML section at the top. | |
| </Typography> | |
| <Box | |
| sx={{ | |
| display: "flex", | |
| flexDirection: { xs: "column", md: "row" }, | |
| gap: 4, | |
| position: "relative", | |
| }} | |
| > | |
| <Box sx={{ flex: 4 }}> | |
| <Stack spacing={4}> | |
| <Stack spacing={3}> | |
| <Stack direction="row" spacing={2} alignItems="center"> | |
| <StepNumber number={1} /> | |
| <Typography | |
| variant="subtitle1" | |
| sx={{ | |
| fontWeight: 600, | |
| color: "text.primary", | |
| letterSpacing: "-0.01em", | |
| }} | |
| > | |
| Define the type | |
| </Typography> | |
| </Stack> | |
| <Box sx={{ pl: 7 }}> | |
| <Typography variant="body2" color="text.secondary"> | |
| Add either the <strong>leaderboard</strong> or{" "} | |
| <strong>arena</strong> tag. | |
| </Typography> | |
| </Box> | |
| </Stack> | |
| <Stack spacing={3}> | |
| <Stack direction="row" spacing={2} alignItems="center"> | |
| <StepNumber number={2} /> | |
| <Typography | |
| variant="subtitle1" | |
| sx={{ | |
| fontWeight: 600, | |
| color: "text.primary", | |
| letterSpacing: "-0.01em", | |
| }} | |
| > | |
| Add a description | |
| </Typography> | |
| </Stack> | |
| <Box sx={{ pl: 7 }}> | |
| <Typography variant="body2" color="text.secondary"> | |
| Include a <strong>short_description</strong> field to | |
| explain the purpose of your evaluation. | |
| </Typography> | |
| </Box> | |
| </Stack> | |
| <Stack spacing={3}> | |
| <Stack direction="row" spacing={2} alignItems="center"> | |
| <StepNumber number={3} /> | |
| <Typography | |
| variant="subtitle1" | |
| sx={{ | |
| fontWeight: 600, | |
| color: "text.primary", | |
| letterSpacing: "-0.01em", | |
| }} | |
| > | |
| Specify metadata | |
| </Typography> | |
| </Stack> | |
| <Box sx={{ pl: 7 }}> | |
| <Typography variant="body2" color="text.secondary"> | |
| Add <strong>metadata tags</strong> to categorize your | |
| evaluation and help users understand its characteristics. | |
| </Typography> | |
| </Box> | |
| </Stack> | |
| </Stack> | |
| </Box> | |
| <Divider | |
| orientation="vertical" | |
| flexItem | |
| sx={{ | |
| display: { xs: "none", md: "block" }, | |
| }} | |
| /> | |
| <Box sx={{ flex: 5 }}> | |
| <CodeBlock> | |
| --- | |
| <br /> | |
| <span className="key">short_description</span> | |
| <span className="punctuation">:</span>{" "} | |
| <span className="value"> | |
| Evaluating LLMs on math reasoning tasks | |
| </span> | |
| <br /> | |
| <span className="key">tags</span> | |
| <span className="punctuation">:</span> | |
| <br /> | |
| <span className="punctuation"> -</span>{" "} | |
| <span className="value">leaderboard</span> | |
| <span className="comment"> | |
| # | |
| Type of leaderboard | |
| </span> | |
| <br /> | |
| <span className="punctuation"> -</span>{" "} | |
| <span className="value">submission:automatic</span>{" "} | |
| <span className="comment"># How models are submitted</span> | |
| <br /> | |
| <span className="punctuation"> -</span>{" "} | |
| <span className="value">test:public</span>{" "} | |
| <span className="comment"> | |
| # Test | |
| set visibility | |
| </span> | |
| <br /> | |
| <span className="punctuation"> -</span>{" "} | |
| <span className="value">judge:function</span>{" "} | |
| <span className="comment"> | |
| # Evaluation method | |
| </span> | |
| <br /> | |
| <span className="punctuation"> -</span>{" "} | |
| <span className="value">modality:text</span>{" "} | |
| <span className="comment"> | |
| # Input/output type | |
| </span> | |
| <br /> | |
| <span className="punctuation"> -</span>{" "} | |
| <span className="value">language:english</span>{" "} | |
| <span className="comment"> | |
| # Language coverage | |
| </span> | |
| <br /> | |
| <span className="punctuation"> -</span>{" "} | |
| <span className="value">domain:financial</span>{" "} | |
| <span className="comment"> | |
| # Specific domain | |
| </span> | |
| <br /> | |
| --- | |
| </CodeBlock> | |
| </Box> | |
| </Box> | |
| </Stack> | |
| </Section> | |
| <Section title="What do the tags mean?"> | |
| <TagSection | |
| title="Submission type" | |
| description="Arenas are not concerned by this category." | |
| tags={[ | |
| "submission:automatic", | |
| "submission:semiautomatic", | |
| "submission:manual", | |
| "submission:closed", | |
| ]} | |
| explanations={[ | |
| "users can submit their models as such to the leaderboard, and evaluation is run <strong>automatically</strong> without human intervention", | |
| "the leaderboard requires the <strong>model owner</strong> to run evaluations on his side and submit the results", | |
| "the leaderboard requires the <strong>leaderboard owner</strong> to run evaluations for new submissions", | |
| "the leaderboard <strong>does not accept</strong> submissions at the moment", | |
| ]} | |
| /> | |
| <Divider sx={{ my: 3 }} /> | |
| <TagSection | |
| title="Test set status" | |
| description="Arenas are not concerned by this category." | |
| tags={["test:public", "test:mix", "test:private", "test:rolling"]} | |
| explanations={[ | |
| "all the test sets used are <strong>public</strong>, the evaluations are completely <strong>reproducible</strong>", | |
| "some test sets are <strong>public</strong> and some <strong>private</strong>", | |
| "all the test sets used are <strong>private</strong>, the evaluations are hard to game", | |
| "the test sets used <strong>change regularly</strong> through time and evaluation scores are refreshed", | |
| ]} | |
| /> | |
| <Divider sx={{ my: 3 }} /> | |
| <TagSection | |
| title="Judges" | |
| tags={[ | |
| "judge:function", | |
| "judge:model", | |
| "judge:humans", | |
| "judge:vibe_check", | |
| ]} | |
| explanations={[ | |
| "evaluations are run <strong>automatically</strong>, using an evaluation suite such as <strong>lm_eval</strong> or <strong>lighteval</strong>", | |
| "evaluations are run using a <strong>model as a judge</strong> approach to rate answer", | |
| "evaluations are <strong>done by humans</strong> to rate answer - <strong>this is an arena</strong>", | |
| "evaluations are <strong>done manually</strong> by one or several humans", | |
| ]} | |
| /> | |
| <Divider sx={{ my: 3 }} /> | |
| <TagSection | |
| title="Modalities" | |
| description="Can be any (or several) of the following list:" | |
| tags={[ | |
| "modality:text", | |
| "modality:image", | |
| "modality:audio", | |
| "modality:video", | |
| "modality:tools", | |
| "modality:artefacts", | |
| ]} | |
| explanations={[ | |
| "", | |
| "", | |
| "", | |
| "", | |
| "requires added <strong>tool usage</strong> - mostly for <strong>assistant models</strong> (a bit outside of usual modalities)", | |
| "the leaderboard concerns itself with <strong>machine learning artefacts</strong> as themselves, for example, quality evaluation of <strong>text embeddings</strong> (a bit outside of usual modalities)", | |
| ]} | |
| /> | |
| <Divider sx={{ my: 3 }} /> | |
| <TagSection | |
| title="Evaluation categories" | |
| description="Can be any (or several) of the following list:" | |
| tags={[ | |
| "eval:generation", | |
| "eval:math", | |
| "eval:code", | |
| "eval:performance", | |
| "eval:safety", | |
| "task:rag", | |
| ]} | |
| explanations={[ | |
| "the evaluation looks at <strong>generation capabilities</strong> specifically (can be image generation, text generation, ...)", | |
| "the evaluation tests <strong>math abilities</strong>", | |
| "the evaluation tests <strong>coding capabilities</strong>", | |
| "model <strong>performance</strong> (speed, energy consumption, ...)", | |
| "the evaluation considers <strong>safety</strong>, <strong>toxicity</strong>, <strong>bias</strong>", | |
| "the evaluation tests <strong>RAG</strong> (Retrieval-Augmented Generation) capabilities", | |
| ]} | |
| /> | |
| <Divider sx={{ my: 3 }} /> | |
| <TagSection | |
| title="Language" | |
| description="You can indicate the languages covered by your benchmark like so: language:mylanguage. At the moment, we do not support language codes, please use the language name in English." | |
| tags={["language:english", "language:french"]} | |
| /> | |
| <Divider sx={{ my: 3 }} /> | |
| <TagSection | |
| title="Domain" | |
| description="Indicates the specific domain of the leaderboard:" | |
| tags={["domain:financial", "domain:medical", "domain:legal"]} | |
| /> | |
| <Typography | |
| variant="body2" | |
| sx={{ | |
| mt: 1, | |
| color: "text.secondary", | |
| fontSize: "0.875rem", | |
| fontStyle: "italic", | |
| }} | |
| > | |
| If you would like to see a domain that is not currently represented, | |
| please contact{" "} | |
| <Link | |
| href="https://huggingface.co/clementine" | |
| target="_blank" | |
| rel="noopener noreferrer" | |
| sx={{ | |
| color: "primary.main", | |
| textDecoration: "none", | |
| "&:hover": { | |
| textDecoration: "underline", | |
| }, | |
| }} | |
| > | |
| Clementine Fourrier | |
| </Link>{" "} | |
| on Hugging Face. | |
| </Typography> | |
| </Section> | |
| </Box> | |
| ); | |
| }; | |
| export default HowToSubmitPage; | |