File size: 5,277 Bytes
6c92442 2fb6d29 6c92442 9cb627e 6c92442 04f40cd 6c92442 04f40cd 6c92442 9cb627e aa6b5d3 9cb627e 8f69df6 0fc7c7a 8f69df6 8521570 0fc7c7a fd10e2f 0fc7c7a 6c92442 8f69df6 6c92442 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
from typing import Optional
# Internal task identifier -> human-readable task name.
TASKS_PRETTY = dict(
    library_based_code_generation="Library-based code generation",
    ci_builds_repair="CI builds repair",
    project_code_completion="Project-level code completion",
    commit_message_generation="Commit message generation",
    bug_localization="Bug localization",
    module_summarization="Module Summarization",
)

# Inverse lookup: human-readable task name -> internal task identifier.
TASKS_PRETTY_REVERSE = dict(zip(TASKS_PRETTY.values(), TASKS_PRETTY.keys()))
# Markdown description for each task, keyed by the internal task identifier
# (the same keys as TASKS_PRETTY). The text inside the triple-quoted strings
# is kept flush-left on purpose: leading whitespace would become part of the
# emitted Markdown.
TASKS_DESCRIPTIONS = {
    # NOTE(review): placeholder text; a real description is still missing.
    "library_based_code_generation": "cool description for Library Usage Examples Generation task",
    # NOTE(review): placeholder text; it also names a different task than the key suggests, confirm intent.
    "ci_builds_repair": "cool description for Bug Localization on Build Logs task",
    "project_code_completion": """# Project-Level Code Completion\n
Our Project-Level Code Completion 🤗 [JetBrains-Research/lca-code-completion](https://huggingface.co/datasets/JetBrains-Research/lca-code-completion) includes four datasets:
* `small-context`: 144 data points,
* `medium-context`: 224 data points,
* `large-context`: 270 data points,
* `huge-context`: 296 data points.
We use standard Exact Match (EM) metric for one-line code completion.
We evaluate Exact Match for different line categories:
* *infile* — functions and classes are from the completion file;
* *inproject* — functions and files are from the repository snapshot;
* *committed* — functions and classes are from the files that were added on the completion file commit;
* *common* — functions and classes with common names, e.g., `main`, `get`, etc.;
* *non-informative* — short/long lines, import/print lines, or comment lines;
* *random* — lines that doesn't fit to any of previous categories.
For further details on the dataset and the baselines from 🏟️ Long Code Arena Team, refer to `code_completion` folder in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines) or to our preprint (TODO).
""",
    "commit_message_generation": """# Commit Message Generation\n
Our Commit Message Generation benchmark 🤗 [JetBrains-Research/lca-commit-message-generation](https://huggingface.co/datasets/JetBrains-Research/lca-commit-message-generation) includes 163 manually curated commits from Python projects.
We use the following metrics for evaluation:
* [BLEU](https://huggingface.co/spaces/evaluate-metric/sacrebleu)
* [ROUGE](https://huggingface.co/spaces/evaluate-metric/rouge)
* [ChrF](https://huggingface.co/spaces/evaluate-metric/chrf)
* [BERTScore](https://huggingface.co/spaces/evaluate-metric/bertscore)
For further details on the dataset and the baselines from 🏟️ Long Code Arena Team, refer to `commit_message_generation` folder in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines) or to our preprint (TODO).
**Note.** The leaderboard is sorted by ROUGE-1 metric by default.
""",
    # The benchmark name here used to read "Module-to-Text", copy-pasted from
    # the module summarization entry below.
    "bug_localization": """# Bug Localization\n
Our Bug Localization benchmark 🤗 [JetBrains-Research/lca-bug-localization](https://huggingface.co/datasets/JetBrains-Research/lca-bug-localization) includes 7,479 bug issue descriptions with information about pull request that fix them for Python, Java and Kotlin projects.
Moreover, 150 data points from the test split were manually verified and can be used for bug localization approaches evaluation.
We used information retrieval metrics such as R@k, P@k and F1-score for evaluation, taking k equals to 2.
""",
    "module_summarization": """# Module Summarization\n
Our Module-to-Text benchmark 🤗 [JetBrains-Research/lca-module-summarization](https://huggingface.co/datasets/JetBrains-Research/lca-module-summarization) includes 216 manually curated text files describing different documentation of opensource permissive Python projects.
We use new metric for evaluation:
* [CompScore](https://github.com/JetBrains-Research/lca-baselines/tree/module2text)
For further details on the dataset and the baselines from 🏟️ Long Code Arena Team, refer to `module2text` folder in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines).
""",
}
def get_submission_text_files_for_task(task_pretty: Optional[str]) -> str:
    """Return Markdown instructions on which files to attach for a task.

    Args:
        task_pretty: Human-readable task name (a key of
            ``TASKS_PRETTY_REVERSE``), or a falsy value (``None`` or ``""``)
            when no task has been selected.

    Returns:
        A prompt to select a task when ``task_pretty`` is falsy; the
        task-specific instructions for commit message generation; otherwise a
        notice that the task has no instructions yet.

    Raises:
        KeyError: If ``task_pretty`` is non-empty but is not a key of
            ``TASKS_PRETTY_REVERSE``.
    """
    if not task_pretty:
        return "Please, select a specific task to see more detailed instructions regarding submitting files."

    task_id = TASKS_PRETTY_REVERSE[task_pretty]

    # Only commit message generation has dedicated instructions so far.
    if task_id == "commit_message_generation":
        return f"""**{task_pretty} Instructions:**\n\n* Please, attach files in [JSONLines format](https://jsonlines.org/). For an example, check the predictions provided by 🏟️ Long Code Arena Team in 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results/tree/main/commit_message_generation/predictions). Make sure to include `"prediction"` and `"reference"` fields for each example, the rest are optional."""

    return f"**{task_pretty} Instructions:**\n\n* 🚧 There are no instructions for the current task yet."
|