Fix error / Beautify m2t
- src/leaderboard_formatting.py +0 -1
- src/tasks_content.py +8 -9
src/leaderboard_formatting.py
CHANGED
@@ -70,7 +70,6 @@ def get_columns_per_task(task_id: str) -> List[str]:
         return ["Model Name", "Context Size", "Dataset Name", "Dataset"] + metrics_per_task + ["Availability", "Submitted By", "Resources"]
     if task_id == 'bug_localization':
         return ["Model Name", "Availability", "Context Size", "Dataset"] + metrics_per_task + ["Submitted By", "Resources"]
-    if task_id == 'bug_localization':
     return ["Model Name", "Context Size", "Availability"] + metrics_per_task + ["Submitted By", "Resources"]
 
 
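For context on the "Fix error" part: the removed line duplicated the `if task_id == 'bug_localization':` guard directly above the final `return`, leaving an `if` with no indented body, which Python rejects with an IndentationError when the module is imported. Below is a minimal, hypothetical sketch of the corrected function; the guard on the first `return` and the contents of `METRICS_PER_TASK` are not visible in this hunk, so those names and values are assumptions, not the real ones.

from typing import Dict, List

# Hypothetical stand-in: the real METRICS_PER_TASK lives elsewhere in
# leaderboard_formatting.py and defines the metric columns for every task.
METRICS_PER_TASK: Dict[str, List[str]] = {
    "project_code_completion": ["EM"],
    "bug_localization": ["R@2", "P@2", "F1-score"],
    "commit_message_generation": ["ChrF"],
}

def get_columns_per_task(task_id: str) -> List[str]:
    metrics_per_task = METRICS_PER_TASK[task_id]
    if task_id == "project_code_completion":  # assumed guard, not shown in the hunk
        return ["Model Name", "Context Size", "Dataset Name", "Dataset"] + metrics_per_task + ["Availability", "Submitted By", "Resources"]
    if task_id == "bug_localization":
        return ["Model Name", "Availability", "Context Size", "Dataset"] + metrics_per_task + ["Submitted By", "Resources"]
    # The buggy version had a second `if task_id == 'bug_localization':`
    # right here with no body under it; removing it makes this line the
    # default column set for all remaining tasks.
    return ["Model Name", "Context Size", "Availability"] + metrics_per_task + ["Submitted By", "Resources"]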
src/tasks_content.py
CHANGED
@@ -3,7 +3,7 @@ from typing import Optional
 TASKS_PRETTY = {
     "commit_message_generation": "Commit Message Generation",
     "bug_localization": "Bug Localization on Issue",
-    "
+    "module_summarization": "Module Summarization",
     "library_usage": "Library Usage Examples Generation",
     "project_code_completion": "Project-level Code Completion",
     "bug_localization_build_logs": "Bug Localization on Build Logs",
@@ -32,15 +32,11 @@ TASKS_DESCRIPTIONS = {
 Moreover, 150 data points from the test split were manually verified and can be used for bug localization approaches evaluation.
 We used information retrieval metrics such as R@k, P@k and F1-score for evaluation, taking k equals to 2.
 """,
-    "
+    "module_summarization": """# Module Summarization\n
+Our Module-to-Text benchmark 🤗 [JetBrains-Research/lca-module-summarization](https://huggingface.co/datasets/JetBrains-Research/lca-module-summarization) includes 216 manually curated text files describing different documentation of opensource permissive Python projects.
 
-
-
-We use the following metrics for evaluation:
-* [ChrF](https://huggingface.co/spaces/evaluate-metric/chrf)
-* [ROUGE](https://huggingface.co/spaces/evaluate-metric/rouge)
-* [BERTScore](https://huggingface.co/spaces/evaluate-metric/bertscore)
-* ChatGPT-Turing-Test
+We use new metric for evaluation:
+* [CompScore](https://github.com/JetBrains-Research/lca-baselines/tree/module2text)
 
 For further details on the dataset and the baselines from 🏟️ Long Code Arena Team, refer to `module2text` folder in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines) or to our preprint (TODO).
 """,
@@ -58,5 +54,8 @@ def get_submission_text_files_for_task(task_pretty: Optional[str]) -> str:
 
     if task_id == "commit_message_generation":
         return f"""**{task_pretty} Instructions:**\n\n* Please, attach files in [JSONLines format](https://jsonlines.org/). For an example, check the predictions provided by 🏟️ Long Code Arena Team in 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results/tree/main/commit_message_generation/predictions). Make sure to include `"prediction"` and `"reference"` fields for each example, the rest are optional."""
+
+    if task_id == "module_summarization":
+        return f"""**{task_pretty} Instructions:**\n\n* Please, attach files in [JSONLines format](https://jsonlines.org/). For an example, check the predictions provided by 🏟️ Long Code Arena Team in 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results/tree/main/commit_message_generation/predictions). Make sure to include `"prediction"` and `"reference"` fields for each example, the rest are optional."""
 
     return f"**{task_pretty} Instructions:**\n\n* 🚧 There are no instructions for the current task yet."
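The submission instructions added above expect predictions in JSONLines format with a `"prediction"` and a `"reference"` field per example. A minimal sketch of writing such a file, with made-up example strings:

import json

# Made-up examples: per the instructions, only "prediction" and
# "reference" are required; any other fields are optional.
rows = [
    {"prediction": "Fix off-by-one error in page iterator", "reference": "Fix pagination bug"},
    {"prediction": "Add retries to the HTTP client", "reference": "Retry failed requests"},
]

# JSONLines (https://jsonlines.org/): one JSON object per line.
with open("predictions.jsonl", "w", encoding="utf-8") as f:
    for row in rows:
        f.write(json.dumps(row) + "\n")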