KlaudiaTH
committed on
Commit
·
625e239
1
Parent(s):
3cf41e9
Refactorings and fixes for tab handling and few-shot selection
Browse files
app.py
CHANGED
|
@@ -12,6 +12,8 @@ with demo:
|
|
| 12 |
elem_classes="markdown-text",
|
| 13 |
)
|
| 14 |
|
|
|
|
|
|
|
| 15 |
with gr.Column():
|
| 16 |
with gr.Row():
|
| 17 |
with gr.Column():
|
|
@@ -67,10 +69,9 @@ with demo:
|
|
| 67 |
select.click(update_bar, inputs=[], outputs=langs_bar)
|
| 68 |
|
| 69 |
with gr.Row():
|
| 70 |
-
acc_task_group_names = core.task_groups_with_task_type("accuracy")
|
| 71 |
shown_tasks = gr.CheckboxGroup(
|
| 72 |
-
choices=
|
| 73 |
-
value=
|
| 74 |
label="Select tasks to show",
|
| 75 |
elem_id="column-select",
|
| 76 |
interactive=True,
|
|
@@ -80,15 +81,8 @@ with demo:
|
|
| 80 |
choices=[("0-Shot", False), ("Few-shot", True)],
|
| 81 |
value=True,
|
| 82 |
label="Select evaluation type",
|
| 83 |
-
interactive=True,
|
| 84 |
scale=29,
|
| 85 |
)
|
| 86 |
-
demo.load(
|
| 87 |
-
core.fix_zeroshot, [shown_tasks, fewshot], shown_tasks
|
| 88 |
-
)
|
| 89 |
-
fewshot.change(
|
| 90 |
-
core.fix_zeroshot, [shown_tasks, fewshot], shown_tasks
|
| 91 |
-
)
|
| 92 |
clear = gr.ClearButton(
|
| 93 |
shown_tasks, value="Deselect all tasks", size="sm", scale=21
|
| 94 |
)
|
|
@@ -104,33 +98,44 @@ with demo:
|
|
| 104 |
id=1,
|
| 105 |
) as misc:
|
| 106 |
leaderboard_table_misc = gr.Dataframe()
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
)
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
)
|
| 117 |
-
for comp, fn in [
|
| 118 |
-
(search_bar, "submit"),
|
| 119 |
-
(langs_bar, "change"),
|
| 120 |
-
(shown_tasks, "change"),
|
| 121 |
-
(fewshot, "change"),
|
| 122 |
-
(model_types, "change"),
|
| 123 |
-
]:
|
| 124 |
-
getattr(comp, fn)(
|
| 125 |
-
core.update_df,
|
| 126 |
-
[shown_tasks, search_bar, langs_bar, model_types, fewshot],
|
| 127 |
-
leaderboard_table,
|
| 128 |
-
)
|
| 129 |
-
getattr(comp, fn)(
|
| 130 |
-
core.update_df,
|
| 131 |
-
[shown_tasks, search_bar, langs_bar, model_types, fewshot],
|
| 132 |
-
leaderboard_table_misc,
|
| 133 |
-
)
|
| 134 |
|
| 135 |
|
| 136 |
gr.Blocks.load(
|
|
|
|
| 12 |
elem_classes="markdown-text",
|
| 13 |
)
|
| 14 |
|
| 15 |
+
selected_tab = gr.State(value=0)
|
| 16 |
+
|
| 17 |
with gr.Column():
|
| 18 |
with gr.Row():
|
| 19 |
with gr.Column():
|
|
|
|
| 69 |
select.click(update_bar, inputs=[], outputs=langs_bar)
|
| 70 |
|
| 71 |
with gr.Row():
|
|
|
|
| 72 |
shown_tasks = gr.CheckboxGroup(
|
| 73 |
+
choices=[],
|
| 74 |
+
value=[],
|
| 75 |
label="Select tasks to show",
|
| 76 |
elem_id="column-select",
|
| 77 |
interactive=True,
|
|
|
|
| 81 |
choices=[("0-Shot", False), ("Few-shot", True)],
|
| 82 |
value=True,
|
| 83 |
label="Select evaluation type",
|
|
|
|
| 84 |
scale=29,
|
| 85 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
clear = gr.ClearButton(
|
| 87 |
shown_tasks, value="Deselect all tasks", size="sm", scale=21
|
| 88 |
)
|
|
|
|
| 98 |
id=1,
|
| 99 |
) as misc:
|
| 100 |
leaderboard_table_misc = gr.Dataframe()
|
| 101 |
+
|
| 102 |
+
demo.load(
|
| 103 |
+
core.update_task_groups_and_fewshot,
|
| 104 |
+
[gr.State(value=0), fewshot],
|
| 105 |
+
[shown_tasks, fewshot, selected_tab],
|
| 106 |
+
)
|
| 107 |
+
fewshot.change(
|
| 108 |
+
core.update_task_groups_and_fewshot,
|
| 109 |
+
[selected_tab, fewshot],
|
| 110 |
+
[shown_tasks, fewshot, selected_tab],
|
| 111 |
+
)
|
| 112 |
+
acc.select(
|
| 113 |
+
core.update_task_groups_and_fewshot,
|
| 114 |
+
inputs=[gr.State(value=0), fewshot],
|
| 115 |
+
outputs=[shown_tasks, fewshot, selected_tab],
|
| 116 |
+
)
|
| 117 |
+
misc.select(
|
| 118 |
+
core.update_task_groups_and_fewshot,
|
| 119 |
+
inputs=[gr.State(value=1), fewshot],
|
| 120 |
+
outputs=[shown_tasks, fewshot, selected_tab],
|
| 121 |
+
)
|
| 122 |
+
for comp, fn in [
|
| 123 |
+
(search_bar, "submit"),
|
| 124 |
+
(langs_bar, "change"),
|
| 125 |
+
(shown_tasks, "change"),
|
| 126 |
+
(fewshot, "change"),
|
| 127 |
+
(model_types, "change"),
|
| 128 |
+
]:
|
| 129 |
+
getattr(comp, fn)(
|
| 130 |
+
core.update_df,
|
| 131 |
+
[shown_tasks, search_bar, langs_bar, model_types, fewshot],
|
| 132 |
+
leaderboard_table,
|
| 133 |
)
|
| 134 |
+
getattr(comp, fn)(
|
| 135 |
+
core.update_df,
|
| 136 |
+
[shown_tasks, search_bar, langs_bar, model_types, fewshot],
|
| 137 |
+
leaderboard_table_misc,
|
| 138 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
|
| 141 |
gr.Blocks.load(
|
core.py
CHANGED
|
@@ -9,9 +9,8 @@ from datasets import load_dataset
|
|
| 9 |
|
| 10 |
import style
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
NO_ZEROSHOT = ["GSM8K", "TruthfulQA"] # FIXME
|
| 15 |
|
| 16 |
|
| 17 |
def init():
|
|
@@ -126,68 +125,29 @@ def update_df(
|
|
| 126 |
return sort_cols(df, fewshot)
|
| 127 |
|
| 128 |
|
| 129 |
-
def
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
value
|
| 137 |
-
else:
|
| 138 |
-
if TAB_STATE == 0:
|
| 139 |
-
choices = [c for c in choices if c not in NO_FEWSHOT]
|
| 140 |
-
value = [v for v in tasks if v in choices]
|
| 141 |
-
value += [t for t in NO_ZEROSHOT if t not in value]
|
| 142 |
-
elif TAB_STATE == 1:
|
| 143 |
-
value = [v for v in tasks if v in choices]
|
| 144 |
-
shown_tasks = gr.CheckboxGroup(
|
| 145 |
-
choices=choices,
|
| 146 |
-
value=value,
|
| 147 |
-
label="Select tasks to show",
|
| 148 |
-
elem_id="column-select",
|
| 149 |
-
interactive=True,
|
| 150 |
-
scale=50,
|
| 151 |
)
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
values = choices.copy()
|
| 164 |
-
shown_tasks = gr.CheckboxGroup(
|
| 165 |
-
choices=choices,
|
| 166 |
-
value=values,
|
| 167 |
-
label="Select tasks to show",
|
| 168 |
-
elem_id="column-select",
|
| 169 |
-
interactive=True,
|
| 170 |
-
scale=50,
|
| 171 |
)
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
fewshot = gr.Radio(
|
| 175 |
-
choices=[("0-Shot", False), ("Few-shot", True)],
|
| 176 |
-
value=True,
|
| 177 |
-
label="Select evaluation type",
|
| 178 |
-
interactive=True,
|
| 179 |
-
scale=29,
|
| 180 |
-
)
|
| 181 |
-
elif id == 1:
|
| 182 |
-
# switching to translation tab, default to 0-shot and disable selection
|
| 183 |
-
fewshot = gr.Radio(
|
| 184 |
-
choices=[("0-Shot", False), ("Few-shot", True)],
|
| 185 |
-
value=False,
|
| 186 |
-
label="Select evaluation type",
|
| 187 |
-
interactive=False,
|
| 188 |
-
scale=29,
|
| 189 |
-
)
|
| 190 |
-
return [shown_tasks, fewshot]
|
| 191 |
|
| 192 |
|
| 193 |
def get_selected_task_type(task_type_id):
|
|
@@ -196,10 +156,15 @@ def get_selected_task_type(task_type_id):
|
|
| 196 |
return selected_task_type
|
| 197 |
|
| 198 |
|
| 199 |
-
def
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
-
return
|
| 203 |
|
| 204 |
|
| 205 |
init()
|
|
|
|
| 9 |
|
| 10 |
import style
|
| 11 |
|
| 12 |
+
ZERO_SHOT_ONLY = ["BELEBELE"]
|
| 13 |
+
FEW_SHOT_ONLY = ["GSM8K", "TruthfulQA"]
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
def init():
|
|
|
|
| 125 |
return sort_cols(df, fewshot)
|
| 126 |
|
| 127 |
|
| 128 |
+
def update_task_groups_and_fewshot(current_selected_tab: int, is_fewshot_current: bool = False):
|
| 129 |
+
selected_task_type = get_selected_task_type(current_selected_tab)
|
| 130 |
+
available_tasks = get_available_task_groups(selected_task_type, is_fewshot_current)
|
| 131 |
+
new_selected_tasks = available_tasks.copy()
|
| 132 |
+
|
| 133 |
+
tasks_checkbox_group_update = gr.CheckboxGroup(
|
| 134 |
+
choices=available_tasks,
|
| 135 |
+
value=new_selected_tasks,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
)
|
| 137 |
+
|
| 138 |
+
if current_selected_tab == 0:
|
| 139 |
+
is_fewshot_new = is_fewshot_current
|
| 140 |
+
fewshot_available = True
|
| 141 |
+
elif current_selected_tab == 1:
|
| 142 |
+
is_fewshot_new = False
|
| 143 |
+
fewshot_available = False
|
| 144 |
+
|
| 145 |
+
fewshot_radio_update = gr.Radio(
|
| 146 |
+
value=is_fewshot_new,
|
| 147 |
+
interactive=fewshot_available,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
)
|
| 149 |
+
|
| 150 |
+
return [tasks_checkbox_group_update, fewshot_radio_update, current_selected_tab]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
|
| 153 |
def get_selected_task_type(task_type_id):
|
|
|
|
| 156 |
return selected_task_type
|
| 157 |
|
| 158 |
|
| 159 |
+
def get_available_task_groups(selected_task_type, fewshot):
|
| 160 |
+
task_groups = [task_group_name for task_group_name, task_type in task_group_type_dict.items() if task_type == selected_task_type]
|
| 161 |
+
|
| 162 |
+
if fewshot:
|
| 163 |
+
available_tasks = [c for c in task_groups if c not in ZERO_SHOT_ONLY]
|
| 164 |
+
else:
|
| 165 |
+
available_tasks = [c for c in task_groups if c not in FEW_SHOT_ONLY]
|
| 166 |
|
| 167 |
+
return available_tasks
|
| 168 |
|
| 169 |
|
| 170 |
init()
|