tathagataraha committed on
Commit
2f1ad94
·
verified ·
1 Parent(s): 237c120

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -112
app.py CHANGED
@@ -250,117 +250,6 @@ with demo:
250
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
251
 
252
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
253
- with gr.TabItem("πŸ… Closed Ended Evaluation", elem_id="llm-benchmark-tab-table", id=0):
254
- with gr.Row():
255
- with gr.Column():
256
- with gr.Row():
257
- search_bar = gr.Textbox(
258
- placeholder=" πŸ” Search for your model (separate multiple queries with `;`) and press ENTER...",
259
- show_label=False,
260
- elem_id="search-bar",
261
- )
262
- with gr.Row():
263
- shown_columns = gr.CheckboxGroup(
264
- choices=[c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.never_hidden and (c.invariant or c.dataset_task_col)],
265
- value=[
266
- c.name
267
- for c in fields(AutoEvalColumn)
268
- if c.displayed_by_default and not c.hidden and not c.never_hidden and (c.invariant or c.dataset_task_col)
269
- ],
270
- label="Select columns to show",
271
- elem_id="column-select",
272
- interactive=True,
273
- )
274
- # with gr.Row():
275
- # deleted_models_visibility = gr.Checkbox(
276
- # value=False, label="Show gated/private/deleted models", interactive=True
277
- # )
278
- with gr.Column(min_width=320):
279
- # with gr.Box(elem_id="box-filter"):
280
- filter_columns_type = gr.CheckboxGroup(
281
- label="Model Types",
282
- choices=[t.to_str() for t in ModelType],
283
- value=[t.to_str() for t in ModelType],
284
- interactive=True,
285
- elem_id="filter-columns-type",
286
- )
287
- # filter_columns_architecture = gr.CheckboxGroup(
288
- # label="Architecture Types",
289
- # choices=[i.value.name for i in ModelArch],
290
- # value=[i.value.name for i in ModelArch],
291
- # interactive=True,
292
- # elem_id="filter-columns-architecture",
293
- # )
294
- filter_domain_specific = gr.CheckboxGroup(
295
- label="Domain Specificity",
296
- choices=["πŸ₯ Clinical models", "Generic models"],
297
- value=["πŸ₯ Clinical models", "Generic models"],
298
- interactive=True,
299
- elem_id="filter-columns-type",
300
- )
301
- filter_columns_size = gr.CheckboxGroup(
302
- label="Model sizes (in billions of parameters)",
303
- choices=list(NUMERIC_INTERVALS.keys()),
304
- value=list(NUMERIC_INTERVALS.keys()),
305
- interactive=True,
306
- elem_id="filter-columns-size",
307
- )
308
-
309
- datasets_leaderboard_df, datasets_original_df = update_df(shown_columns.value, subset="datasets")
310
-
311
- leaderboard_table = gr.components.Dataframe(
312
- value=datasets_leaderboard_df[[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value],
313
- headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
314
- datatype=TYPES,
315
- elem_id="leaderboard-table",
316
- interactive=False,
317
- visible=True,
318
- )
319
-
320
- # Dummy leaderboard for handling the case when the user uses backspace key
321
- hidden_leaderboard_table_for_search = gr.components.Dataframe(
322
- value=datasets_original_df[DATASET_COLS],
323
- headers=DATASET_COLS,
324
- datatype=TYPES,
325
- visible=False,
326
- )
327
-
328
-
329
- search_bar.submit(
330
- update_table,
331
- [
332
- hidden_leaderboard_table_for_search,
333
- shown_columns,
334
- search_bar,
335
- filter_columns_type,
336
- filter_domain_specific,
337
- filter_columns_size
338
- # filter_columns_architecture
339
- ],
340
- leaderboard_table,
341
- )
342
- for selector in [
343
- shown_columns,
344
- filter_columns_type,
345
- filter_domain_specific,
346
- # filter_columns_architecture,
347
- filter_columns_size,
348
- # deleted_models_visibility,
349
- ]:
350
- selector.change(
351
- update_table,
352
- [
353
- hidden_leaderboard_table_for_search,
354
- shown_columns,
355
- search_bar,
356
- filter_columns_type,
357
- filter_domain_specific,
358
- filter_columns_size
359
- # filter_columns_architecture,
360
- ],
361
- leaderboard_table,
362
- queue=True,
363
- )
364
 
365
  with gr.TabItem("πŸ… Open Ended Evaluation", elem_id="llm-benchmark-tab-table", id=1):
366
  with gr.Row():
@@ -938,7 +827,119 @@ with demo:
938
  with gr.Accordion("Question generation", open=False):
939
  system_prompt, user_prompt = render_generation_templates(task="ce", generation_type="question_generation")
940
  with gr.Accordion("Cross Examination", open=False):
941
- system_prompt, user_prompt = render_generation_templates(task="ce", generation_type="cross_examination")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
942
  with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-table", id=5):
943
  gr.Markdown(LLM_BENCHMARKS_TEXT_1, elem_classes="markdown-text")
944
  gr.HTML(FIVE_PILLAR_DIAGRAM)
 
250
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
251
 
252
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
 
254
  with gr.TabItem("πŸ… Open Ended Evaluation", elem_id="llm-benchmark-tab-table", id=1):
255
  with gr.Row():
 
827
  with gr.Accordion("Question generation", open=False):
828
  system_prompt, user_prompt = render_generation_templates(task="ce", generation_type="question_generation")
829
  with gr.Accordion("Cross Examination", open=False):
830
+ system_prompt, user_prompt = render_generation_templates(task="ce", generation_type="cross_examination")
831
+ with gr.TabItem("πŸ… Closed Ended Evaluation", elem_id="llm-benchmark-tab-table", id=0):
832
+ with gr.Row():
833
+ with gr.Column():
834
+ with gr.Row():
835
+ search_bar = gr.Textbox(
836
+ placeholder=" πŸ” Search for your model (separate multiple queries with `;`) and press ENTER...",
837
+ show_label=False,
838
+ elem_id="search-bar",
839
+ )
840
+ with gr.Row():
841
+ shown_columns = gr.CheckboxGroup(
842
+ choices=[c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.never_hidden and (c.invariant or c.dataset_task_col)],
843
+ value=[
844
+ c.name
845
+ for c in fields(AutoEvalColumn)
846
+ if c.displayed_by_default and not c.hidden and not c.never_hidden and (c.invariant or c.dataset_task_col)
847
+ ],
848
+ label="Select columns to show",
849
+ elem_id="column-select",
850
+ interactive=True,
851
+ )
852
+ # with gr.Row():
853
+ # deleted_models_visibility = gr.Checkbox(
854
+ # value=False, label="Show gated/private/deleted models", interactive=True
855
+ # )
856
+ with gr.Column(min_width=320):
857
+ # with gr.Box(elem_id="box-filter"):
858
+ filter_columns_type = gr.CheckboxGroup(
859
+ label="Model Types",
860
+ choices=[t.to_str() for t in ModelType],
861
+ value=[t.to_str() for t in ModelType],
862
+ interactive=True,
863
+ elem_id="filter-columns-type",
864
+ )
865
+ # filter_columns_architecture = gr.CheckboxGroup(
866
+ # label="Architecture Types",
867
+ # choices=[i.value.name for i in ModelArch],
868
+ # value=[i.value.name for i in ModelArch],
869
+ # interactive=True,
870
+ # elem_id="filter-columns-architecture",
871
+ # )
872
+ filter_domain_specific = gr.CheckboxGroup(
873
+ label="Domain Specificity",
874
+ choices=["πŸ₯ Clinical models", "Generic models"],
875
+ value=["πŸ₯ Clinical models", "Generic models"],
876
+ interactive=True,
877
+ elem_id="filter-columns-type",
878
+ )
879
+ filter_columns_size = gr.CheckboxGroup(
880
+ label="Model sizes (in billions of parameters)",
881
+ choices=list(NUMERIC_INTERVALS.keys()),
882
+ value=list(NUMERIC_INTERVALS.keys()),
883
+ interactive=True,
884
+ elem_id="filter-columns-size",
885
+ )
886
+
887
+ datasets_leaderboard_df, datasets_original_df = update_df(shown_columns.value, subset="datasets")
888
+
889
+ leaderboard_table = gr.components.Dataframe(
890
+ value=datasets_leaderboard_df[[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value],
891
+ headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
892
+ datatype=TYPES,
893
+ elem_id="leaderboard-table",
894
+ interactive=False,
895
+ visible=True,
896
+ )
897
+
898
+ # Dummy leaderboard for handling the case when the user uses backspace key
899
+ hidden_leaderboard_table_for_search = gr.components.Dataframe(
900
+ value=datasets_original_df[DATASET_COLS],
901
+ headers=DATASET_COLS,
902
+ datatype=TYPES,
903
+ visible=False,
904
+ )
905
+
906
+
907
+ search_bar.submit(
908
+ update_table,
909
+ [
910
+ hidden_leaderboard_table_for_search,
911
+ shown_columns,
912
+ search_bar,
913
+ filter_columns_type,
914
+ filter_domain_specific,
915
+ filter_columns_size
916
+ # filter_columns_architecture
917
+ ],
918
+ leaderboard_table,
919
+ )
920
+ for selector in [
921
+ shown_columns,
922
+ filter_columns_type,
923
+ filter_domain_specific,
924
+ # filter_columns_architecture,
925
+ filter_columns_size,
926
+ # deleted_models_visibility,
927
+ ]:
928
+ selector.change(
929
+ update_table,
930
+ [
931
+ hidden_leaderboard_table_for_search,
932
+ shown_columns,
933
+ search_bar,
934
+ filter_columns_type,
935
+ filter_domain_specific,
936
+ filter_columns_size
937
+ # filter_columns_architecture,
938
+ ],
939
+ leaderboard_table,
940
+ queue=True,
941
+ )
942
+
943
  with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-table", id=5):
944
  gr.Markdown(LLM_BENCHMARKS_TEXT_1, elem_classes="markdown-text")
945
  gr.HTML(FIVE_PILLAR_DIAGRAM)