GIFT-Eval

Running

App Files Files Community

juncliu committed on Nov 12, 2024

Commit

a47646c

1 Parent(s): c870669

add model_link

Browse files

Files changed (11) hide show

app.py +8 -92
results/Chronos_small/config.json +2 -1
results/Moirai_base/config.json +2 -1
results/Moirai_large/config.json +2 -1
results/Moirai_small/config.json +2 -1
results/chronos_base/config.json +2 -1
results/chronos_large/config.json +2 -1
results/timer_small/config.json +5 -0
results/timesfm/config.json +2 -1
src/display/utils.py +3 -3
src/leaderboard/read_evals.py +1 -1

app.py CHANGED Viewed

@@ -110,17 +110,23 @@ def init_leaderboard(ori_dataframe, model_info_df):
     if ori_dataframe is None or ori_dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
     model_info_col_list = [c.name for c in fields(ModelInfoColumn) if c.displayed_by_default if c.name not in ['#Params (B)', 'available_on_hub', 'hub', 'Model sha','Hub License']]
     default_selection_list = list(ori_dataframe.columns) + model_info_col_list
-    print('default_selection_list: ', default_selection_list)
     # ipdb.set_trace()
     # default_selection_list = [col for col in default_selection_list if col not in ['#Params (B)', 'available_on_hub', 'hub', 'Model sha','Hub License']]
     merged_df = get_merged_df(ori_dataframe, model_info_df)
     new_cols = ['T'] + [col for col in merged_df.columns if col != 'T']
     merged_df = merged_df[new_cols]
     print('Merged df: ', merged_df)
     return Leaderboard(
         value=merged_df,
-        # datatype=[c.type for c in fields(ModelInfoColumn)],
         select_columns=SelectColumns(
             default_selection=default_selection_list,
             # default_selection=[c.name for c in fields(ModelInfoColumn) if
@@ -183,96 +189,6 @@ with demo:
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=4):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-        # with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=5):
-        #     with gr.Column():
-        #         with gr.Row():
-        #             gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-        #
-        #         with gr.Column():
-        #             with gr.Accordion(
-        #                     f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-        #                     open=False,
-        #             ):
-        #                 with gr.Row():
-        #                     finished_eval_table = gr.components.Dataframe(
-        #                         value=finished_eval_queue_df,
-        #                         headers=EVAL_COLS,
-        #                         datatype=EVAL_TYPES,
-        #                         row_count=5,
-        #                     )
-        #             with gr.Accordion(
-        #                     f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-        #                     open=False,
-        #             ):
-        #                 with gr.Row():
-        #                     running_eval_table = gr.components.Dataframe(
-        #                         value=running_eval_queue_df,
-        #                         headers=EVAL_COLS,
-        #                         datatype=EVAL_TYPES,
-        #                         row_count=5,
-        #                     )
-        #
-        #             with gr.Accordion(
-        #                     f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-        #                     open=False,
-        #             ):
-        #                 with gr.Row():
-        #                     pending_eval_table = gr.components.Dataframe(
-        #                         value=pending_eval_queue_df,
-        #                         headers=EVAL_COLS,
-        #                         datatype=EVAL_TYPES,
-        #                         row_count=5,
-        #                     )
-        #     with gr.Row():
-        #         gr.Markdown("# ✉️✨ Submit your model outputs !", elem_classes="markdown-text")
-        #         gr.Markdown(
-        #             "Send your model outputs for all the models using the ContextualBench code and email them to us at [email protected] ",
-        #             elem_classes="markdown-text")
-            # with gr.Row():
-            #     with gr.Column():
-            #         model_name_textbox = gr.Textbox(label="Model name")
-            #         revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-            #         model_type = gr.Dropdown(
-            #             choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-            #             label="Model type",
-            #             multiselect=False,
-            #             value=None,
-            #             interactive=True,
-            #         )
-            #     with gr.Column():
-            #         precision = gr.Dropdown(
-            #             choices=[i.value.name for i in Precision if i != Precision.Unknown],
-            #             label="Precision",
-            #             multiselect=False,
-            #             value="float16",
-            #             interactive=True,
-            #         )
-            #         weight_type = gr.Dropdown(
-            #             choices=[i.value.name for i in WeightType],
-            #             label="Weights type",
-            #             multiselect=False,
-            #             value="Original",
-            #             interactive=True,
-            #         )
-            #         base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-            # submit_button = gr.Button("Submit Eval")
-            # submission_result = gr.Markdown()
-            # submit_button.click(
-            #     add_new_eval,
-            #     [
-            #         model_name_textbox,
-            #         base_model_name_textbox,
-            #         revision_name_textbox,
-            #         precision,
-            #         weight_type,
-            #         model_type,
-            #     ],
-            #     submission_result,
-            # )
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
             citation_button = gr.Textbox(

     if ori_dataframe is None or ori_dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
     model_info_col_list = [c.name for c in fields(ModelInfoColumn) if c.displayed_by_default if c.name not in ['#Params (B)', 'available_on_hub', 'hub', 'Model sha','Hub License']]
+    col2type_dict = {c.name: c.type for c in fields(ModelInfoColumn)}
     default_selection_list = list(ori_dataframe.columns) + model_info_col_list
+    # print('default_selection_list: ', default_selection_list)
     # ipdb.set_trace()
     # default_selection_list = [col for col in default_selection_list if col not in ['#Params (B)', 'available_on_hub', 'hub', 'Model sha','Hub License']]
     merged_df = get_merged_df(ori_dataframe, model_info_df)
     new_cols = ['T'] + [col for col in merged_df.columns if col != 'T']
     merged_df = merged_df[new_cols]
     print('Merged df: ', merged_df)
+    # get the data type
+    datatype_list = [col2type_dict[col] if col in col2type_dict else 'number' for col in merged_df.columns]
+    # print('datatype_list: ', datatype_list)
+    # print('merged_df.column: ', merged_df.columns)
+    # ipdb.set_trace()
     return Leaderboard(
         value=merged_df,
+        datatype=[c.type for c in fields(ModelInfoColumn)],
         select_columns=SelectColumns(
             default_selection=default_selection_list,
             # default_selection=[c.name for c in fields(ModelInfoColumn) if
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=4):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
             citation_button = gr.Textbox(

results/Chronos_small/config.json CHANGED Viewed

@@ -1,5 +1,6 @@
 {
     "model": "Chronos_small",
     "model_type": "pretrained",
-    "model_dtype": "float32"
 }

 {
     "model": "Chronos_small",
     "model_type": "pretrained",
+    "model_dtype": "float32",
+    "model_link": "https://huggingface.co/amazon/chronos-t5-small"
 }

results/Moirai_base/config.json CHANGED Viewed

@@ -1,5 +1,6 @@
 {
     "model": "Moirai_base",
     "model_type": "pretrained",
-    "model_dtype": "float32"
 }

 {
     "model": "Moirai_base",
     "model_type": "pretrained",
+    "model_dtype": "float32",
+    "model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-base"
 }

results/Moirai_large/config.json CHANGED Viewed

@@ -1,5 +1,6 @@
 {
     "model": "Moirai_large",
     "model_type": "pretrained",
-    "model_dtype": "float32"
 }

 {
     "model": "Moirai_large",
     "model_type": "pretrained",
+    "model_dtype": "float32",
+    "model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-large"
 }

results/Moirai_small/config.json CHANGED Viewed

@@ -1,5 +1,6 @@
 {
     "model": "Moirai_small",
     "model_type": "pretrained",
-    "model_dtype": "float32"
 }

 {
     "model": "Moirai_small",
     "model_type": "pretrained",
+    "model_dtype": "float32",
+    "model_link": "https://huggingface.co/Salesforce/moirai-1.1-R-small"
 }

results/chronos_base/config.json CHANGED Viewed

@@ -1,5 +1,6 @@
 {
     "model": "Chronos_base",
     "model_type": "pretrained",
-    "model_dtype": "float32"
 }

 {
     "model": "Chronos_base",
     "model_type": "pretrained",
+    "model_dtype": "float32",
+    "model_link": "https://huggingface.co/amazon/chronos-t5-base"
 }

results/chronos_large/config.json CHANGED Viewed

@@ -1,5 +1,6 @@
 {
     "model": "Chronos_large",
     "model_type": "pretrained",
-    "model_dtype": "float32"
 }

 {
     "model": "Chronos_large",
     "model_type": "pretrained",
+    "model_dtype": "float32",
+    "model_link": "https://huggingface.co/amazon/chronos-t5-large"
 }

results/timer_small/config.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "model": "timer_small",
+    "model_type": "pretrained",
+    "model_dtype": "float32"
+}

results/timesfm/config.json CHANGED Viewed

@@ -1,5 +1,6 @@
 {
     "model": "TimesFM",
     "model_type": "pretrained",
-    "model_dtype": "float32"
 }

 {
     "model": "TimesFM",
     "model_type": "pretrained",
+    "model_dtype": "float32",
+    "model_link": "https://huggingface.co/google/timesfm-1.0-200m"
 }

src/display/utils.py CHANGED Viewed

@@ -27,14 +27,14 @@ model_info_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "
 model_info_dict.append(["model", ColumnContent, ColumnContent("model", "markdown", True, never_hidden=True)])
 # Model information
 model_info_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False, True)])
-model_info_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-model_info_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
 model_info_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False, True)])
 model_info_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False, True)])
 model_info_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False, True)])
 model_info_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False, True)])
 model_info_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
-model_info_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 # We use make dataclass to dynamically fill the scores from Tasks
 ModelInfoColumn = make_dataclass("ModelInfoColumn", model_info_dict, frozen=True)

 model_info_dict.append(["model", ColumnContent, ColumnContent("model", "markdown", True, never_hidden=True)])
 # Model information
 model_info_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False, True)])
+# model_info_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
+# model_info_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
 model_info_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False, True)])
 model_info_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False, True)])
 model_info_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False, True)])
 model_info_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False, True)])
 model_info_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
+# model_info_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 # We use make dataclass to dynamically fill the scores from Tasks
 ModelInfoColumn = make_dataclass("ModelInfoColumn", model_info_dict, frozen=True)

src/leaderboard/read_evals.py CHANGED Viewed

@@ -42,7 +42,7 @@ class ModelConfig:
     def to_dict(self):
         """Converts the model info to a dict compatible with our dataframe display"""
         data_dict = {
-            "model": self.model,
             'model_w_link': model_hyperlink(self.model_link, self.model),
             ModelInfoColumn.precision.name: self.precision.value.name,
             ModelInfoColumn.model_type.name: self.model_type.value.name,

     def to_dict(self):
         """Converts the model info to a dict compatible with our dataframe display"""
         data_dict = {
+            ModelInfoColumn.model.name: self.model,
             'model_w_link': model_hyperlink(self.model_link, self.model),
             ModelInfoColumn.precision.name: self.precision.value.name,
             ModelInfoColumn.model_type.name: self.model_type.value.name,