Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	
		future-xy
		
	committed on
		
		
					Commit 
							
							·
						
						82a6ed1
	
1
								Parent(s):
							
							a4a186c
								
fix result display bug
Browse files
- src/backend/envs.py +1 -1
- src/display/utils.py +16 -18
    	
        src/backend/envs.py
    CHANGED
    
    | @@ -43,7 +43,7 @@ class Tasks(Enum): | |
| 43 |  | 
| 44 | 
             
                # task13 = Task("ifeval", "prompt_level_strict_acc", "IFEval", 0)
         | 
| 45 |  | 
| 46 | 
            -
                task14 = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT", 0)
         | 
| 47 |  | 
| 48 | 
             
                # task15 = Task("fever10", "acc", "FEVER", 16)
         | 
| 49 | 
             
                # task15_1 = Task("fever11", "acc", "FEVER", 8)
         | 
|  | |
| 43 |  | 
| 44 | 
             
                # task13 = Task("ifeval", "prompt_level_strict_acc", "IFEval", 0)
         | 
| 45 |  | 
| 46 | 
            +
                # task14 = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT", 0)
         | 
| 47 |  | 
| 48 | 
             
                # task15 = Task("fever10", "acc", "FEVER", 16)
         | 
| 49 | 
             
                # task15_1 = Task("fever11", "acc", "FEVER", 8)
         | 
    	
        src/display/utils.py
    CHANGED
    
    | @@ -45,8 +45,8 @@ class Tasks(Enum): | |
| 45 | 
             
                # halueval_dial = Task("halueval_dialogue", "acc", "HaluDial/Acc")
         | 
| 46 |  | 
| 47 | 
             
                # # XXX include me back at some point
         | 
| 48 | 
            -
                selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
         | 
| 49 | 
            -
                mmlu = Task(" | 
| 50 |  | 
| 51 |  | 
| 52 | 
             
            # These classes are for user facing column names,
         | 
| @@ -63,11 +63,9 @@ class ColumnContent: | |
| 63 |  | 
| 64 |  | 
| 65 | 
             
            auto_eval_column_dict = []
         | 
| 66 | 
            -
            auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "str", True, never_hidden=True)])
         | 
| 67 | 
            -
            auto_eval_column_dict.append(["hardware", ColumnContent, ColumnContent("Hardware", "str", True, never_hidden=True)])
         | 
| 68 | 
             
            # Init
         | 
| 69 | 
            -
             | 
| 70 | 
            -
             | 
| 71 |  | 
| 72 | 
             
            # #Scores
         | 
| 73 | 
             
            # # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
         | 
| @@ -75,18 +73,18 @@ auto_eval_column_dict.append(["hardware", ColumnContent, ColumnContent("Hardware | |
| 75 | 
             
            for task in Tasks:
         | 
| 76 | 
             
                auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
         | 
| 77 |  | 
| 78 | 
            -
            #  | 
| 79 | 
            -
             | 
| 80 | 
            -
             | 
| 81 | 
            -
             | 
| 82 | 
            -
             | 
| 83 | 
            -
             | 
| 84 | 
            -
             | 
| 85 | 
            -
             | 
| 86 | 
            -
             | 
| 87 | 
            -
             | 
| 88 | 
            -
            #  | 
| 89 | 
            -
             | 
| 90 |  | 
| 91 | 
             
            # We use make dataclass to dynamically fill the scores from Tasks
         | 
| 92 | 
             
            AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
         | 
|  | |
| 45 | 
             
                # halueval_dial = Task("halueval_dialogue", "acc", "HaluDial/Acc")
         | 
| 46 |  | 
| 47 | 
             
                # # XXX include me back at some point
         | 
| 48 | 
            +
                # selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
         | 
| 49 | 
            +
                mmlu = Task("mmlu", "acc", "MMLU/Acc")
         | 
| 50 |  | 
| 51 |  | 
| 52 | 
             
            # These classes are for user facing column names,
         | 
|  | |
| 63 |  | 
| 64 |  | 
| 65 | 
             
            auto_eval_column_dict = []
         | 
|  | |
|  | |
| 66 | 
             
            # Init
         | 
| 67 | 
            +
            auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
         | 
| 68 | 
            +
            auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
         | 
| 69 |  | 
| 70 | 
             
            # #Scores
         | 
| 71 | 
             
            # # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
         | 
|  | |
| 73 | 
             
            for task in Tasks:
         | 
| 74 | 
             
                auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
         | 
| 75 |  | 
| 76 | 
            +
            # Model information
         | 
| 77 | 
            +
            auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
         | 
| 78 | 
            +
            auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
         | 
| 79 | 
            +
            auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
         | 
| 80 | 
            +
            auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
         | 
| 81 | 
            +
            auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
         | 
| 82 | 
            +
            auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
         | 
| 83 | 
            +
            auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
         | 
| 84 | 
            +
            auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
         | 
| 85 | 
            +
            auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
         | 
| 86 | 
            +
            # Dummy column for the search bar (hidden by the custom CSS)
         | 
| 87 | 
            +
            auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
         | 
| 88 |  | 
| 89 | 
             
            # We use make dataclass to dynamically fill the scores from Tasks
         | 
| 90 | 
             
            AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
         |