Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	File size: 4,320 Bytes
			
			| 5d57406 6de388e 5d57406 6de388e 5d57406 6de388e 5d57406 6de388e 5d57406 6de388e a77e097 6de388e 5d57406 6de388e 5aa60a6 6de388e 5d57406 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 | import json
import argparse
import ast
import csv
import glob
import os
def chatgpt_json(merge_file):
    """Compute the per-dataset accuracy, in percent, from merged chat results.

    Args:
        merge_file: UTF-8 encoded bytes of a document shaped like
            ``{dataset_name: {sample_id: {dim: [result, ...]}}}``, where each
            ``result`` is a mapping with a numeric ``'rating'`` entry.

    Returns:
        dict: ``{dataset_name: score}`` where ``score`` is the mean rating of
        every result in that dataset times 100, rounded to two decimals. A
        dataset without any result scores ``0.0``.

    Raises:
        ValueError, SyntaxError: if the payload is neither valid JSON nor a
            plain Python literal.
    """
    text = merge_file.decode("utf-8")
    try:
        merge_data = json.loads(text)
    except json.JSONDecodeError:
        # SECURITY: this used to be eval() on the raw payload, which executes
        # arbitrary code. Payloads that are not strict JSON (e.g. a dumped
        # Python dict) are still accepted, but only as literals. The textual
        # true/false patch of the original is kept for that legacy path only.
        text = text.replace(": true,", ": \"true\",")
        text = text.replace(": false,", ": \"false\",")
        merge_data = ast.literal_eval(text)

    dataset_scores_dict = {}
    for dataset_name, dataset_results in merge_data.items():
        ratings = [
            result['rating']
            for sample_results in dataset_results.values()
            for dim_results in sample_results.values()
            for result in dim_results
        ]
        # Guard against datasets with no results instead of dividing by zero.
        dataset_scores_dict[dataset_name] = (
            round(sum(ratings) / len(ratings) * 100, 2) if ratings else 0.0
        )
    return dataset_scores_dict
def compute_scores(merge_file):
    """Build the two-row score table (header + values) from merged results.

    Args:
        merge_file: UTF-8 encoded bytes of a document shaped like
            ``{dataset_name: {sample_id: {dim: [result, ...]}}}``, where each
            ``result`` is a mapping with a numeric ``'rating'`` entry and
            ``dim`` is one of ``action``, ``speed``, ``direction``, ``order``
            or ``attribute_change``.

    Returns:
        list: ``[header, row]``. ``header`` holds 25 column titles
        ("Avg. All" followed by every dimension/task combination) and ``row``
        the matching scores: mean rating times 100, rounded to two decimals.
        A dimension with no results in a dataset scores ``0.0``.

    Raises:
        KeyError: if a result uses a dimension outside the known ones, or if
            one of the datasets ``multi-choice``, ``yes_no``,
            ``caption_matching`` or ``captioning`` is missing.
        ValueError, SyntaxError: if the payload is neither valid JSON nor a
            plain Python literal.
    """
    text = merge_file.decode("utf-8")
    try:
        merge_data = json.loads(text)
    except json.JSONDecodeError:
        # SECURITY: this used to be eval() on the raw payload, which executes
        # arbitrary code. Payloads that are not strict JSON (e.g. a dumped
        # Python dict) are still accepted, but only as literals. The textual
        # true/false/null patch of the original is kept for that legacy path.
        for token in ("true", "false", "null"):
            text = text.replace(f": {token},", f": \"{token}\",")
        merge_data = ast.literal_eval(text)

    eval_dims = ['action', 'speed', 'direction', 'order', 'attribute_change', 'avg']
    dataset_scores_dict = {}
    total_correct, total_num = 0, 0
    for dataset_name, dataset_results in merge_data.items():
        dataset_correct = dict.fromkeys(eval_dims, 0)
        dataset_num = dict.fromkeys(eval_dims, 0)
        for sample_results in dataset_results.values():
            for dim, dim_results in sample_results.items():
                for result in dim_results:
                    rating = result['rating']
                    # Every result counts toward its own dimension and
                    # toward the dataset-wide 'avg' bucket.
                    dataset_correct['avg'] += rating
                    dataset_correct[dim] += rating
                    dataset_num['avg'] += 1
                    dataset_num[dim] += 1
        total_correct += dataset_correct['avg']
        total_num += dataset_num['avg']
        for dim in eval_dims:
            count = dataset_num[dim]
            # A dimension absent from this dataset scores 0.0 rather than
            # raising ZeroDivisionError.
            dataset_scores_dict[f"{dim}_{dataset_name}"] = (
                round(dataset_correct[dim] / count * 100, 2) if count else 0.0
            )
    dataset_scores_dict["avg_all"] = (
        round(total_correct / total_num * 100, 2) if total_num else 0.0
    )

    # (score-key prefix, column label) in the column order of the table.
    dim_columns = (
        ("avg", "Avg"),
        ("action", "Action"),
        ("direction", "Direction"),
        ("speed", "Speed"),
        ("order", "Event Order"),
        ("attribute_change", "Attribute Change"),
    )
    # (dataset name, column label) in the column order of the table.
    task_columns = (
        ("multi-choice", "Multi-Choice"),
        ("yes_no", "Yes/No"),
        ("caption_matching", "Caption Matching"),
        ("captioning", "Caption Generation"),
    )
    header = ["Avg. All"]
    row = [dataset_scores_dict["avg_all"]]
    for dim_key, dim_label in dim_columns:
        for task_key, task_label in task_columns:
            header.append(f"{dim_label}. {task_label}")
            row.append(dataset_scores_dict[f"{dim_key}_{task_key}"])
    data = [header, row]
    return data
 |