|
import os |
|
import json |
|
from tasks.eval.eval_utils import ( |
|
dump_json, |
|
load_json, |
|
EvalDataset, |
|
) |
|
|
|
|
|
def check_ans(pred, gt):
    """Return True when the option label in *pred* matches the one in *gt*.

    Only the first space-delimited token of each string is compared, after
    lower-casing (e.g. ``"(a)"`` taken from ``"(A) a red ball."``); the text
    following the label is ignored.  A prediction is accepted when its label,
    with any ``'.'`` removed, is a substring of the ground-truth label, or
    when the ground-truth label is a substring of the predicted label.

    Args:
        pred: Text produced by the model.
        gt: Ground-truth answer text whose first token is the option label.

    Returns:
        bool: True on a label match, False otherwise.  When the predicted
        label contains none of the letters a-g, a notice is printed and
        False is returned.
    """
    pred_option = pred.lower().split(' ')[0]
    gt_option = gt.lower().split(' ')[0]

    # Both strings are already lower-cased, so only a-g need to be probed.
    if not any(c in pred_option for c in 'abcdefg'):
        print(f"model doesn't follow instructions: {pred}")
        return False

    # An empty ground-truth label is a substring of every prediction; never
    # count that as a hit.
    if not gt_option:
        return False

    return pred_option.replace('.', '') in gt_option or gt_option in pred_option
|
|
|
def save_results(result_list, save_path):
    """Score every prediction and write the per-task and overall accuracy.

    Two files are written through ``dump_json`` under *save_path*:

    * ``all_results.json`` -- ``{"acc_dict": ..., "result_list": ...}`` where
      ``acc_dict`` maps each task type to ``[num_correct, num_total]``;
    * ``upload_leaderboard.json`` -- accuracy in percent for each task type
      plus an ``'Avg'`` entry computed over all samples.

    Args:
        result_list: Iterable of dicts with the keys ``'task_type'``,
            ``'pred'`` and ``'gt'``.
        save_path: Location forwarded to ``dump_json``.
    """
    # task type -> [num_correct, num_total]
    acc_dict = {}
    for res in result_list:
        counts = acc_dict.setdefault(res['task_type'], [0, 0])
        counts[1] += 1
        if check_ans(pred=res['pred'], gt=res['gt']):
            counts[0] += 1

    final_res = {
        task_type: hit / seen * 100
        for task_type, (hit, seen) in acc_dict.items()
    }

    # Derive the overall counters once from the per-task tallies (they were
    # previously accumulated in both loops and therefore doubled).
    correct = sum(hit for hit, _ in acc_dict.values())
    total = sum(seen for _, seen in acc_dict.values())
    # With no results there is nothing to average; report 0 instead of
    # raising ZeroDivisionError.
    final_res['Avg'] = correct / total * 100 if total else 0.0

    all_results = {
        "acc_dict": acc_dict,
        "result_list": result_list
    }
    dump_json(all_results, save_path, 'all_results.json')
    dump_json(final_res, save_path, 'upload_leaderboard.json')
|
|
|
def load_results(save_path):
    """Return the saved per-sample result list found under *save_path*.

    ``all_results.json`` is read through ``load_json``; when that call
    yields None, None is returned, otherwise the value stored under the
    ``'result_list'`` key.
    """
    stored = load_json(save_path, 'all_results.json')
    if stored is None:
        return None
    return stored['result_list']
|
|
|
class MVBenchDataset(EvalDataset):
    """Multiple-choice video QA samples gathered from the MVBench task files.

    On construction every annotation file listed in ``data_list_info`` is
    loaded from ``data_dir`` and flattened into ``self.data_list``; indexing
    the dataset decodes the referenced media and returns the formatted
    question/answer pair.
    """

    # task name -> (annotation file, media prefix, data type, has bound)
    #   annotation file: JSON file name, opened relative to ``data_dir``
    #   media prefix:    directory joined with each sample's 'video' field
    #   data type:       key into ``self.decord_method``
    #   has bound:       whether samples carry 'start'/'end' fields that are
    #                    passed to the reader
    data_list_info = {
        "Action Sequence": ("action_sequence.json", "DATAS/MVBench/video/star/Charades_v1_480/", "video", True),
        "Action Prediction": ("action_prediction.json", "DATAS/MVBench/video/star/Charades_v1_480/", "video", True),
        "Action Antonym": ("action_antonym.json", "DATAS/MVBench/video/ssv2_video/", "video", False),
        "Fine-grained Action": ("fine_grained_action.json", "DATAS/MVBench/video/Moments_in_Time_Raw/videos/", "video", False),
        "Unexpected Action": ("unexpected_action.json", "DATAS/MVBench/video/FunQA_test/test/", "video", False),
        "Object Existence": ("object_existence.json", "DATAS/MVBench/video/clevrer/video_validation/", "video", False),
        "Object Interaction": ("object_interaction.json", "DATAS/MVBench/video/star/Charades_v1_480/", "video", True),
        "Object Shuffle": ("object_shuffle.json", "DATAS/MVBench/video/perception/videos/", "video", False),
        "Moving Direction": ("moving_direction.json", "DATAS/MVBench/video/clevrer/video_validation/", "video", False),
        "Action Localization": ("action_localization.json", "DATAS/MVBench/video/sta/sta_video/", "video", True),
        "Scene Transition": ("scene_transition.json", "DATAS/MVBench/video/scene_qa/video/", "video", False),
        "Action Count": ("action_count.json", "DATAS/MVBench/video/perception/videos/", "video", False),
        "Moving Count": ("moving_count.json", "DATAS/MVBench/video/clevrer/video_validation/", "video", False),
        "Moving Attribute": ("moving_attribute.json", "DATAS/MVBench/video/clevrer/video_validation/", "video", False),
        "State Change": ("state_change.json", "DATAS/MVBench/video/perception/videos/", "video", False),
        "Fine-grained Pose": ("fine_grained_pose.json", "DATAS/MVBench/video/nturgbd/", "video", False),
        "Character Order": ("character_order.json", "DATAS/MVBench/video/perception/videos/", "video", False),
        "Egocentric Navigation": ("egocentric_navigation.json", "DATAS/MVBench/video/vlnqa/", "video", False),
        "Episodic Reasoning": ("episodic_reasoning.json", "DATAS/MVBench/video/tvqa/frames_fps3_hq/", "frame", True),
        "Counterfactual Inference": ("counterfactual_inference.json", "DATAS/MVBench/video/clevrer/video_validation/", "video", False),
    }

    # Directory holding the annotation JSON files named in data_list_info.
    data_dir = "DATAS/MVBench/json"

    def __init__(self, *args, **kwargs):
        """Load every task's annotation file into ``self.data_list``.

        All positional and keyword arguments are forwarded unchanged to the
        parent constructor.
        """
        super().__init__(*args, **kwargs)

        self.data_list = []
        for task_type, info in self.data_list_info.items():
            json_name, prefix, data_type, bound = info[:4]
            # JSON text is UTF-8 by specification; do not depend on the
            # platform's locale encoding.
            with open(os.path.join(self.data_dir, json_name), 'r', encoding='utf-8') as f:
                json_data = json.load(f)
            for data in json_data:
                self.data_list.append({
                    'task_type': task_type,
                    'prefix': prefix,
                    'data_type': data_type,
                    'bound': bound,
                    'data': data
                })

        # data type -> reader; the read_* methods are not defined in this
        # class (presumably inherited from EvalDataset -- confirm).
        self.decord_method = {
            'video': self.read_video,
            'gif': self.read_gif,
            'frame': self.read_frame,
        }

    def __getitem__(self, idx):
        """Return the decoded media and formatted QA pair for sample *idx*.

        Returns:
            dict with the keys ``'video_path'``, ``'video_pils'``,
            ``'question'``, ``'answer'`` and ``'task_type'``.  When decoding
            raises, ``'video_pils'`` is None and ``'task_type'`` is replaced
            by ``'error_reading_video'``.
        """
        sample = self.data_list[idx]
        data = sample['data']

        question, answer = self.qa_template(data)
        task_type = sample['task_type']
        decord_method = self.decord_method[sample['data_type']]
        bound = (data['start'], data['end']) if sample['bound'] else None
        video_path = os.path.join(sample['prefix'], data['video'])

        try:
            images_group = decord_method(video_path, bound)
        except Exception as e:
            # Best effort: keep iterating, but report why decoding failed
            # and tag the sample so it can be told apart downstream.
            print(f'error decoding {video_path}: {e}')
            task_type = 'error_reading_video'
            images_group = None

        return {
            'video_path': video_path,
            'video_pils': images_group,
            'question': question,
            'answer': answer,
            'task_type': task_type,
        }

    def qa_template(self, data):
        """Format one annotation record as a lettered multiple-choice prompt.

        Args:
            data: Mapping with the keys ``'question'``, ``'answer'`` and
                ``'candidates'`` (the list of option texts).

        Returns:
            tuple: ``(question, answer)`` where *question* lists the options
            as ``(A) ...``, ``(B) ...`` one per line, and *answer* is
            ``"(<letter>) <answer text>"``.  If the answer text equals none
            of the candidates the index stays at -1 and the label is
            rendered as ``(@)``.
        """
        question = f"Question: {data['question']}\n"
        question += "Options:\n"
        answer = data['answer']
        answer_idx = -1
        for idx, c in enumerate(data['candidates']):
            question += f"({chr(ord('A') + idx)}) {c}\n"
            if c == answer:
                answer_idx = idx
        question = question.rstrip()
        answer = f"({chr(ord('A') + answer_idx)}) {answer}"
        return question, answer
|
|
|
|