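"""Build a per-video CHAIR report.

Combines the per-video coverage and hallucination detail files with the
prediction and ground-truth caption JSONs, and writes one record per
object_id containing per-bucket coverage/hallucination scores, the predicted
and ground-truth tags, the model caption (masp_inference), and the refined
ground-truth caption (refine_caption).
"""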
import argparse
import collections
import json
from pathlib import Path


def eval_video_chair(file_name, metric):
    """Compute per-bucket coverage or hallucination percentages from a detail file.

    Each entry maps an object_id to per-bucket [numerator, denominator] pairs.
    Buckets missing from an entry are recorded as the sentinel -100 so callers
    can report them as "N/A".
    """
    with file_name.open("r") as json_file:
        data = json.load(json_file)

    items = {}  # object_id -> row index into the per-bucket score lists
    coverages = collections.defaultdict(list)
    hallucinations = collections.defaultdict(list)
    buckets = ['subjects', 'attributes', 'activities', 'locations', 'text_overlays']
    for index, (object_id, tag_info) in enumerate(data.items()):
        items[object_id] = index
        for tag in buckets:
            if tag in tag_info:
                # Percentage of matched items for this bucket.
                cvg = round(tag_info[tag][0] * 100 / tag_info[tag][1], 2)
                if metric == "coverage":
                    coverages[tag].append(cvg)
                else:
                    hallucinations[tag].append(round(100 - cvg, 2))
            elif metric == "coverage":
                coverages[tag].append(-100)
            else:
                hallucinations[tag].append(-100)
    return (items, coverages) if metric == "coverage" else (items, hallucinations)


def get_dict_val(inputs, object_id, key):
    """Return the cap_info list stored under `key` for the entry matching object_id."""
    for dd in inputs:
        if str(dd["object_id"]) == str(object_id):
            return dd["cap_info"].get(key, [])
    return []


def get_instance_result(pred_file, gt_file, coverage_file, hallucination_file, save_file):
    """Merge per-video coverage and hallucination scores with the predicted and ground-truth tags."""
    buckets = ['subjects', 'attributes', 'activities', 'locations', 'text_overlays']
    with open(pred_file, "r") as f:
        pred = json.load(f)
    with open(gt_file, "r") as f:
        gt = json.load(f)
    output_dir = Path(pred_file).parent

    items1, coverages = eval_video_chair(output_dir / coverage_file, "coverage")
    items2, hallucinations = eval_video_chair(output_dir / hallucination_file, "hallucination")

    gt_map = {str(item['object_id']): item for item in gt}
    pred_map = {str(item['object_id']): item for item in pred}

    out = []
    for obj_id, idx_1 in items1.items():
        # Only report videos that appear in both the coverage and hallucination files.
        if obj_id not in items2:
            continue
        idx_2 = items2[obj_id]
        res = {"object_id": obj_id}
        for key in buckets:
            # -100 marks a bucket that was absent from the detail file.
            res["coverage_" + key] = coverages[key][idx_1] if coverages[key][idx_1] != -100 else "N/A"
            res["hallucination_" + key] = hallucinations[key][idx_2] if hallucinations[key][idx_2] != -100 else "N/A"
            if key == "attributes":
                continue
            res["pred_" + key] = get_dict_val(pred, obj_id, key)
            res["gt_" + key] = get_dict_val(gt, obj_id, key)
        res['masp_inference'] = pred_map[obj_id]['masp_inference']
        res['refine_caption'] = gt_map[obj_id]['refine_caption']
        out.append(res)

    with (output_dir / save_file).open("w") as json_data:
        json.dump(out, json_data, indent=4)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--pred_file", type=str, default='/mnt/bn/algo-masp-nas-2/xiangchen/model/masp_models/checkpoints/llava-mistral_gpt4v_public800k_unfreeze_qformer/video_chair/video_chair_1k_res_info.json')
    parser.add_argument("--gt_file", type=str, default='/mnt/bn/algo-masp-nas-2/kaili.zhao/data/masp_data/eval/eval_v1.0/eval_benchmark_pos_diverse_1k_11policies_gt.json')
    parser.add_argument("--coverage_file", type=str, default='each_video_coverage_detail.json')
    parser.add_argument("--hallucination_file", type=str, default='each_video_halluciantion_detail.json')
    parser.add_argument("--save_file", type=str, default='video_chair_final.json')
    args = parser.parse_args()

    get_instance_result(args.pred_file, args.gt_file, args.coverage_file, args.hallucination_file, args.save_file)
    print("===== Completed video chair for each individual computation! =====")