import sys
sys.path.append('.')
import json
import pandas as pd
import csv
from sentence_transformers import SentenceTransformer, util
from minigpt4.common.eval_utils import computeIoU
# Load a pre-trained Sentence-BERT (MiniLM) model for semantic similarity
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Sentence-embedding cosine similarity, used by the two evaluation functions below
def compute_bert_similarity(prediction_caption, ground_truth_caption):
    prediction_embedding = model.encode([prediction_caption])
    ground_truth_embedding = model.encode([ground_truth_caption])
    similarity = util.pytorch_cos_sim(prediction_embedding, ground_truth_embedding)[0][0].item()
    return similarity
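
# Illustrative call (exact scores depend on the model weights):
#   compute_bert_similarity("no acute findings", "no acute cardiopulmonary process")
# returns a cosine similarity in [-1, 1]; near-paraphrases typically score close to 1.
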
def MIMIC_BERT_Sim(gt_pth, pred_pth, output_csv):
    # Read the ground truth and prediction JSON files
    with open(gt_pth, 'r') as f:
        ground_truth_data = json.load(f)
    with open(pred_pth, 'r') as f:
        prediction_data = json.load(f)

    # Create a list to store BERT similarity data
    bert_similarity_data = []

    # Initialize variables to calculate the average
    total_similarity = 0
    total_count = 0

    # Iterate over each item in the prediction_data list
    for item in prediction_data:
        # Extract the image_id and corresponding prediction caption
        image_id = item["image_id"]
        prediction_caption = item["caption"]

        # Search for the matching ground truth caption based on image_id
        ground_truth_caption = None
        for gt_item in ground_truth_data:
            if gt_item["image_id"] == image_id:
                ground_truth_caption = gt_item["caption"]
                break

        if ground_truth_caption is not None:
            bert_similarity = compute_bert_similarity(prediction_caption, ground_truth_caption)
            bert_similarity_data.append({"image_id": image_id, "BERT_score": bert_similarity})
            total_similarity += bert_similarity
            total_count += 1

    average_similarity = total_similarity / total_count if total_count > 0 else 0

    # Write per-image scores to CSV, lowest similarity first
    df = pd.DataFrame(bert_similarity_data)
    df_sorted = df.sort_values(by="BERT_score", ascending=True)
    df_sorted.to_csv(output_csv, index=False)
    return average_similarity
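
# Expected input shape for MIMIC_BERT_Sim, inferred from the lookups above:
# both files are JSON lists of objects carrying "image_id" and "caption" keys.
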
def VQA_BERT_Sim(gt_pth, pred_pth, output_csv):
    # Load ground truth JSON file
    with open(gt_pth, 'r') as file:
        gt_data = json.load(file)
    # Load prediction JSON file
    with open(pred_pth, 'r') as file:
        prediction_data = json.load(file)

    # Ground truth is a flat list; key each answer by (image_name, question)
    gt_qa_pairs = {(entry['image_name'], entry['question']): entry['answer'] for entry in gt_data}

    # Predictions map image_name -> list of QA pairs; flatten to the same keying
    def convert_to_dict(data):
        qa_dict = {}
        for image_name, qa_list in data.items():
            for qa in qa_list:
                key = (image_name, qa['question'])
                qa_dict[key] = qa['answer']
        return qa_dict

    pred_qa_dict = convert_to_dict(prediction_data)

    # Compute BERT similarity for every ground-truth pair that has a prediction
    results = []
    for key, gt_answer in gt_qa_pairs.items():
        if key in pred_qa_dict:
            gt_answer = str(gt_answer)
            pred_answer = str(pred_qa_dict[key])
            similarity_score = compute_bert_similarity(pred_answer, gt_answer)
            results.append({
                "img_name": key[0],
                "question": key[1],
                "answer": pred_answer,
                "BERT_score": similarity_score
            })

    average_similarity = sum(entry["BERT_score"] for entry in results) / len(results) if results else 0

    # Write per-question scores to CSV, lowest similarity first
    df = pd.DataFrame(results)
    df_sorted = df.sort_values(by="BERT_score", ascending=True)
    df_sorted.to_csv(output_csv, index=False)
    print(f"Average BERT similarity score: {average_similarity}")
    return average_similarity
#################################
##############IoU################
#################################
def preprocess_bbox(bbox, original_size, image_size):
    # Rescale an [x1, y1, x2, y2] box from the original resolution (assumed square,
    # so one scalar covers both axes) to the model's input resolution
    x1 = int((bbox[0] / original_size) * image_size)
    y1 = int((bbox[1] / original_size) * image_size)
    x2 = int((bbox[2] / original_size) * image_size)
    y2 = int((bbox[3] / original_size) * image_size)
    return [x1, y1, x2, y2]
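
# Worked example (hypothetical numbers): a box [100, 200, 300, 400] on a
# 1024-px image rescaled to a 448-px model input becomes [43, 87, 131, 175],
# since e.g. int((100 / 1024) * 448) == 43.
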
def average_iou(gt_pth, pred_pth, original_size, image_size, dataset_name, csv_filename):
    # Load ground truth
    with open(gt_pth, 'r') as file:
        ground_truth = json.load(file)
    # Load predictions
    with open(pred_pth, 'r') as file:
        predictions = json.load(file)

    iou_list = []
    with open(csv_filename, 'w', newline='') as csvfile:
        fieldnames = ['image_name', 'IoU']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for gt_item in ground_truth:
            gt_key = gt_item['key']
            gt_bboxes = gt_item['bbox']
            # The per-image height overrides the original_size argument
            original_size = gt_item['height']
            gt_processed_bboxes = [preprocess_bbox(bbox, original_size, image_size) for bbox in gt_bboxes]

            for pred_item in predictions:
                pred_key = pred_item['key'].replace(".png", "")
                if gt_key == pred_key:
                    pred_bboxes = pred_item['bbox']
                    try:
                        # Score every ground-truth box against every predicted box
                        for gt_bbox in gt_processed_bboxes:
                            for pred_bbox in pred_bboxes:
                                iou = computeIoU(gt_bbox, pred_bbox)
                                iou_list.append(iou)
                                writer.writerow({'image_name': gt_key, 'IoU': iou})
                                print(gt_key)
                                print(iou)
                    except Exception as e:
                        print("error: ", e)
                        print("gt_bbox: ", gt_bbox)
                        print("pred_bboxes: ", pred_bboxes)

    average_iou = sum(iou_list) / len(iou_list) if iou_list else 0
    print(f"Average IoU for dataset {dataset_name}: {average_iou:.4f}")
    return average_iou