WwYc committed
Commit fa96553 · verified · 1 Parent(s): d54d66e

Update generic.py

Files changed (1)
  1. generic.py +25 -33
generic.py CHANGED
@@ -88,50 +88,42 @@ class ModelUsage:
         return self.output
 
 
-def save_image_vis(image_file_path, bbox_scores):
-    bbox_scores = image_scores
-    _, top_bboxes_indices = bbox_scores.topk(k=1, dim=-1)
+model_lrp = ModelUsage(use_lrp=True)
+lrp = GeneratorOurs(model_lrp)
+baselines = GeneratorBaselines(model_lrp)
+vqa_answers = utils.get_data(VQA_URL)
+
+
+def save_image_vis(image_file_path, question):
+    R_t_t, R_t_i = lrp.generate_ours((image_file_path, question), use_lrp=False,
+                                     normalize_self_attention=True,
+                                     method_name="ours")
+    image_scores = R_t_i[0]
+    text_scores = R_t_t[0]
+    # bbox_scores = image_scores
+    _, top_bboxes_indices = image_scores.topk(k=1, dim=-1)
     img = cv2.imread(image_file_path)
     mask = torch.zeros(img.shape[0], img.shape[1])
-    for index in range(len(bbox_scores)):
+    for index in range(len(image_scores)):
         [x, y, w, h] = model_lrp.bboxes[0][index]
         curr_score_tensor = mask[int(y):int(h), int(x):int(w)]
-        new_score_tensor = torch.ones_like(curr_score_tensor) * bbox_scores[index].item()
+        new_score_tensor = torch.ones_like(curr_score_tensor) * image_scores[index].item()
         mask[int(y):int(h), int(x):int(w)] = torch.max(new_score_tensor, mask[int(y):int(h), int(x):int(w)])
     mask = (mask - mask.min()) / (mask.max() - mask.min())
     mask = mask.unsqueeze_(-1)
     mask = mask.expand(img.shape)
     img = img * mask.cpu().data.numpy()
+    # img = Image.fromarray(np.uint8(img)).convert('RGB')
     cv2.imwrite(
         'lxmert/lxmert/experiments/paper/new.jpg', img)
-    return img
+    text_scores = (text_scores - text_scores.min()) / (text_scores.max() - text_scores.min())
+    vis_data_records = [visualization.VisualizationDataRecord(text_scores, 0, 0, 0, 0, 0, model_lrp.question_tokens, 1)]
+    html1 = visualization.visualize_text(vis_data_records)
+    return html1.data
 
 
-model_lrp = ModelUsage(use_lrp=True)
-lrp = GeneratorOurs(model_lrp)
-baselines = GeneratorBaselines(model_lrp)
-vqa_answers = utils.get_data(VQA_URL)
-
-image_ids = [
-    # giraffe
-    'COCO_val2014_000000185590',
-    # baseball
-    'COCO_val2014_000000127510',
-    # bath
-    'COCO_val2014_000000324266',
-    # frisbee
-    'COCO_val2014_000000200717'
-]
-
-test_questions_for_images = [
-    ################## paper samples
-    # giraffe
-    "is the animal eating?",
-    # baseball
-    "did he catch the ball?",
-    # bath
-    "is the tub white ?",
-    # frisbee
-    "did the man just catch the frisbee?"
-    ################## paper samples
-]
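For context, a minimal sketch of how the updated save_image_vis might be called after this commit. It assumes the setup at the top of generic.py (model_lrp, lrp, baselines, vqa_answers) has already run; the local image path and output file name are hypothetical, reusing the "giraffe" COCO sample and its question from the lists this diff removes.

# Minimal usage sketch, not part of the commit. Assumes generic.py has
# already initialized model_lrp, lrp, baselines, and vqa_answers as above.
image_path = 'COCO_val2014_000000185590.jpg'   # hypothetical local path to the "giraffe" sample
question = "is the animal eating?"             # matching question from the removed list

# With the new signature, relevance maps are computed inside the function:
# it writes the box-relevance overlay to lxmert/lxmert/experiments/paper/new.jpg
# and returns the token-relevance visualization as an HTML string.
html = save_image_vis(image_path, question)

with open('text_relevance.html', 'w') as f:    # hypothetical output file
    f.write(html)

Note the behavioral change this diff makes: the old version took precomputed bbox_scores and returned the masked image, while the new one takes the raw question, runs lrp.generate_ours itself, and returns the HTML of the text relevance scores instead.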