Jingkang committed on
Commit 6132e50 · 1 Parent(s): 0ccb265

Update app.py

Files changed (1)
  app.py +30 -14
app.py CHANGED
@@ -71,10 +71,19 @@ def draw_object_mask(mask, draw):
 
 
 def vis_selected(pil_image, coords):
+    w, h = pil_image.size
+    max_edge = 1500
+    if w > max_edge or h > max_edge:
+        ratio = max(w, h) / max_edge
+        new_size = (int(w / ratio), int(h / ratio))
+        pil_image.thumbnail(new_size)
+        coords = str(int(int(coords.split(',')[0]) * new_size[0] / w)) + ',' + str(int(int(coords.split(',')[1]) * new_size[1] / h))
+
     # get coords
     coords_x, coords_y = coords.split(',')
     input_point = np.array([[int(coords_x), int(coords_y)]])
     input_label = np.array([1])
+
     # load image
     image = np.array(pil_image)
     predictor.set_image(image)
@@ -159,10 +168,13 @@ def relate_selected(input_image, k, coords):
     pil_image = input_image.convert('RGBA')
 
     w, h = pil_image.size
-    if w > 800:
-        pil_image.thumbnail((800, 800*h/w))
-        input_image.thumbnail((800, 800*h/w))
-        coords = str(int(int(coords.split(',')[0]) * 800 / w)) + ',' + str(int(int(coords.split(',')[1]) * 800 / w))
+    max_edge = 1500
+    if w > max_edge or h > max_edge:
+        ratio = max(w, h) / max_edge
+        new_size = (int(w / ratio), int(h / ratio))
+        pil_image.thumbnail(new_size)
+        input_image.thumbnail(new_size)
+        coords = str(int(int(coords.split(',')[0]) * new_size[0] / w)) + ',' + str(int(int(coords.split(',')[1]) * new_size[1] / h))
 
     image = np.array(input_image)
     sam_masks = mask_generator.generate(image)
@@ -212,12 +224,15 @@ def relate_selected(input_image, k, coords):
 
 
 def relate_anything(input_image, k):
+    w, h = input_image.size
+    max_edge = 1500
+    if w > max_edge or h > max_edge:
+        ratio = max(w, h) / max_edge
+        new_size = (int(w / ratio), int(h / ratio))
+        input_image.thumbnail(new_size)
+
     # load image
     pil_image = input_image.convert('RGBA')
-    w, h = pil_image.size
-    if w > 800:
-        pil_image.thumbnail((800, 800*h/w))
-        input_image.thumbnail((800, 800*h/w))
     image = np.array(input_image)
     sam_masks = mask_generator.generate(image)
     filtered_masks = sort_and_deduplicate(sam_masks)
@@ -251,9 +266,10 @@ def relate_anything(input_image, k):
 
 DESCRIPTION = '''# Relate-Anything
 
-### 🚀 🚀 🚀 This is a demo that combines Meta's Segment-Anything model with the ECCV'22 paper: [Panoptic Scene Graph Generation](https://psgdataset.org/).
-
-### 🔥🔥🔥 Please star our codebase [openpsg](https://github.com/Jingkang50/OpenPSG) and [RAM](https://github.com/Luodian/RelateAnything) if you find it useful / interesting.
+### 🚀 🚀 🚀 RAM (Relate-Anything-Model) combines Meta's Segment-Anything model with the ECCV'22 paper: [Panoptic Scene Graph Generation](https://psgdataset.org/).
+### 🤔 🤔 🤔 Given an image, RAM finds all the meaningful relations between anything. (Check Tab: Relate Anything)
+### 🖱️ 🖱️ 🖱️ You can also click something on the image, and RAM finds anything that relates to it. (Check Tab: Relate Something)
+### 🔥 🔥 🔥 Please star our codebase [OpenPSG](https://github.com/Jingkang50/OpenPSG) and [RAM](https://github.com/Luodian/RelateAnything) if you find it useful / interesting.
 '''
 
 block = gr.Blocks()
@@ -268,8 +284,8 @@ with block:
         num_relation = gr.Slider(label="How many relations do you want to see", minimum=1, maximum=20, value=5, step=1)
        relate_all_button = gr.Button(label="Relate Anything!")
 
-    with gr.Tab("Relate me with Anything"):
-        img_input_coords = gr.Textbox(label="Click anything to get input coords")
+    with gr.Tab("Relate Something"):
+        img_input_coords = gr.Textbox(label="Click something to get input coords")
 
         def select_handler(evt: gr.SelectData):
             coords = evt.index
@@ -277,7 +293,7 @@ with block:
 
         input_image.select(select_handler, None, img_input_coords)
         run_button_vis = gr.Button(label="Visualize the Select Thing")
-        selected_gallery = gr.Gallery(label="Selected Thing", show_label=True, elem_id="gallery").style(preview=True, grid=2, object_fit="scale-down")
+        selected_gallery = gr.Gallery(label="Selected Thing", show_label=True, elem_id="gallery").style(object_fit="scale-down")
 
         k = gr.Slider(label="Number of things you want to relate", minimum=1, maximum=20, value=5, step=1)
         relate_selected_button = gr.Button(value="Relate it with Anything", interactive=True)
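
Note that this commit pastes the same downscale-and-rescale block into all three entry points (vis_selected, relate_selected, relate_anything): the old code only capped the width at 800, while the new code caps whichever edge is longer at 1500 and rescales the click coordinates by the matching per-axis ratios. Below is a minimal sketch of how that duplicated logic could live in one helper; resize_with_coords is a hypothetical name, not part of this commit, and the arithmetic simply mirrors the diff:

from PIL import Image

def resize_with_coords(pil_image, coords=None, max_edge=1500):
    # Hypothetical helper (not in this commit) consolidating the block the
    # diff repeats in vis_selected, relate_selected, and relate_anything.
    # Shrinks pil_image in place so its longer edge is at most max_edge and
    # rescales an optional "x,y" click-coordinate string to the new size.
    w, h = pil_image.size
    if w > max_edge or h > max_edge:
        ratio = max(w, h) / max_edge
        new_size = (int(w / ratio), int(h / ratio))
        pil_image.thumbnail(new_size)  # in place; aspect ratio preserved
        if coords is not None:
            x, y = (int(c) for c in coords.split(','))
            coords = f"{int(x * new_size[0] / w)},{int(y * new_size[1] / h)}"
    return pil_image, coords

# Example: on a 3000x2000 image, a click at (1500, 1000) maps to (750, 500)
img = Image.new('RGB', (3000, 2000))
img, c = resize_with_coords(img, '1500,1000')
assert img.size == (1500, 1000) and c == '750,500'

vis_selected and relate_anything would then reduce to a single call at the top of the function; relate_selected shrinks both pil_image and input_image, so it would still need one extra thumbnail call on the second copy.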