Jingkang committed on
Commit 6132e50 · 1 Parent(s): 0ccb265

Update app.py

Files changed (1)
  app.py +30 -14
app.py CHANGED
@@ -71,10 +71,19 @@ def draw_object_mask(mask, draw):
 
 
 def vis_selected(pil_image, coords):
+    w, h = pil_image.size
+    max_edge = 1500
+    if w > max_edge or h > max_edge:
+        ratio = max(w, h) / max_edge
+        new_size = (int(w / ratio), int(h / ratio))
+        pil_image.thumbnail(new_size)
+        coords = str(int(int(coords.split(',')[0]) * new_size[0] / w)) + ',' + str(int(int(coords.split(',')[1]) * new_size[1] / h))
+
     # get coords
     coords_x, coords_y = coords.split(',')
     input_point = np.array([[int(coords_x), int(coords_y)]])
     input_label = np.array([1])
+
     # load image
     image = np.array(pil_image)
     predictor.set_image(image)
@@ -159,10 +168,13 @@ def relate_selected(input_image, k, coords):
     pil_image = input_image.convert('RGBA')
 
     w, h = pil_image.size
-    if w > 800:
-        pil_image.thumbnail((800, 800*h/w))
-        input_image.thumbnail((800, 800*h/w))
-        coords = str(int(int(coords.split(',')[0]) * 800 / w)) + ',' + str(int(int(coords.split(',')[1]) * 800 / w))
+    max_edge = 1500
+    if w > max_edge or h > max_edge:
+        ratio = max(w, h) / max_edge
+        new_size = (int(w / ratio), int(h / ratio))
+        pil_image.thumbnail(new_size)
+        input_image.thumbnail(new_size)
+        coords = str(int(int(coords.split(',')[0]) * new_size[0] / w)) + ',' + str(int(int(coords.split(',')[1]) * new_size[1] / h))
 
     image = np.array(input_image)
     sam_masks = mask_generator.generate(image)
@@ -212,12 +224,15 @@ def relate_selected(input_image, k, coords):
 
 
 def relate_anything(input_image, k):
+    w, h = input_image.size
+    max_edge = 1500
+    if w > max_edge or h > max_edge:
+        ratio = max(w, h) / max_edge
+        new_size = (int(w / ratio), int(h / ratio))
+        input_image.thumbnail(new_size)
+
     # load image
     pil_image = input_image.convert('RGBA')
-    w, h = pil_image.size
-    if w > 800:
-        pil_image.thumbnail((800, 800*h/w))
-        input_image.thumbnail((800, 800*h/w))
     image = np.array(input_image)
     sam_masks = mask_generator.generate(image)
     filtered_masks = sort_and_deduplicate(sam_masks)
@@ -251,9 +266,10 @@ def relate_anything(input_image, k):
 
 DESCRIPTION = '''# Relate-Anything
 
-### 🚀 🚀 🚀 This is a demo that combines Meta's Segment-Anything model with the ECCV'22 paper: [Panoptic Scene Graph Generation](https://psgdataset.org/).
-
-### 🔥🔥🔥 Please star our codebase [openpsg](https://github.com/Jingkang50/OpenPSG) and [RAM](https://github.com/Luodian/RelateAnything) if you find it useful / interesting.
+### 🚀 🚀 🚀 RAM (Relate-Anything-Model) combines Meta's Segment-Anything model with the ECCV'22 paper: [Panoptic Scene Graph Generation](https://psgdataset.org/).
+### 🤔 🤔 🤔 Given an image, RAM finds all the meaningful relations between anything. (Check Tab: Relate Anything)
+### 🖱️ 🖱️ 🖱️ You can also click something on the image, and RAM finds anything that relates to it. (Check Tab: Relate Something)
+### 🔥 🔥 🔥 Please star our codebase [OpenPSG](https://github.com/Jingkang50/OpenPSG) and [RAM](https://github.com/Luodian/RelateAnything) if you find it useful / interesting.
 '''
 
 block = gr.Blocks()
@@ -268,8 +284,8 @@ with block:
         num_relation = gr.Slider(label="How many relations do you want to see", minimum=1, maximum=20, value=5, step=1)
        relate_all_button = gr.Button(label="Relate Anything!")
 
-    with gr.Tab("Relate me with Anything"):
-        img_input_coords = gr.Textbox(label="Click anything to get input coords")
+    with gr.Tab("Relate Something"):
+        img_input_coords = gr.Textbox(label="Click something to get input coords")
 
         def select_handler(evt: gr.SelectData):
             coords = evt.index
@@ -277,7 +293,7 @@ with block:
 
         input_image.select(select_handler, None, img_input_coords)
         run_button_vis = gr.Button(label="Visualize the Select Thing")
-        selected_gallery = gr.Gallery(label="Selected Thing", show_label=True, elem_id="gallery").style(preview=True, grid=2, object_fit="scale-down")
+        selected_gallery = gr.Gallery(label="Selected Thing", show_label=True, elem_id="gallery").style(object_fit="scale-down")
 
         k = gr.Slider(label="Number of things you want to relate", minimum=1, maximum=20, value=5, step=1)
         relate_selected_button = gr.Button(value="Relate it with Anything", interactive=True)
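
Note that this commit pastes the same downscale-and-rescale block into all three entry points (vis_selected, relate_selected, relate_anything): the old code only capped the width at 800, while the new code caps whichever edge is longer at 1500 and rescales the click coordinates by the matching per-axis ratios. Below is a minimal sketch of how that duplicated logic could live in one helper; resize_with_coords is a hypothetical name, not part of this commit, and the arithmetic simply mirrors the diff:

from PIL import Image

def resize_with_coords(pil_image, coords=None, max_edge=1500):
    # Hypothetical helper (not in this commit) consolidating the block the
    # diff repeats in vis_selected, relate_selected, and relate_anything.
    # Shrinks pil_image in place so its longer edge is at most max_edge and
    # rescales an optional "x,y" click-coordinate string to the new size.
    w, h = pil_image.size
    if w > max_edge or h > max_edge:
        ratio = max(w, h) / max_edge
        new_size = (int(w / ratio), int(h / ratio))
        pil_image.thumbnail(new_size)  # in place; aspect ratio preserved
        if coords is not None:
            x, y = (int(c) for c in coords.split(','))
            coords = f"{int(x * new_size[0] / w)},{int(y * new_size[1] / h)}"
    return pil_image, coords

# Example: on a 3000x2000 image, a click at (1500, 1000) maps to (750, 500)
img = Image.new('RGB', (3000, 2000))
img, c = resize_with_coords(img, '1500,1000')
assert img.size == (1500, 1000) and c == '750,500'

vis_selected and relate_anything would then reduce to a single call at the top of the function; relate_selected shrinks both pil_image and input_image, so it would still need one extra thumbnail call on the second copy.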