ivelin committed on
Commit
134af2f
·
1 Parent(s): 77bc4ff

fix: show annotated image in web ui

Browse files

Signed-off-by: ivelin <[email protected]>

app.py CHANGED
@@ -95,7 +95,7 @@ def translate_point_coords_from_out_to_in(point=None, input_image_size=None, out
95
  f"translated point={point}, resized_image_size: {resized_width, resized_height}")
96
 
97
 
98
- def process_refexp(image: Image, prompt: str, model_revision: str = 'main', return_annotated_image: bool = False):
99
 
100
  print(f"(image, prompt): {image}, {prompt}")
101
 
@@ -205,28 +205,28 @@ def process_refexp(image: Image, prompt: str, model_revision: str = 'main', retu
205
  title = "Demo: GuardianUI RefExp Click"
206
  description = "Gradio Demo for Donut RefExp task, an instance of `VisionEncoderDecoderModel` fine-tuned on [UIBert RefExp](https://huggingface.co/datasets/ivelin/ui_refexp_saved) Dataset (UI Referring Expression). To use it, simply upload your image and type a prompt and click 'submit', or click one of the examples to load them. Optionally enter value for model git revision; latest checkpoint will be used by default."
207
  article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
208
- examples = [["example_1.jpg", "select the setting icon from top right corner", "", False],
209
- ["example_1.jpg", "click on down arrow beside the entertainment", "", False],
210
- ["example_1.jpg", "select the down arrow button beside lifestyle", "", False],
211
- ["example_1.jpg", "click on the image beside the option traffic", "", False],
212
- ["example_3.jpg", "select the third row first image", "", False],
213
- ["example_3.jpg", "click the tick mark on the first image", "", False],
214
- ["example_3.jpg", "select the ninth image", "", False],
215
- ["example_3.jpg", "select the add icon", "", False],
216
- ["example_3.jpg", "click the first image", "", False],
217
- ["val-image-4.jpg", 'select 4153365454', "", False],
218
- ['val-image-4.jpg', 'go to cell', "", False],
219
- ['val-image-4.jpg', 'select number above cell', "", False],
220
- ["val-image-1.jpg", "select calendar option", "", False],
221
- ["val-image-1.jpg", "select photos&videos option", "", False],
222
- ["val-image-2.jpg", "click on change store", "", False],
223
- ["val-image-2.jpg", "click on shop menu at the bottom", "", False],
224
- ["val-image-3.jpg", "click on image above short meow", "", False],
225
- ["val-image-3.jpg", "go to cat sounds", "", False],
226
- ["example_2.jpg", "click on green color button", "", False],
227
- ["example_2.jpg", "click on text which is beside call now", "", False],
228
- ["example_2.jpg", "click on more button", "", False],
229
- ["example_2.jpg", "enter the text field next to the name", "", False],
230
  ]
231
 
232
  demo = gr.Interface(fn=process_refexp,
 
95
  f"translated point={point}, resized_image_size: {resized_width, resized_height}")
96
 
97
 
98
+ def process_refexp(image: Image, prompt: str, model_revision: str = 'main', return_annotated_image: bool = True):
99
 
100
  print(f"(image, prompt): {image}, {prompt}")
101
 
 
205
  title = "Demo: GuardianUI RefExp Click"
206
  description = "Gradio Demo for Donut RefExp task, an instance of `VisionEncoderDecoderModel` fine-tuned on [UIBert RefExp](https://huggingface.co/datasets/ivelin/ui_refexp_saved) Dataset (UI Referring Expression). To use it, simply upload your image and type a prompt and click 'submit', or click one of the examples to load them. Optionally enter value for model git revision; latest checkpoint will be used by default."
207
  article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
208
+ examples = [["example_1.jpg", "select the setting icon from top right corner", "", True],
209
+ ["example_1.jpg", "click on down arrow beside the entertainment", "", True],
210
+ ["example_1.jpg", "select the down arrow button beside lifestyle", "", True],
211
+ ["example_1.jpg", "click on the image beside the option traffic", "", True],
212
+ ["example_3.jpg", "select the third row first image", "", True],
213
+ ["example_3.jpg", "click the tick mark on the first image", "", True],
214
+ ["example_3.jpg", "select the ninth image", "", True],
215
+ ["example_3.jpg", "select the add icon", "", True],
216
+ ["example_3.jpg", "click the first image", "", True],
217
+ ["val-image-4.jpg", 'select 4153365454', "", True],
218
+ ['val-image-4.jpg', 'go to cell', "", True],
219
+ ['val-image-4.jpg', 'select number above cell', "", True],
220
+ ["val-image-1.jpg", "select calendar option", "", True],
221
+ ["val-image-1.jpg", "select photos&videos option", "", True],
222
+ ["val-image-2.jpg", "click on change store", "", True],
223
+ ["val-image-2.jpg", "click on shop menu at the bottom", "", True],
224
+ ["val-image-3.jpg", "click on image above short meow", "", True],
225
+ ["val-image-3.jpg", "go to cat sounds", "", True],
226
+ ["example_2.jpg", "click on green color button", "", True],
227
+ ["example_2.jpg", "click on text which is beside call now", "", True],
228
+ ["example_2.jpg", "click on more button", "", True],
229
+ ["example_2.jpg", "enter the text field next to the name", "", True],
230
  ]
231
 
232
  demo = gr.Interface(fn=process_refexp,
flagged/image/tmp1ah3uhhm.png ADDED
flagged/log.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ image,prompt,model_revision,Return Annotated Image,output 0,output 1,flag,username,timestamp
2
+ /root/ui-refexp-click/flagged/image/tmp1ah3uhhm.png,click on browser wallet option,,,,/root/ui-refexp-click/flagged/output 1/tmpyazt_rro.json,,,2023-02-28 20:42:56.524619
flagged/output 1/tmpyazt_rro.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"x": 0.5035532994923858, "y": 0.32, "decoder output sequence (before x,y adjustment)": "<s_prompt> click on browser wallet option</s_prompt><s_target_center><s_x> 0.31</s_x><s_y> 0.32</s_y></s_target_center>"}