Spaces:
Sleeping
Sleeping
fix: show annotated image in web ui
Browse files
Signed-off-by: ivelin <[email protected]>
- app.py +23 -23
- flagged/image/tmp1ah3uhhm.png +0 -0
- flagged/log.csv +2 -0
- flagged/output 1/tmpyazt_rro.json +1 -0
app.py
CHANGED
@@ -95,7 +95,7 @@ def translate_point_coords_from_out_to_in(point=None, input_image_size=None, out
|
|
95 |
f"translated point={point}, resized_image_size: {resized_width, resized_height}")
|
96 |
|
97 |
|
98 |
-
def process_refexp(image: Image, prompt: str, model_revision: str = 'main', return_annotated_image: bool =
|
99 |
|
100 |
print(f"(image, prompt): {image}, {prompt}")
|
101 |
|
@@ -205,28 +205,28 @@ def process_refexp(image: Image, prompt: str, model_revision: str = 'main', retu
|
|
205 |
title = "Demo: GuardianUI RefExp Click"
|
206 |
description = "Gradio Demo for Donut RefExp task, an instance of `VisionEncoderDecoderModel` fine-tuned on [UIBert RefExp](https://huggingface.co/datasets/ivelin/ui_refexp_saved) Dataset (UI Referring Expression). To use it, simply upload your image and type a prompt and click 'submit', or click one of the examples to load them. Optionally enter value for model git revision; latest checkpoint will be used by default."
|
207 |
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
|
208 |
-
examples = [["example_1.jpg", "select the setting icon from top right corner", "",
|
209 |
-
["example_1.jpg", "click on down arrow beside the entertainment", "",
|
210 |
-
["example_1.jpg", "select the down arrow button beside lifestyle", "",
|
211 |
-
["example_1.jpg", "click on the image beside the option traffic", "",
|
212 |
-
["example_3.jpg", "select the third row first image", "",
|
213 |
-
["example_3.jpg", "click the tick mark on the first image", "",
|
214 |
-
["example_3.jpg", "select the ninth image", "",
|
215 |
-
["example_3.jpg", "select the add icon", "",
|
216 |
-
["example_3.jpg", "click the first image", "",
|
217 |
-
["val-image-4.jpg", 'select 4153365454', "",
|
218 |
-
['val-image-4.jpg', 'go to cell', "",
|
219 |
-
['val-image-4.jpg', 'select number above cell', "",
|
220 |
-
["val-image-1.jpg", "select calendar option", "",
|
221 |
-
["val-image-1.jpg", "select photos&videos option", "",
|
222 |
-
["val-image-2.jpg", "click on change store", "",
|
223 |
-
["val-image-2.jpg", "click on shop menu at the bottom", "",
|
224 |
-
["val-image-3.jpg", "click on image above short meow", "",
|
225 |
-
["val-image-3.jpg", "go to cat sounds", "",
|
226 |
-
["example_2.jpg", "click on green color button", "",
|
227 |
-
["example_2.jpg", "click on text which is beside call now", "",
|
228 |
-
["example_2.jpg", "click on more button", "",
|
229 |
-
["example_2.jpg", "enter the text field next to the name", "",
|
230 |
]
|
231 |
|
232 |
demo = gr.Interface(fn=process_refexp,
|
|
|
95 |
f"translated point={point}, resized_image_size: {resized_width, resized_height}")
|
96 |
|
97 |
|
98 |
+
def process_refexp(image: Image, prompt: str, model_revision: str = 'main', return_annotated_image: bool = True):
|
99 |
|
100 |
print(f"(image, prompt): {image}, {prompt}")
|
101 |
|
|
|
205 |
title = "Demo: GuardianUI RefExp Click"
|
206 |
description = "Gradio Demo for Donut RefExp task, an instance of `VisionEncoderDecoderModel` fine-tuned on [UIBert RefExp](https://huggingface.co/datasets/ivelin/ui_refexp_saved) Dataset (UI Referring Expression). To use it, simply upload your image and type a prompt and click 'submit', or click one of the examples to load them. Optionally enter value for model git revision; latest checkpoint will be used by default."
|
207 |
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
|
208 |
+
examples = [["example_1.jpg", "select the setting icon from top right corner", "", True],
|
209 |
+
["example_1.jpg", "click on down arrow beside the entertainment", "", True],
|
210 |
+
["example_1.jpg", "select the down arrow button beside lifestyle", "", True],
|
211 |
+
["example_1.jpg", "click on the image beside the option traffic", "", True],
|
212 |
+
["example_3.jpg", "select the third row first image", "", True],
|
213 |
+
["example_3.jpg", "click the tick mark on the first image", "", True],
|
214 |
+
["example_3.jpg", "select the ninth image", "", True],
|
215 |
+
["example_3.jpg", "select the add icon", "", True],
|
216 |
+
["example_3.jpg", "click the first image", "", True],
|
217 |
+
["val-image-4.jpg", 'select 4153365454', "", True],
|
218 |
+
['val-image-4.jpg', 'go to cell', "", True],
|
219 |
+
['val-image-4.jpg', 'select number above cell', "", True],
|
220 |
+
["val-image-1.jpg", "select calendar option", "", True],
|
221 |
+
["val-image-1.jpg", "select photos&videos option", "", True],
|
222 |
+
["val-image-2.jpg", "click on change store", "", True],
|
223 |
+
["val-image-2.jpg", "click on shop menu at the bottom", "", True],
|
224 |
+
["val-image-3.jpg", "click on image above short meow", "", True],
|
225 |
+
["val-image-3.jpg", "go to cat sounds", "", True],
|
226 |
+
["example_2.jpg", "click on green color button", "", True],
|
227 |
+
["example_2.jpg", "click on text which is beside call now", "", True],
|
228 |
+
["example_2.jpg", "click on more button", "", True],
|
229 |
+
["example_2.jpg", "enter the text field next to the name", "", True],
|
230 |
]
|
231 |
|
232 |
demo = gr.Interface(fn=process_refexp,
|
flagged/image/tmp1ah3uhhm.png
ADDED
![]() |
flagged/log.csv
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
image,prompt,model_revision,Return Annotated Image,output 0,output 1,flag,username,timestamp
|
2 |
+
/root/ui-refexp-click/flagged/image/tmp1ah3uhhm.png,click on browser wallet option,,,,/root/ui-refexp-click/flagged/output 1/tmpyazt_rro.json,,,2023-02-28 20:42:56.524619
|
flagged/output 1/tmpyazt_rro.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"x": 0.5035532994923858, "y": 0.32, "decoder output sequence (before x,y adjustment)": "<s_prompt> click on browser wallet option</s_prompt><s_target_center><s_x> 0.31</s_x><s_y> 0.32</s_y></s_target_center>"}
|