Update app.py
app.py CHANGED

@@ -20,7 +20,7 @@ model.to(device)
 TITLE = f"# [{model_name}](https://huggingface.co/{model_name})"
 
 
-def process_image(image, num_beams=5, min_p=0.0, top_p=1.0):
+def process_image(image, task="<CAPTION>", num_beams=5, min_p=0.0, top_p=1.0):
     """
     Process a single image to generate a caption.
     Supports image input as file path, numpy array, or PIL Image.
@@ -37,7 +37,7 @@ def process_image(image, num_beams=5, min_p=0.0, top_p=1.0):
 
     # Prepare inputs for the model
     inputs = processor(
-        text=
+        text=task,
         images=image,
         return_tensors="pt"
     )
@@ -85,6 +85,11 @@ with gr.Blocks(css=css) as demo:
 
     submit_btn = gr.Button(value="Submit")
 
+    task_dropdown = gr.Dropdown(
+        ["<CAPTION>", "<DETAILED_CAPTION>", "<MORE_DETAILED_CAPTION>"],
+        value="<CAPTION>",
+        label="Captioning Mode",
+    )
     num_beams_slider = gr.Slider(
         minimum=1,
         maximum=5,
@@ -109,16 +114,16 @@ with gr.Blocks(css=css) as demo:
 
     gr.Examples(
         [
-            ["eval_img_1.jpg", 5, 0.0, 1.0],
-            ["eval_img_2.jpg", 5, 0.0, 1.0],
-            ["eval_img_3.jpg", 5, 0.0, 1.0],
-            ["eval_img_4.jpg", 5, 0.0, 1.0],
-            ["eval_img_5.jpg", 5, 0.0, 1.0],
-            ["eval_img_6.jpg", 5, 0.0, 1.0],
-            ["eval_img_7.png", 5, 0.0, 1.0],
-            ["eval_img_8.jpg", 5, 0.0, 1.0],
+            ["eval_img_1.jpg", "<CAPTION>", 5, 0.0, 1.0],
+            ["eval_img_2.jpg", "<CAPTION>", 5, 0.0, 1.0],
+            ["eval_img_3.jpg", "<CAPTION>", 5, 0.0, 1.0],
+            ["eval_img_4.jpg", "<CAPTION>", 5, 0.0, 1.0],
+            ["eval_img_5.jpg", "<CAPTION>", 5, 0.0, 1.0],
+            ["eval_img_6.jpg", "<CAPTION>", 5, 0.0, 1.0],
+            ["eval_img_7.png", "<CAPTION>", 5, 0.0, 1.0],
+            ["eval_img_8.jpg", "<CAPTION>", 5, 0.0, 1.0],
         ],
-        inputs=[input_img, num_beams_slider, min_p_slider, top_p_slider],
+        inputs=[input_img, task_dropdown, num_beams_slider, min_p_slider, top_p_slider],
         outputs=[output_text],
         fn=process_image,
         label="Try captioning on below examples",
@@ -126,7 +131,7 @@ with gr.Blocks(css=css) as demo:
 
     submit_btn.click(
         process_image,
-        [input_img, num_beams_slider, min_p_slider, top_p_slider],
+        [input_img, task_dropdown, num_beams_slider, min_p_slider, top_p_slider],
        [output_text]
     )
 
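For context, a minimal sketch of how the updated `process_image` could be exercised outside the Gradio UI. This is not part of the commit: it assumes `process_image` can be imported from `app.py` without launching the demo, that an example image such as `eval_img_1.jpg` is present next to the script, and that `<DETAILED_CAPTION>` is a valid choice, mirroring the dropdown options added above.

```python
# Usage sketch (assumption: app.py does not auto-launch the demo on import).
from PIL import Image

from app import process_image  # hypothetical import of the function in this diff

img = Image.open("eval_img_1.jpg")  # assumed to exist alongside app.py

# Same generation settings the example rows use: 5 beams, min_p=0.0, top_p=1.0,
# but with the more verbose captioning task selected.
caption = process_image(img, task="<DETAILED_CAPTION>", num_beams=5, min_p=0.0, top_p=1.0)
print(caption)
```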