xzuyn committed on
Commit
b161e40
·
verified ·
1 Parent(s): 5874508

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -12
app.py CHANGED
@@ -20,7 +20,7 @@ model.to(device)
20
  TITLE = f"# [{model_name}](https://huggingface.co/{model_name})"
21
 
22
 
23
- def process_image(image, num_beams=5, min_p=0.0, top_p=1.0):
24
  """
25
  Process a single image to generate a caption.
26
  Supports image input as file path, numpy array, or PIL Image.
@@ -37,7 +37,7 @@ def process_image(image, num_beams=5, min_p=0.0, top_p=1.0):
37
 
38
  # Prepare inputs for the model
39
  inputs = processor(
40
- text="<CAPTION>",
41
  images=image,
42
  return_tensors="pt"
43
  )
@@ -85,6 +85,11 @@ with gr.Blocks(css=css) as demo:
85
 
86
  submit_btn = gr.Button(value="Submit")
87
 
 
 
 
 
 
88
  num_beams_slider = gr.Slider(
89
  minimum=1,
90
  maximum=5,
@@ -109,16 +114,16 @@ with gr.Blocks(css=css) as demo:
109
 
110
  gr.Examples(
111
  [
112
- ["eval_img_1.jpg", 5, 0.0, 1.0],
113
- ["eval_img_2.jpg", 5, 0.0, 1.0],
114
- ["eval_img_3.jpg", 5, 0.0, 1.0],
115
- ["eval_img_4.jpg", 5, 0.0, 1.0],
116
- ["eval_img_5.jpg", 5, 0.0, 1.0],
117
- ["eval_img_6.jpg", 5, 0.0, 1.0],
118
- ["eval_img_7.png", 5, 0.0, 1.0],
119
- ["eval_img_8.jpg", 5, 0.0, 1.0],
120
  ],
121
- inputs=[input_img, num_beams_slider, min_p_slider, top_p_slider],
122
  outputs=[output_text],
123
  fn=process_image,
124
  label="Try captioning on below examples",
@@ -126,7 +131,7 @@ with gr.Blocks(css=css) as demo:
126
 
127
  submit_btn.click(
128
  process_image,
129
- [input_img, num_beams_slider, min_p_slider, top_p_slider],
130
  [output_text]
131
  )
132
 
 
20
  TITLE = f"# [{model_name}](https://huggingface.co/{model_name})"
21
 
22
 
23
+ def process_image(image, task="<CAPTION>", num_beams=5, min_p=0.0, top_p=1.0):
24
  """
25
  Process a single image to generate a caption.
26
  Supports image input as file path, numpy array, or PIL Image.
 
37
 
38
  # Prepare inputs for the model
39
  inputs = processor(
40
+ text=task,
41
  images=image,
42
  return_tensors="pt"
43
  )
 
85
 
86
  submit_btn = gr.Button(value="Submit")
87
 
88
+ task_dropdown = gr.Dropdown(
89
+ ["<CAPTION>", "<DETAILED_CAPTION>", "<MORE_DETAILED_CAPTION>"],
90
+ value="<CAPTION>",
91
+ label="Captioning Mode",
92
+ ),
93
  num_beams_slider = gr.Slider(
94
  minimum=1,
95
  maximum=5,
 
114
 
115
  gr.Examples(
116
  [
117
+ ["eval_img_1.jpg", "<CAPTION>", 5, 0.0, 1.0],
118
+ ["eval_img_2.jpg", "<CAPTION>", 5, 0.0, 1.0],
119
+ ["eval_img_3.jpg", "<CAPTION>", 5, 0.0, 1.0],
120
+ ["eval_img_4.jpg", "<CAPTION>", 5, 0.0, 1.0],
121
+ ["eval_img_5.jpg", "<CAPTION>", 5, 0.0, 1.0],
122
+ ["eval_img_6.jpg", "<CAPTION>", 5, 0.0, 1.0],
123
+ ["eval_img_7.png", "<CAPTION>", 5, 0.0, 1.0],
124
+ ["eval_img_8.jpg", "<CAPTION>", 5, 0.0, 1.0],
125
  ],
126
+ inputs=[input_img, task_dropdown, num_beams_slider, min_p_slider, top_p_slider],
127
  outputs=[output_text],
128
  fn=process_image,
129
  label="Try captioning on below examples",
 
131
 
132
  submit_btn.click(
133
  process_image,
134
+ [input_img, task_dropdown, num_beams_slider, min_p_slider, top_p_slider],
135
  [output_text]
136
  )
137