xzuyn committed
Commit b329cda · verified · 1 Parent(s): 240d96f

Update app.py

Files changed (1)
  1. app.py (+31 / -13)
app.py CHANGED
@@ -20,10 +20,11 @@ model.to(device)
 TITLE = f"# [{model_name}](https://huggingface.co/{model_name})"
 
 
-def process_image(image):
+def process_image(image, num_beams=5, min_p=0.0, top_p=1.0):
     """
     Process a single image to generate a caption.
     Supports image input as file path, numpy array, or PIL Image.
+    Generation settings (num_beams, min_p, top_p) can be customized.
     """
     try:
         # Convert input to PIL image if necessary
@@ -45,8 +46,10 @@ def process_image(image):
             input_ids=inputs["input_ids"],
             pixel_values=inputs["pixel_values"],
             max_new_tokens=1024,
-            num_beams=5,
+            num_beams=num_beams,
             do_sample=True,
+            top_p=top_p,
+            min_p=min_p,
         )
 
         # Decode and post-process the generated text
@@ -71,28 +74,43 @@ with gr.Blocks(css=css) as demo:
     with gr.Row():
         with gr.Column():
             input_img = gr.Image(label="Input Picture")
-            submit_btn = gr.Button(value="Submit")
         with gr.Column():
             output_text = gr.Textbox(label="Output Text")
 
+    submit_btn = gr.Button(value="Submit")
+
+    num_beams_slider = gr.Slider(
+        minimum=1, maximum=5, step=1, value=5, label="Number of Beams"
+    )
+    min_p_slider = gr.Slider(
+        minimum=0, maximum=1, step=0.01, value=0.0, label="Min-P"
+    )
+    top_p_slider = gr.Slider(
+        minimum=0, maximum=1, step=0.01, value=1.0, label="Top-P"
+    )
+
     gr.Examples(
         [
-            ["eval_img_1.jpg"],
-            ["eval_img_2.jpg"],
-            ["eval_img_3.jpg"],
-            ["eval_img_4.jpg"],
-            ["eval_img_5.jpg"],
-            ["eval_img_6.jpg"],
-            ["eval_img_7.png"],
-            ["eval_img_8.jpg"],
+            ["eval_img_1.jpg", 5, 0.0, 1.0],
+            ["eval_img_2.jpg", 5, 0.0, 1.0],
+            ["eval_img_3.jpg", 5, 0.0, 1.0],
+            ["eval_img_4.jpg", 5, 0.0, 1.0],
+            ["eval_img_5.jpg", 5, 0.0, 1.0],
+            ["eval_img_6.jpg", 5, 0.0, 1.0],
+            ["eval_img_7.png", 5, 0.0, 1.0],
+            ["eval_img_8.jpg", 5, 0.0, 1.0],
         ],
-        inputs=[input_img],
+        inputs=[input_img, num_beams_slider, min_p_slider, top_p_slider],
        outputs=[output_text],
         fn=process_image,
         label="Try captioning on below examples",
     )
 
-    submit_btn.click(process_image, [input_img], [output_text])
+    submit_btn.click(
+        process_image,
+        [input_img, num_beams_slider, min_p_slider, top_p_slider],
+        [output_text]
+    )
 
 if __name__ == "__main__":
     demo.launch(debug=True)
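
For reference, below is a minimal, self-contained sketch of the Gradio wiring pattern this diff adopts: slider values are forwarded to the click callback alongside the image, with the function-signature defaults mirroring the slider defaults. caption_stub is a hypothetical stand-in for the app's process_image (the real function runs the captioning model); the Gradio components and arguments follow the ones shown in the diff.

import gradio as gr

def caption_stub(image, num_beams=5, min_p=0.0, top_p=1.0):
    # Hypothetical stand-in: the real process_image calls model.generate()
    # with these sampling settings and returns the decoded caption.
    return f"num_beams={num_beams}, min_p={min_p}, top_p={top_p}"

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            input_img = gr.Image(label="Input Picture")
        with gr.Column():
            output_text = gr.Textbox(label="Output Text")

    submit_btn = gr.Button(value="Submit")

    num_beams_slider = gr.Slider(minimum=1, maximum=5, step=1, value=5, label="Number of Beams")
    min_p_slider = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.0, label="Min-P")
    top_p_slider = gr.Slider(minimum=0, maximum=1, step=0.01, value=1.0, label="Top-P")

    # Extra inputs are passed to the callback in order, after the image.
    submit_btn.click(
        caption_stub,
        [input_img, num_beams_slider, min_p_slider, top_p_slider],
        [output_text],
    )

if __name__ == "__main__":
    demo.launch()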