Mediocreatmybest committed on
Commit
20a5e29
·
1 Parent(s): b9aba88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -10
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import torch
2
  import gradio as gr
3
  from transformers import pipeline
 
4
 
5
  CAPTION_MODELS = {
6
  'blip-base': 'Salesforce/blip-image-captioning-base',
@@ -15,12 +16,13 @@ CAPTION_MODELS = {
15
  loaded_models = {}
16
 
17
  # Simple caption creation
18
- def caption_image(model_choice, image_input, url_input, load_in_8bit, device):
19
  if image_input is not None:
20
- input_data = image_input
21
  else:
22
- input_data = url_input
23
 
 
24
  model_key = (model_choice, load_in_8bit) # Create a tuple to represent the unique combination of model and 8bit loading
25
 
26
  # Check if the model is already loaded
@@ -32,7 +34,7 @@ def caption_image(model_choice, image_input, url_input, load_in_8bit, device):
32
  captioner = pipeline(task="image-to-text",
33
  model=CAPTION_MODELS[model_choice],
34
  max_new_tokens=30,
35
- device=device, # Set the device as selected
36
  model_kwargs=model_kwargs,
37
  torch_dtype=dtype, # Set the floating point
38
  use_fast=True
@@ -40,14 +42,20 @@ def caption_image(model_choice, image_input, url_input, load_in_8bit, device):
40
  # Store the loaded model
41
  loaded_models[model_key] = captioner
42
 
43
- caption = captioner(input_data)
44
- return [str(c['generated_text']).strip() for c in caption]
 
 
 
 
 
45
 
46
  model_dropdown = gr.Dropdown(choices=list(CAPTION_MODELS.keys()), label='Select Caption Model')
47
- image_input = gr.Image(type="pil", label="Input Image") # multiple is not supported in gradio
48
- url_input = gr.Text(label="Input URL")
49
  load_in_8bit = gr.Checkbox(label="Load model in 8bit")
50
- device = gr.Radio(['cpu', 'cuda'], label='Select device')
51
 
52
- iface = gr.Interface(caption_image, inputs=[model_dropdown, image_input, url_input, load_in_8bit, device], outputs=gr.Textbox(type="text", label="Caption"))
 
53
  iface.launch()
 
1
  import torch
2
  import gradio as gr
3
  from transformers import pipeline
4
+ import ast
5
 
6
  CAPTION_MODELS = {
7
  'blip-base': 'Salesforce/blip-image-captioning-base',
 
16
  loaded_models = {}
17
 
18
  # Simple caption creation
19
+ def caption_image(model_choice, image_input, url_inputs, load_in_8bit):
20
  if image_input is not None:
21
+ input_data = [image_input]
22
  else:
23
+ input_data = ast.literal_eval(url_inputs) # interpret the input string as a list
24
 
25
+ captions = []
26
  model_key = (model_choice, load_in_8bit) # Create a tuple to represent the unique combination of model and 8bit loading
27
 
28
  # Check if the model is already loaded
 
34
  captioner = pipeline(task="image-to-text",
35
  model=CAPTION_MODELS[model_choice],
36
  max_new_tokens=30,
37
+ device='cpu', # Set the device as CPU
38
  model_kwargs=model_kwargs,
39
  torch_dtype=dtype, # Set the floating point
40
  use_fast=True
 
42
  # Store the loaded model
43
  loaded_models[model_key] = captioner
44
 
45
+ for input_item in input_data:
46
+ caption = captioner(input_item)[0]['generated_text']
47
+ captions.append(str(caption).strip())
48
+ return captions
49
+
50
def launch(model_choice, image_input, url_inputs, load_in_8bit, device):
    """Gradio callback: forward the UI inputs to ``caption_image``.

    Args:
        model_choice: Key selecting the caption model.
        image_input: Uploaded image, or ``None`` when URLs are used instead.
        url_inputs: Text holding the URL input.
        load_in_8bit: Whether the model should be loaded in 8-bit mode.
        device: Value of the device selector. Accepted because the interface
            passes it to this callback, but not forwarded: ``caption_image``
            takes only four arguments and pins its pipeline to the CPU, so
            passing ``device`` through raised ``TypeError`` on every call.

    Returns:
        The list of caption strings produced by ``caption_image``.
    """
    return caption_image(model_choice, image_input, url_inputs, load_in_8bit)
52
 
53
  model_dropdown = gr.Dropdown(choices=list(CAPTION_MODELS.keys()), label='Select Caption Model')
54
# Gradio input widgets, listed in the order the `launch` callback receives them.
# gr.Image has no multi-file mode, so this widget yields a single PIL image.
image_input = gr.Image(type="pil", label="Input Image")
# caption_image parses this text with ast.literal_eval, so it must be a Python list literal.
url_inputs = gr.Textbox(label="Input URLs")
load_in_8bit = gr.Checkbox(label="Load model in 8bit")
# `value` sets the initial selection; `default` is the deprecated spelling.
device = gr.Radio(['cpu', 'cuda'], label='Select device', value='cpu')

# Use the top-level gr.Textbox component; the gr.outputs namespace is deprecated.
iface = gr.Interface(
    launch,
    inputs=[model_dropdown, image_input, url_inputs, load_in_8bit, device],
    outputs=gr.Textbox(type="text", label="Caption"),
)
iface.launch()