Spaces:

Mediocreatmybest
/

PipelineImageCaption

Runtime error

App Files Files Community

Mediocreatmybest committed on Jul 15, 2023

Commit

20a5e29

1 Parent(s): b9aba88

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -10

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import torch
 import gradio as gr
 from transformers import pipeline
 CAPTION_MODELS = {
     'blip-base': 'Salesforce/blip-image-captioning-base',
@@ -15,12 +16,13 @@ CAPTION_MODELS = {
 loaded_models = {}
 # Simple caption creation
-def caption_image(model_choice, image_input, url_input, load_in_8bit, device):
     if image_input is not None:
-        input_data = image_input
     else:
-        input_data = url_input
     model_key = (model_choice, load_in_8bit)  # Create a tuple to represent the unique combination of model and 8bit loading
     # Check if the model is already loaded
@@ -32,7 +34,7 @@ def caption_image(model_choice, image_input, url_input, load_in_8bit, device):
         captioner = pipeline(task="image-to-text",
                             model=CAPTION_MODELS[model_choice],
                             max_new_tokens=30,
-                            device=device, # Set the device as selected
                             model_kwargs=model_kwargs,
                             torch_dtype=dtype,  # Set the floating point
                             use_fast=True
@@ -40,14 +42,20 @@ def caption_image(model_choice, image_input, url_input, load_in_8bit, device):
         # Store the loaded model
         loaded_models[model_key] = captioner
-    caption = captioner(input_data)
-    return [str(c['generated_text']).strip() for c in caption]
 model_dropdown = gr.Dropdown(choices=list(CAPTION_MODELS.keys()), label='Select Caption Model')
-image_input = gr.Image(type="pil", label="Input Image")  # multiple is not supported in gradio
-url_input = gr.Text(label="Input URL")
 load_in_8bit = gr.Checkbox(label="Load model in 8bit")
-device = gr.Radio(['cpu', 'cuda'], label='Select device')
-iface = gr.Interface(caption_image, inputs=[model_dropdown, image_input, url_input, load_in_8bit, device], outputs=gr.Textbox(type="text", label="Caption"))
 iface.launch()

 import torch
 import gradio as gr
 from transformers import pipeline
+import ast
 CAPTION_MODELS = {
     'blip-base': 'Salesforce/blip-image-captioning-base',
 loaded_models = {}
 # Simple caption creation
+def caption_image(model_choice, image_input, url_inputs, load_in_8bit):
     if image_input is not None:
+        input_data = [image_input]
     else:
+        input_data = ast.literal_eval(url_inputs)  # interpret the input string as a list
+    captions = []
     model_key = (model_choice, load_in_8bit)  # Create a tuple to represent the unique combination of model and 8bit loading
     # Check if the model is already loaded
         captioner = pipeline(task="image-to-text",
                             model=CAPTION_MODELS[model_choice],
                             max_new_tokens=30,
+                            device='cpu', # Set the device as CPU
                             model_kwargs=model_kwargs,
                             torch_dtype=dtype,  # Set the floating point
                             use_fast=True
         # Store the loaded model
         loaded_models[model_key] = captioner
+    for input_item in input_data:
+        caption = captioner(input_item)[0]['generated_text']
+        captions.append(str(caption).strip())
+    return captions
+def launch(model_choice, image_input, url_inputs, load_in_8bit, device):
+    return caption_image(model_choice, image_input, url_inputs, load_in_8bit, device)
 model_dropdown = gr.Dropdown(choices=list(CAPTION_MODELS.keys()), label='Select Caption Model')
+image_input = gr.Image(type="pil", label="Input Image", multiple=True)  # Enable multiple inputs
+url_inputs = gr.Textbox(label="Input URLs")
 load_in_8bit = gr.Checkbox(label="Load model in 8bit")
+device = gr.Radio(['cpu', 'cuda'], label='Select device', default='cpu')
+iface = gr.Interface(launch, inputs=[model_dropdown, image_input, url_inputs, load_in_8bit, device],
+                     outputs=gr.outputs.Textbox(type="text", label="Caption"))
 iface.launch()