import torch
import gradio as gr
from transformers import pipeline

# UI model names mapped to their Hugging Face Hub checkpoint IDs.
CAPTION_MODELS = {
    'blip-base': 'Salesforce/blip-image-captioning-base',
    'blip-large': 'Salesforce/blip-image-captioning-large',
    'vit-gpt2-coco-en': 'ydshieh/vit-gpt2-coco-en',
    'blip2-2.7b-fp16': 'Mediocreatmybest/blip2-opt-2.7b-fp16-sharded',
}

# Cache of already-loaded pipelines so each checkpoint is only instantiated once per session
loaded_models = {}

# Caption an image (uploaded file or URL) with the selected model
def caption_image(model_choice, image_input, url_input):
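    # Prefer the uploaded image; fall back to the URL field when no image is given.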
    if image_input is not None:
        input_data = image_input
    else:
        input_data = url_input
    # Bail out early if neither an image nor a URL was supplied.
    if input_data is None or input_data == "":
        return "Please provide an image or an image URL."

    # Check if the model is already loaded
    if model_choice in loaded_models:
        captioner = loaded_models[model_choice]
    else:
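        # Lazily load the requested pipeline the first time this model is selected.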
        captioner = pipeline(
            task="image-to-text",
            model=CAPTION_MODELS[model_choice],
            max_new_tokens=30,
            device_map="cpu",
            use_fast=True,
        )
        # Store the loaded model
        loaded_models[model_choice] = captioner

    # The pipeline returns a list of dicts such as [{'generated_text': '...'}].
    caption = captioner(input_data)[0]['generated_text']
    return str(caption).strip()

# Thin wrapper used as the Gradio callback.
def launch(model_choice, image_input, url_input):
    return caption_image(model_choice, image_input, url_input)

# Gradio input widgets: model selector, image upload, and URL text box.
model_dropdown = gr.Dropdown(choices=list(CAPTION_MODELS.keys()), label='Select Caption Model')
image_input = gr.Image(type="pil", label="Input Image")
url_input = gr.Text(label="Input URL")

iface = gr.Interface(launch, inputs=[model_dropdown, image_input, url_input], outputs="text")
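# Start the web UI; launch() serves the app and blocks until it is stopped.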
iface.launch()