import torch
import gradio as gr
from transformers import pipeline

# UI model names mapped to their Hugging Face Hub checkpoint IDs.
CAPTION_MODELS = {
    'blip-base': 'Salesforce/blip-image-captioning-base',
    'blip-large': 'Salesforce/blip-image-captioning-large',
    'vit-gpt2-coco-en': 'ydshieh/vit-gpt2-coco-en',
    'blip2-2.7b-fp16': 'Mediocreatmybest/blip2-opt-2.7b-fp16-sharded',
}

# Cache of already-loaded pipelines so each checkpoint is only instantiated once per session
loaded_models = {}

# Caption an image (uploaded file or URL) with the selected model
def caption_image(model_choice, image_input, url_input):
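    # Prefer the uploaded image; fall back to the URL field when no image is given.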
    if image_input is not None:
        input_data = image_input
    else:
        input_data = url_input
    # Bail out early if neither an image nor a URL was supplied.
    if input_data is None or input_data == "":
        return "Please provide an image or an image URL."

    # Check if the model is already loaded
    if model_choice in loaded_models:
        captioner = loaded_models[model_choice]
    else:
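        # Lazily load the requested pipeline the first time this model is selected.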
        captioner = pipeline(
            task="image-to-text",
            model=CAPTION_MODELS[model_choice],
            max_new_tokens=30,
            device_map="cpu",
            use_fast=True,
        )
        # Store the loaded model
        loaded_models[model_choice] = captioner

    # The pipeline returns a list of dicts such as [{'generated_text': '...'}].
    caption = captioner(input_data)[0]['generated_text']
    return str(caption).strip()

# Thin wrapper used as the Gradio callback.
def launch(model_choice, image_input, url_input):
    return caption_image(model_choice, image_input, url_input)

# Gradio input widgets: model selector, image upload, and URL text box.
model_dropdown = gr.Dropdown(choices=list(CAPTION_MODELS.keys()), label='Select Caption Model')
image_input = gr.Image(type="pil", label="Input Image")
url_input = gr.Text(label="Input URL")

iface = gr.Interface(launch, inputs=[model_dropdown, image_input, url_input], outputs="text")
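# Start the web UI; launch() serves the app and blocks until it is stopped.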
iface.launch()