import gradio as gr
from PIL import Image
from transformers import pipeline
# Build the image-captioning pipeline once at module level so that every
# call to generate_caption reuses the same pipeline object.
caption_pipeline = pipeline(
    "image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
)
def generate_caption(image) -> str:
    """Return a caption for *image* using the module-level ``caption_pipeline``.

    Args:
        image: The uploaded picture. The Gradio input component is configured
            with ``type="pil"``, so the value is handed to the pipeline as-is
            with no conversion. Gradio passes ``None`` when the form is
            submitted without an image.

    Returns:
        The ``"generated_text"`` field of the first pipeline result, or an
        empty string when no image was supplied or the pipeline produced no
        results.
    """
    # Nothing to caption: avoid pushing None into the model pipeline.
    if image is None:
        return ""

    result = caption_pipeline(image)

    # Guard the [0] lookup so an empty result list does not raise IndexError.
    if not result:
        return ""
    return result[0]["generated_text"]
# Set up the Gradio interface: one image input (delivered to the callback as
# a PIL image) and one text box that shows the generated caption.
interface = gr.Interface(
    fn=generate_caption,
    inputs=gr.components.Image(type="pil", label="Upload an Image"),
    outputs=gr.components.Textbox(label="Generated Caption"),
)

# Start the web UI.
interface.launch()