"""Gradio demo that captions an uploaded image with a ViT-GPT2 model."""

from typing import Optional

import gradio as gr
from PIL import Image
from transformers import pipeline

# Initialize the pipeline with the image captioning model.
# Built once at module load so every request reuses the same model instance.
caption_pipeline = pipeline(
    "image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
)


def generate_caption(image: Optional[Image.Image]) -> str:
    """Return the caption generated for *image*.

    Args:
        image: The uploaded picture. The interface below requests
            ``type="pil"``, so no conversion is done before inference.
            ``None`` is accepted because the UI may submit the form
            without an image (assumed Gradio behaviour -- confirm against
            the installed version).

    Returns:
        The ``"generated_text"`` field of the first pipeline result, or an
        empty string when no image was supplied.
    """
    if image is None:
        # Nothing to caption; avoid passing None into the pipeline.
        return ""

    result = caption_pipeline(image)
    return result[0]["generated_text"]


# Setup the Gradio interface.
# Components are referenced from the top-level namespace: the legacy
# ``gr.inputs`` / ``gr.outputs`` modules are deprecated and no longer exist
# in current Gradio releases.
interface = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(label="Upload an Image", type="pil"),
    outputs=gr.Textbox(label="Generated Caption"),
)

if __name__ == "__main__":
    # Start the web server only when executed as a script, so importing this
    # module (e.g. to reuse generate_caption) does not launch the app.
    interface.launch()