ImageCaptioning

Sleeping

File size: 1,158 Bytes

9633d94
bee5682
 
 
9633d94
 
 
bee5682
31e8f8b
9633d94
 
 
31e8f8b
9633d94
 
 
31e8f8b
 
bee5682
9633d94
bee5682
 
 
9633d94
bee5682
 
31e8f8b
9633d94
 
bee5682
31e8f8b
bee5682
9633d94
31e8f8b
 
bee5682
31e8f8b
bee5682

from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image
import gradio as gr

# The preprocessing pipeline and the model weights are both loaded from the
# same Hugging Face checkpoint, so the identifier is spelled out only once.
MODEL_CHECKPOINT = "microsoft/git-base-coco"

# Processor: prepares images for the model and decodes generated token ids.
processor = AutoProcessor.from_pretrained(MODEL_CHECKPOINT)
# Model: causal language model whose ``generate`` produces the caption ids.
model = AutoModelForCausalLM.from_pretrained(MODEL_CHECKPOINT)

# Define the captioning function
def caption_image(image) -> str:
    """Generate a text caption for a single image.

    Args:
        image: The image to caption, in a form accepted by the module-level
            ``processor`` (the UI is configured to supply a PIL image), or
            ``None`` when no image was provided.

    Returns:
        The first decoded caption produced by the model, or an empty string
        when ``image`` is ``None``.
    """
    # Nothing to caption: return an empty caption instead of letting the
    # processor fail on a missing image.
    if image is None:
        return ""

    # Convert the image into the tensor input the model expects.
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    # Generate caption token ids, capped at 50 tokens.
    generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
    # Decode the batch and keep the caption of the single input image.
    captions = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0]

# Define Gradio interface components.
# Components are taken from the top-level ``gr`` namespace: the older
# ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated and no longer
# available in current Gradio releases.
inputs = [
    gr.Image(type='pil', label='Upload Image'),
]

outputs = [
    gr.Textbox(label='Generated Caption'),
]

# Define Gradio app properties
title = "Image Captioning Application"
description = "Upload an image to see the caption generated by the model"

# Create and launch the Gradio interface
gr.Interface(
    fn=caption_image,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
).launch(debug=True)