# NOTE: "Spaces: Sleeping" was page residue captured with this script; it is not part of the program.
import gradio as gr
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
# Load the processor and model once at import time so that every call to the
# captioning function reuses the same objects. Both are loaded from the same
# checkpoint, so the identifier is named in one place.
MODEL_CHECKPOINT = "microsoft/git-base-coco"
processor = AutoProcessor.from_pretrained(MODEL_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(MODEL_CHECKPOINT)
# Define the captioning function
def caption_image(image) -> str:
    """Generate a text caption for ``image``.

    Args:
        image: The picture to caption, in any form accepted by the
            module-level ``processor`` (the interface in this file is
            configured to pass a PIL image), or ``None`` when no image
            was supplied.

    Returns:
        The decoded caption for the image, or an empty string when
        ``image`` is ``None``.
    """
    # Nothing to caption without an image (e.g. the form was submitted
    # empty); return early instead of failing inside the processor.
    if image is None:
        return ""

    # Process the image
    pixel_values = processor(images=image, return_tensors="pt").pixel_values

    # Generate captions
    generated_ids = model.generate(pixel_values=pixel_values, max_length=50)

    # batch_decode returns one string per generated sequence; a single image
    # was passed in, so the first entry is the caption.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
# Define Gradio interface components.
# The top-level component classes are used instead of the deprecated
# ``gr.inputs`` / ``gr.outputs`` namespaces, which newer Gradio releases no
# longer provide.
inputs = [
    gr.Image(type="pil", label="Upload Image"),
]
outputs = [
    gr.Textbox(label="Generated Caption"),
]
# Define Gradio app properties
title = "Image Captioning Application"
description = "Upload an image to see the caption generated by the model"

# Create and launch the Gradio interface.
# The interface is bound to a name so it stays reachable after construction;
# the launch itself still happens at module level, as before.
demo = gr.Interface(
    fn=caption_image,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
)
demo.launch(debug=True)