imagedescription / utils.py
Segizu's picture
Image2caption simple
5ef1757
raw
history blame contribute delete
989 Bytes
# utils.py
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import torch
class ImageCaptioningModel:
    """Thin wrapper around Salesforce BLIP for single-image captioning.

    Loads the processor and model once at construction and keeps the model
    in eval mode; inference runs under ``torch.no_grad()``.
    """

    def __init__(self, model_name="Salesforce/blip-image-captioning-base"):
        """
        Initialize BLIP Image Captioning model.

        :param model_name: Hugging Face model id or local path for a BLIP
            conditional-generation checkpoint.
        """
        self.processor = BlipProcessor.from_pretrained(model_name)
        self.model = BlipForConditionalGeneration.from_pretrained(model_name)
        # Inference-only: disable dropout/batch-norm training behavior.
        self.model.eval()

    def generate_caption(self, image_path, max_new_tokens=20):
        """
        Generate a caption for the given image.

        :param image_path: Path to the input image, or an already-open
            ``PIL.Image.Image`` instance.
        :param max_new_tokens: Upper bound on generated caption length
            (20 matches the transformers default, made explicit to avoid
            the deprecation warning for an unset generation length).
        :return: Generated caption (string)
        """
        # Accept either a path-like or a PIL image; normalize to RGB
        # because BLIP expects 3-channel input.
        if isinstance(image_path, Image.Image):
            image = image_path.convert("RGB")
        else:
            image = Image.open(image_path).convert("RGB")
        inputs = self.processor(images=image, return_tensors="pt")
        with torch.no_grad():
            output = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
        # processor.decode is the documented API (delegates to the tokenizer).
        return self.processor.decode(output[0], skip_special_tokens=True)