Spaces:
Sleeping
Sleeping
File size: 989 Bytes
5ef1757 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# utils.py
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import torch
class ImageCaptioningModel:
    """Thin wrapper around a BLIP checkpoint that captions image files.

    The processor and the model are loaded once in ``__init__`` and reused
    for every call to :meth:`generate_caption`.
    """

    def __init__(self, model_name: str = "Salesforce/blip-image-captioning-base"):
        """
        Initialize BLIP Image Captioning model.

        :param model_name: Identifier passed to ``from_pretrained`` for both
            the processor and the model
        """
        self.processor = BlipProcessor.from_pretrained(model_name)
        self.model = BlipForConditionalGeneration.from_pretrained(model_name)
        # Inference only: put the model in evaluation mode.
        self.model.eval()

    def generate_caption(self, image_path) -> str:
        """
        Generate a caption for the given image.

        :param image_path: Path to the input image
        :return: Generated caption (string)
        """
        # Image.open is lazy and may keep the underlying file open; the
        # context manager closes it deterministically once the pixel data
        # has been copied into the converted RGB image.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")

        inputs = self.processor(images=image, return_tensors="pt")

        # Gradients are not needed for generation.
        with torch.no_grad():
            output = self.model.generate(**inputs)

        # Decode the first (only) generated sequence, dropping special tokens.
        caption = self.processor.tokenizer.decode(
            output[0], skip_special_tokens=True
        )
        return caption
|