"""FastAPI service that captions uploaded images with a BLIP-2 model."""

import io

import torch
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image, UnidentifiedImageError
from transformers import AutoProcessor, Blip2ForConditionalGeneration

app = FastAPI()

# NOTE(review): the CORS spec forbids a wildcard origin together with
# allow_credentials=True -- browsers will reject credentialed requests.
# Replace ["*"] with an explicit origin list before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Adjust this as needed for security
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the base BLIP-2 model and attach the fine-tuned adapter
# (load_adapter requires the `peft` package to be installed).
model = Blip2ForConditionalGeneration.from_pretrained(
    "ybelkada/blip2-opt-2.7b-fp16-sharded"
)
model.load_adapter('blip-cpu-model')
processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# fp16 inputs are only valid on GPU; on CPU the weights load as fp32, so
# half-precision pixel values would raise a dtype-mismatch at generate().
dtype = torch.float16 if device.type == "cuda" else torch.float32
model.to(device)
model.eval()  # inference-only service: disable dropout etc.


@app.post("/generate-caption/")
async def generate_caption(file: UploadFile = File(...)):
    """Generate a caption for an uploaded image.

    Args:
        file: The uploaded image file (any format Pillow can decode).

    Returns:
        JSON object of the form ``{"caption": <generated text>}``.

    Raises:
        HTTPException: 400 if the upload cannot be decoded as an image.
    """
    payload = await file.read()
    try:
        # Normalize to RGB so palette/RGBA/grayscale uploads are accepted
        # by the processor, which expects 3-channel images.
        image = Image.open(io.BytesIO(payload)).convert("RGB")
    except (UnidentifiedImageError, OSError) as err:
        raise HTTPException(status_code=400, detail="Invalid image file") from err

    inputs = processor(images=image, return_tensors="pt").to(device, dtype)
    with torch.no_grad():
        caption_ids = model.generate(**inputs, max_length=128)
    caption = processor.decode(caption_ids[0], skip_special_tokens=True)
    return {"caption": caption}