# app.py
from fastapi import FastAPI, HTTPException, UploadFile, File, Form
from pydantic import BaseModel
from llama_cpp import Llama
from typing import Optional
import uvicorn
import huggingface_hub
import os
from PIL import Image
import io
import base64

app = FastAPI(
    title="OmniVLM API",
    description="API for text and image processing using the OmniVLM model",
    version="1.0.0"
)

# Download the model weights from the Hugging Face Hub (cached after the first run)
model_path = huggingface_hub.hf_hub_download(
    repo_id="NexaAIDev/OmniVLM-968M",
    filename="omnivision-text-optimized-llm-Q8_0.gguf"
)

# Initialize the model with the downloaded file
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=4,
    n_batch=512,
    verbose=True
)

class GenerationRequest(BaseModel):
    prompt: str
    max_tokens: Optional[int] = 100
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.9

class GenerationResponse(BaseModel):
    generated_text: str

@app.post("/generate", response_model=GenerationResponse)
async def generate_text(request: GenerationRequest):
    try:
        output = llm(
            request.prompt,
            max_tokens=request.max_tokens,
            temperature=request.temperature,
            top_p=request.top_p
        )
        return GenerationResponse(generated_text=output["choices"][0]["text"])
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# The upload arrives as multipart/form-data, so the generation options are
# declared as Form fields: FastAPI cannot parse a JSON (Pydantic) body and an
# UploadFile from the same request.
@app.post("/process-image", response_model=GenerationResponse)
async def process_image(
    file: UploadFile = File(...),
    prompt: str = Form("Describe this image in detail"),
    max_tokens: int = Form(200),
    temperature: float = Form(0.7)
):
    try:
        # Read and validate the uploaded image
        image_data = await file.read()
        try:
            image = Image.open(io.BytesIO(image_data)).convert("RGB")
        except HTTPException:
            raise
        except Exception:
            raise HTTPException(status_code=400, detail="Invalid image file")

        # Re-encode as JPEG so the data URI below is accurate (the RGB
        # conversion above drops any alpha channel, which JPEG cannot store)
        buffered = io.BytesIO()
        image.save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue()).decode()

        # Build the prompt with the image inlined as a base64 data URI.
        # NOTE: this assumes the runtime accepts inline image data in a plain
        # text prompt; llama-cpp-python's vision models generally need a
        # dedicated chat handler plus a projector file, so treat this path as
        # a sketch rather than a guaranteed multimodal setup.
        full_prompt = f"""
data:image/jpeg;base64,{img_str}
{prompt}
"""

        # Generate the description
        output = llm(
            full_prompt,
            max_tokens=max_tokens,
            temperature=temperature
        )
        return GenerationResponse(generated_text=output["choices"][0]["text"])
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/health")
async def health_check():
    return {"status": "healthy"}

if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))  # 7860 is the default port on Hugging Face Spaces
    uvicorn.run(app, host="0.0.0.0", port=port)
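
# Example client calls (a minimal sketch; assumes the server is running
# locally on port 7860 and that the `requests` package is installed --
# neither is required by the app above):
#
#   import requests
#
#   # Text generation: JSON body matching GenerationRequest
#   r = requests.post(
#       "http://localhost:7860/generate",
#       json={"prompt": "Write a haiku about the sea.", "max_tokens": 50},
#   )
#   print(r.json()["generated_text"])
#
#   # Image description: the file goes in `files`, the options in `data`,
#   # matching the Form fields declared on /process-image ("photo.jpg" is a
#   # placeholder path)
#   with open("photo.jpg", "rb") as f:
#       r = requests.post(
#           "http://localhost:7860/process-image",
#           files={"file": ("photo.jpg", f, "image/jpeg")},
#           data={"prompt": "What is in this picture?", "max_tokens": 200},
#       )
#   print(r.json()["generated_text"])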