"""Spanish Embedding API: serves two Spanish embedding models over HTTP."""

from fastapi import FastAPI, HTTPException
from typing import List
import torch
import uvicorn
import gc
import os

from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
from utils.helpers import load_models, get_embeddings, cleanup_memory

app = FastAPI(
    title="Spanish Embedding API",
    description="Dual Spanish embedding models API",
    version="1.0.0"
)

# Global model cache, populated once at startup and read by every request.
models_cache = {}


# NOTE(review): @app.on_event is deprecated in newer FastAPI in favor of
# lifespan handlers; kept as-is to avoid changing the module's interface.
@app.on_event("startup")
async def startup_event():
    """Load both embedding models into the global cache on startup."""
    global models_cache
    models_cache = load_models()
    print("Models loaded successfully!")


@app.get("/")
async def root():
    """Landing endpoint with basic service metadata."""
    return {
        "message": "Spanish Embedding API",
        "models": ["jina", "robertalex"],
        "status": "running",
        "docs": "/docs",
    }


@app.post("/embed", response_model=EmbeddingResponse)
async def create_embeddings(request: EmbeddingRequest):
    """Generate embeddings for the texts in *request*.

    Returns an EmbeddingResponse with the embeddings, the model used,
    the embedding dimensionality, and the number of input texts.

    Raises:
        HTTPException 400: empty input, more than 50 texts, or a
            ValueError from the embedding helper (e.g. unknown model).
        HTTPException 500: any other unexpected failure.
    """
    # Validate outside the try-block so these 400s reach the client as-is.
    if not request.texts:
        raise HTTPException(status_code=400, detail="No texts provided")
    if len(request.texts) > 50:  # simple per-request batch cap
        raise HTTPException(status_code=400, detail="Maximum 50 texts per request")

    try:
        embeddings = get_embeddings(
            request.texts,
            request.model,
            models_cache,
            request.normalize,
            request.max_length,
        )

        # Cleanup memory after large batches
        if len(request.texts) > 20:
            cleanup_memory()

        return EmbeddingResponse(
            embeddings=embeddings,
            model_used=request.model,
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts),
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except HTTPException:
        # BUG FIX: the original broad `except Exception` also caught
        # HTTPExceptions raised deliberately, re-wrapping 400s as opaque
        # 500s. Let them propagate unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")


@app.get("/models", response_model=List[ModelInfo])
async def list_models():
    """List available models and their specifications."""
    return [
        ModelInfo(
            model_id="jina",
            name="jinaai/jina-embeddings-v2-base-es",
            dimensions=768,
            max_sequence_length=8192,
            languages=["Spanish", "English"],
            model_type="bilingual",
            # Reconstructed: this literal was broken across a line in the
            # original source (a paste/formatting artifact).
            description="Bilingual Spanish-English embeddings with long context support",
        ),
        ModelInfo(
            model_id="robertalex",
            name="PlanTL-GOB-ES/RoBERTalex",
            dimensions=768,
            max_sequence_length=512,
            languages=["Spanish"],
            model_type="legal domain",
            description="Spanish legal domain specialized embeddings",
        ),
    ]


@app.get("/health")
async def health_check():
    """Health check endpoint reporting model-cache status."""
    return {
        "status": "healthy",
        # True only when both expected models are cached.
        "models_loaded": len(models_cache) == 2,
        "available_models": list(models_cache.keys()),
    }


if __name__ == "__main__":
    # Set multi-threading for CPU inference
    torch.set_num_threads(8)
    torch.set_num_interop_threads(1)
    uvicorn.run(app, host="0.0.0.0", port=7860)