Spaces:

AuraSystems
/

spanish-embeddings-api

Sleeping

File size: 5,740 Bytes

03eefac

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from typing import List
import torch
import uvicorn

from models.schemas import EmbeddingRequest, EmbeddingResponse, ModelInfo
from utils.helpers import load_models, get_embeddings, cleanup_memory

# Global model cache - completely on-demand loading
models_cache = {}

# All models load on demand to test deployment
ON_DEMAND_MODELS = ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"]

def ensure_model_loaded(model_name: str):
    """Load a specific model on demand if not already loaded"""
    global models_cache
    if model_name not in models_cache:
        if model_name in ON_DEMAND_MODELS:
            try:
                print(f"Loading model on demand: {model_name}...")
                new_models = load_models([model_name])
                models_cache.update(new_models)
                print(f"Model {model_name} loaded successfully!")
            except Exception as e:
                print(f"Failed to load model {model_name}: {str(e)}")
                raise HTTPException(status_code=500, detail=f"Model {model_name} loading failed: {str(e)}")
        else:
            raise HTTPException(status_code=400, detail=f"Unknown model: {model_name}")

app = FastAPI(
    title="Multilingual & Legal Embedding API",
    description="Multi-model embedding API for Spanish, Catalan, English and Legal texts",
    version="3.0.0"
)

# Add CORS middleware to allow cross-origin requests
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify actual domains
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/")
async def root():
    return {
        "message": "Multilingual & Legal Embedding API - Minimal Version",
        "models": ["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"],
        "status": "running",
        "docs": "/docs",
        "total_models": 5,
        "note": "All models load on first request"
    }

@app.post("/embed", response_model=EmbeddingResponse)
async def create_embeddings(request: EmbeddingRequest):
    """Generate embeddings for input texts"""
    try:
        # Load specific model on demand
        ensure_model_loaded(request.model)
        
        if not request.texts:
            raise HTTPException(status_code=400, detail="No texts provided")
        
        if len(request.texts) > 50:  # Rate limiting
            raise HTTPException(status_code=400, detail="Maximum 50 texts per request")
        
        embeddings = get_embeddings(
            request.texts,
            request.model,
            models_cache,
            request.normalize,
            request.max_length
        )
        
        # Cleanup memory after large batches
        if len(request.texts) > 20:
            cleanup_memory()
        
        return EmbeddingResponse(
            embeddings=embeddings,
            model_used=request.model,
            dimensions=len(embeddings[0]) if embeddings else 0,
            num_texts=len(request.texts)
        )
        
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")

@app.get("/models", response_model=List[ModelInfo])
async def list_models():
    """List available models and their specifications"""
    return [
        ModelInfo(
            model_id="jina",
            name="jinaai/jina-embeddings-v2-base-es",
            dimensions=768,
            max_sequence_length=8192,
            languages=["Spanish", "English"],
            model_type="bilingual",
            description="Bilingual Spanish-English embeddings with long context support"
        ),
        ModelInfo(
            model_id="robertalex",
            name="PlanTL-GOB-ES/RoBERTalex",
            dimensions=768,
            max_sequence_length=512,
            languages=["Spanish"],
            model_type="legal domain",
            description="Spanish legal domain specialized embeddings"
        ),
        ModelInfo(
            model_id="jina-v3",
            name="jinaai/jina-embeddings-v3",
            dimensions=1024,
            max_sequence_length=8192,
            languages=["Multilingual"],
            model_type="multilingual",
            description="Latest Jina v3 with superior multilingual performance"
        ),
        ModelInfo(
            model_id="legal-bert",
            name="nlpaueb/legal-bert-base-uncased",
            dimensions=768,
            max_sequence_length=512,
            languages=["English"],
            model_type="legal domain",
            description="English legal domain BERT model"
        ),
        ModelInfo(
            model_id="roberta-ca",
            name="projecte-aina/roberta-large-ca-v2",
            dimensions=1024,
            max_sequence_length=512,
            languages=["Catalan"],
            model_type="general",
            description="Catalan RoBERTa-large model trained on large corpus"
        )
    ]

@app.get("/health")
async def health_check():
    """Health check endpoint"""
    all_models_loaded = len(models_cache) == 5
    
    return {
        "status": "healthy",
        "all_models_loaded": all_models_loaded,
        "available_models": list(models_cache.keys()),
        "on_demand_models": ON_DEMAND_MODELS,
        "models_count": len(models_cache),
        "note": "All models load on first embedding request - minimal deployment version"
    }

if __name__ == "__main__":
    # Set multi-threading for CPU
    torch.set_num_threads(8)
    torch.set_num_interop_threads(1)
    
    uvicorn.run(app, host="0.0.0.0", port=7860)