# models/schemas.py
"""Pydantic models for request/response validation"""

from typing import List, Literal, Optional

from pydantic import BaseModel, Field, validator


class EmbeddingRequest(BaseModel):
    """Request model for embedding generation"""

    # Batch of input strings; size and content are checked by validate_texts.
    texts: List[str] = Field(
        ...,
        description="List of texts to embed",
        example=["Hola mundo", "¿Cómo estás?"],
    )
    # Declared before max_length so validate_max_length can read it
    # from `values`.
    model: Literal[
        "jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"
    ] = Field(default="jina", description="Model to use for embeddings")
    normalize: bool = Field(
        default=True,
        description="Whether to normalize embeddings to unit length",
    )
    # None leaves the sequence length unspecified; bounds are enforced
    # per model by validate_max_length.
    max_length: Optional[int] = Field(
        default=None,
        description="Maximum sequence length (uses model default if not specified)",
    )

    @validator('texts')
    def validate_texts(cls, v):
        """Check the batch of texts.

        Raises:
            ValueError: if the list is empty, holds more than 50 items,
                or contains an entry that is empty after stripping
                whitespace.
        """
        if not v:
            raise ValueError("At least one text must be provided")
        if len(v) > 50:
            raise ValueError("Maximum 50 texts per request")
        # Whitespace-only entries count as empty.
        for text in v:
            if not text.strip():
                raise ValueError("Empty texts are not allowed")
        return v

    @validator('max_length')
    def validate_max_length(cls, v, values):
        """Check max_length against the selected model's limit.

        Raises:
            ValueError: if the value exceeds 8192 for the jina models,
                exceeds 512 for the other listed models, or is below 1.
        """
        # Nothing to check when no explicit length was given.
        if v is None:
            return v

        # Falls back to 'jina' when 'model' is absent from `values`.
        model = values.get('model', 'jina')
        is_jina_family = model in ('jina', 'jina-v3')
        is_bert_family = model in ('robertalex', 'legal-bert', 'roberta-ca')

        if is_jina_family and v > 8192:
            raise ValueError(f"Max length for {model} model is 8192")
        if is_bert_family and v > 512:
            raise ValueError(f"Max length for {model} model is 512")
        if v < 1:
            raise ValueError("Max length must be positive")
        return v


class EmbeddingResponse(BaseModel):
    """Response model for embedding generation"""

    # All four fields are required (no defaults).
    embeddings: List[List[float]] = Field(
        ..., description="List of embedding vectors"
    )
    model_used: str = Field(..., description="Model that was used")
    dimensions: int = Field(..., description="Dimension of embedding vectors")
    num_texts: int = Field(..., description="Number of texts processed")


class ModelInfo(BaseModel):
    """Information about available models"""

    # Every field is required (no defaults).
    model_id: str = Field(..., description="Model identifier for API calls")
    name: str = Field(..., description="Full Hugging Face model name")
    dimensions: int = Field(..., description="Output embedding dimensions")
    max_sequence_length: int = Field(
        ..., description="Maximum input sequence length"
    )
    languages: List[str] = Field(..., description="Supported languages")
    model_type: str = Field(..., description="Type/domain of model")
    description: str = Field(..., description="Model description")


class ErrorResponse(BaseModel):
    """Error response model"""

    # `detail` is required; `error_type` is optional and defaults to None.
    detail: str = Field(..., description="Error message")
    error_type: Optional[str] = Field(
        default=None, description="Type of error"
    )