Spaces:
Sleeping
Sleeping
# models/schemas.py
"""Pydantic models for request/response validation"""
from pydantic import BaseModel, Field, validator | |
from typing import List, Optional, Literal | |
class EmbeddingRequest(BaseModel):
    """Request model for embedding generation"""

    # Texts to embed; checked by ``validate_texts`` below.
    texts: List[str] = Field(
        ...,
        description="List of texts to embed",
        example=["Hola mundo", "¿Cómo estás?"],
    )
    # Declared before ``max_length`` on purpose: ``validate_max_length`` reads
    # the already-validated model name from ``values``.
    model: Literal["jina", "robertalex", "jina-v3", "legal-bert", "roberta-ca"] = Field(
        default="jina",
        description="Model to use for embeddings",
    )
    normalize: bool = Field(
        default=True,
        description="Whether to normalize embeddings to unit length",
    )
    max_length: Optional[int] = Field(
        default=None,
        description="Maximum sequence length (uses model default if not specified)",
    )

    # The two methods below must be registered with ``@validator``; without
    # the decorator they are ordinary methods that pydantic never invokes.

    @validator("texts")
    def validate_texts(cls, v):
        """Validate the ``texts`` field.

        Args:
            v: The list of texts supplied in the request.

        Returns:
            The list unchanged when it is acceptable.

        Raises:
            ValueError: If the list is empty, holds more than 50 entries, or
                contains an entry that is empty or whitespace-only.
        """
        if not v:
            raise ValueError("At least one text must be provided")
        if len(v) > 50:
            raise ValueError("Maximum 50 texts per request")
        # Whitespace-only entries count as empty.
        if any(not text.strip() for text in v):
            raise ValueError("Empty texts are not allowed")
        return v

    @validator("max_length")
    def validate_max_length(cls, v, values):
        """Validate ``max_length`` against the limit of the selected model.

        Args:
            v: The requested maximum sequence length, or ``None``.
            values: Previously validated fields; ``model`` is looked up here.

        Returns:
            ``v`` unchanged when it is ``None`` or within bounds.

        Raises:
            ValueError: If ``v`` exceeds 8192 for the jina models, exceeds 512
                for the BERT/RoBERTa models, or is smaller than 1.
        """
        if v is None:
            return v
        # ``model`` is absent from ``values`` when its own validation failed;
        # fall back to the field default in that case.
        model = values.get('model', 'jina')
        if model in ('jina', 'jina-v3') and v > 8192:
            raise ValueError(f"Max length for {model} model is 8192")
        if model in ('robertalex', 'legal-bert', 'roberta-ca') and v > 512:
            raise ValueError(f"Max length for {model} model is 512")
        if v < 1:
            raise ValueError("Max length must be positive")
        return v
class EmbeddingResponse(BaseModel):
    """Response model for embedding generation"""

    # Every field is required (``...`` marks "no default").
    embeddings: List[List[float]] = Field(..., description="List of embedding vectors")
    model_used: str = Field(..., description="Model that was used")
    dimensions: int = Field(..., description="Dimension of embedding vectors")
    num_texts: int = Field(..., description="Number of texts processed")
class ModelInfo(BaseModel):
    """Information about available models"""

    # Every field is required (``...`` marks "no default").

    # Identification
    model_id: str = Field(..., description="Model identifier for API calls")
    name: str = Field(..., description="Full Hugging Face model name")

    # Capabilities
    dimensions: int = Field(..., description="Output embedding dimensions")
    max_sequence_length: int = Field(..., description="Maximum input sequence length")
    languages: List[str] = Field(..., description="Supported languages")

    # Descriptive metadata
    model_type: str = Field(..., description="Type/domain of model")
    description: str = Field(..., description="Model description")
class ErrorResponse(BaseModel):
    """Error response model"""

    # Required error message.
    detail: str = Field(..., description="Error message")
    # Optional classification of the error; defaults to ``None``.
    error_type: Optional[str] = Field(None, description="Type of error")