Spaces:
Running
Running
""" | |
Pydantic models and data structures for PDF Insight Beta application. | |
This module defines all the data models used throughout the application. | |
""" | |
from typing import List, Dict, Any, Optional | |
from pydantic import BaseModel, Field | |
class ChatRequest(BaseModel):
    """Request model for chat endpoint."""

    # Required fields: ``...`` as the default means no fallback value exists.
    session_id: str = Field(..., description="Session identifier")
    query: str = Field(..., description="User query")

    # Optional fields: each one falls back to the default given here.
    use_search: bool = Field(False, description="Whether to use web search")
    model_name: str = Field(
        default="meta-llama/llama-4-scout-17b-16e-instruct",
        description="LLM model to use",
    )
class SessionRequest(BaseModel):
    """Request model for session-related endpoints."""

    # The only field; required because ``...`` is passed as the default.
    session_id: str = Field(..., description="Session identifier")
class UploadResponse(BaseModel):
    """Response model for PDF upload."""

    # All three fields are required strings (none declares a default).
    status: str
    session_id: str
    message: str
class ChatResponse(BaseModel):
    """Response model for chat endpoint."""

    status: str
    answer: str
    # A list of free-form dictionaries: string keys, values of any type.
    context_used: List[Dict[str, Any]]
class ChatHistoryResponse(BaseModel):
    """Response model for chat history endpoint."""

    status: str
    # Each history item is a mapping of string keys to string values.
    history: List[Dict[str, str]]
class StatusResponse(BaseModel):
    """Generic status response model."""

    # Both fields are required strings (neither declares a default).
    status: str
    message: str
class ErrorResponse(BaseModel):
    """Error response model."""

    status: str
    detail: str
    # Optional; defaults to None when omitted. The name shadows the ``type``
    # builtin only inside this class body and is part of the model's field
    # names, so it is kept as-is.
    type: Optional[str] = None
class ModelInfo(BaseModel):
    """Model information."""

    # Both fields are required strings. ``id`` shadows the builtin only inside
    # this class body and is part of the model's field names, so it is kept.
    id: str
    name: str
class ModelsResponse(BaseModel):
    """Response model for models endpoint."""

    # Required list of ModelInfo entries (each carries an ``id`` and a ``name``).
    models: List[ModelInfo]
class ChunkMetadata(BaseModel):
    """Metadata for document chunks."""

    # Both declared fields are optional and default to None.
    source: Optional[str] = None
    page: Optional[int] = None

    class Config:
        # Keep metadata keys beyond ``source`` and ``page`` instead of
        # rejecting or discarding them.
        extra = "allow"
class DocumentChunk(BaseModel):
    """Document chunk with text and metadata."""

    text: str
    metadata: ChunkMetadata

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary format used in processing.

        Returns:
            A dictionary with two keys: ``"text"`` holding the chunk text and
            ``"metadata"`` holding the metadata model dumped to a plain dict.
        """
        meta = self.metadata
        # Pydantic v2 deprecates ``.dict()`` in favour of ``.model_dump()``;
        # v1 only has ``.dict()``. Prefer the new name and fall back to the
        # old one so this works, without deprecation warnings, on both.
        # The lookup goes through the class rather than the instance because
        # ChunkMetadata allows extra fields, and an extra field named
        # "model_dump" must not shadow the serializer.
        meta_cls = type(meta)
        dump = getattr(meta_cls, "model_dump", None)
        if dump is None:
            dump = meta_cls.dict
        return {
            "text": self.text,
            "metadata": dump(meta),
        }
class SessionData(BaseModel):
    """Session data structure."""

    file_path: str
    file_name: str
    # Chunks are held as plain dictionaries, not as model instances.
    chunks: List[Dict[str, Any]]
    # ``default_factory`` gives every instance its own fresh, empty list.
    chat_history: List[Dict[str, str]] = Field(default_factory=list)

    class Config:
        # Permit field types Pydantic has no validator for. The original
        # comment cites a FAISS index as the motivating example, but no such
        # field is declared in this class.
        # NOTE(review): confirm whether this setting is still needed.
        arbitrary_types_allowed = True
class ChatHistoryEntry(BaseModel):
    """Single chat history entry."""

    # Both fields are required strings (neither declares a default).
    user: str
    assistant: str
class ContextChunk(BaseModel):
    """Context chunk with similarity score."""

    text: str
    score: float
    # Free-form mapping: string keys, values of any type.
    metadata: Dict[str, Any]