# Jatin Mehra
# Refactor and reorganize codebase for improved maintainability and clarity
# ba907cd
"""
Pydantic models and data structures for PDF Insight Beta application.
This module defines all the data models used throughout the application.
"""
from typing import List, Dict, Any, Optional
from pydantic import BaseModel, Field
class ChatRequest(BaseModel):
    """Request model for chat endpoint."""

    # The two fields below carry an Ellipsis default, which marks them as
    # required: a payload missing either one fails validation.
    session_id: str = Field(default=..., description="Session identifier")
    query: str = Field(default=..., description="User query")

    # Web search is opt-in; it stays off unless the caller sets the flag.
    use_search: bool = Field(False, description="Whether to use web search")

    # Used when the caller does not name a model explicitly.
    model_name: str = Field(
        "meta-llama/llama-4-scout-17b-16e-instruct",
        description="LLM model to use",
    )
class SessionRequest(BaseModel):
    """Request model for session-related endpoints."""

    # Required (Ellipsis default): there is no fallback session.
    session_id: str = Field(default=..., description="Session identifier")
class UploadResponse(BaseModel):
    """Response model for PDF upload."""

    # All three fields are plain strings without defaults, so each one must
    # be supplied when the response is constructed.
    status: str
    session_id: str
    message: str
class ChatResponse(BaseModel):
    """Response model for chat endpoint."""

    # Every field is required; none declares a default.
    status: str
    answer: str
    # A list of free-form dictionaries (string keys, values of any type).
    context_used: List[Dict[str, Any]]
class ChatHistoryResponse(BaseModel):
    """Response model for chat history endpoint."""

    # Both fields are required; none declares a default.
    status: str
    # Each history item is a dictionary mapping strings to strings.
    history: List[Dict[str, str]]
class StatusResponse(BaseModel):
    """Generic status response model."""

    # Two required string fields; neither has a default.
    status: str
    message: str
class ErrorResponse(BaseModel):
    """Error response model."""

    # Required string fields.
    status: str
    detail: str
    # Optional; left as None when not supplied. The name shadows the
    # builtin `type` inside the class body, but it is part of the public
    # schema and therefore kept as-is.
    type: Optional[str] = None
class ModelInfo(BaseModel):
    """Model information."""

    # Two required string fields; neither has a default.
    id: str
    name: str
class ModelsResponse(BaseModel):
    """Response model for models endpoint."""

    # Required list of ModelInfo entries.
    models: List[ModelInfo]
class ChunkMetadata(BaseModel):
    """Metadata for document chunks."""

    # The two declared keys are optional and default to None.
    source: Optional[str] = None
    page: Optional[int] = None

    class Config:
        # Keys beyond the two declared above are accepted and kept on the
        # instance instead of being rejected or dropped.
        extra = "allow"
class DocumentChunk(BaseModel):
    """Document chunk with text and metadata."""

    # Both fields are required; neither declares a default.
    text: str
    metadata: ChunkMetadata

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary format used in processing.

        Returns:
            A dictionary with two keys: ``"text"`` holding the chunk text and
            ``"metadata"`` holding the metadata model dumped to a plain dict.
        """
        metadata = self.metadata
        # Pydantic v2 renamed ``.dict()`` to ``.model_dump()`` and warns on
        # the old name. Prefer the new method when the class provides it and
        # fall back to ``.dict()`` so Pydantic v1 keeps working. The lookup
        # goes through the class so that an extra metadata key that happens
        # to be called "model_dump" cannot shadow the method.
        dump = getattr(type(metadata), "model_dump", None)
        if dump is None:
            dump = type(metadata).dict
        return {
            "text": self.text,
            "metadata": dump(metadata),
        }
class SessionData(BaseModel):
    """Session data structure."""

    # Required string fields.
    file_path: str
    file_name: str
    # Chunks are held as plain dictionaries, not as model instances.
    chunks: List[Dict[str, Any]]
    # default_factory builds a new empty list for every instance, so the
    # history is never shared between sessions.
    chat_history: List[Dict[str, str]] = Field(default_factory=list)

    class Config:
        # Permits field types that pydantic has no validator for. The
        # original note gave a FAISS index as the example; no field of that
        # kind is declared in this class.
        arbitrary_types_allowed = True
class ChatHistoryEntry(BaseModel):
    """Single chat history entry."""

    # Two required string fields; neither has a default.
    user: str
    assistant: str
class ContextChunk(BaseModel):
    """Context chunk with similarity score."""

    # All three fields are required; none declares a default.
    text: str
    score: float
    # Free-form dictionary (string keys, values of any type).
    metadata: Dict[str, Any]