Spaces:
Runtime error
Runtime error
import hashlib
import json
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Dict, List, Optional, Union, Any, ClassVar

from pydantic import BaseModel, Field, validator
class Metadata(BaseModel):
    """A class representing metadata about content.

    Holds the three values declared below: a stable identifier, a
    modification timestamp, and a content hash.
    """

    # The content's identity, consistent across modifications.
    id: str
    # The content's modification time, for detection of alterations.
    modification_time: datetime
    # The content's hash, for detection of alterations.
    hash: str
class Content(BaseModel):
    """A class representing ingested content.

    As declared here it carries only the ``Metadata`` record describing
    the content.
    """

    # Identity / modification-time / hash record for this content.
    metadata: Metadata
class Ingestible(ABC, BaseModel):
    """An abstract base class for ingestible content.

    Subclasses supply the actual content (by overriding ``content``) and
    may tune or replace the chunking performed by ``get_chunks``.
    """

    # Fallback chunk length, in characters, used by ``get_chunks`` when the
    # instance does not expose its own ``chunk_size`` attribute. The value is
    # an arbitrary default; subclasses can set ``chunk_size`` to override it.
    _DEFAULT_CHUNK_SIZE: ClassVar[int] = 1000

    # Identity / modification-time / hash record for this content.
    metadata: Metadata

    def content(self) -> Union[str, List[str], Content]:
        """
        Return content ready for vectorization.

        This could be:
        - A single string
        - A list of strings (pre-chunked)
        - A more complex structure that can be recursively processed

        The base implementation is a placeholder that returns ``None``;
        subclasses are expected to override it. Only the first two forms
        are understood by the default ``get_chunks``.
        """
        # NOTE(review): intentionally not decorated with @abstractmethod --
        # that would also reject subclasses that provide ``content`` as a
        # field or attribute rather than as a method. Confirm before tightening.
        pass

    def get_chunks(self) -> List[str]:
        """
        Split content into chunks suitable for vectorization.

        Override this in subclasses for specialized chunking logic.

        Returns:
            For string content, consecutive slices of at most ``chunk_size``
            characters (an empty list for an empty string). For list
            content, the list itself, unchanged.

        Raises:
            ValueError: If the content is neither a string nor a list, or
                if ``chunk_size`` is smaller than 1.
        """
        content = self.content
        # On this base class ``content`` is a method, so it has to be called
        # to obtain the value; a subclass that exposes it as a property or
        # field already yields the value directly.
        if callable(content):
            content = content()

        if isinstance(content, str):
            # ``chunk_size`` is not declared on this class, so fall back to
            # the class-level default when an instance does not define it.
            chunk_size = getattr(self, "chunk_size", self._DEFAULT_CHUNK_SIZE)
            if chunk_size < 1:
                raise ValueError(
                    f"chunk_size must be a positive integer, got {chunk_size!r}"
                )
            # Simple chunking by character count.
            return [
                content[start:start + chunk_size]
                for start in range(0, len(content), chunk_size)
            ]

        if isinstance(content, list):
            # Content is already chunked.
            return content

        raise ValueError(f"Unsupported content type: {type(content)}")

    def key(self) -> str:
        """Convenience accessor for the metadata identifier (``metadata.id``)."""
        # ``Metadata`` declares ``id`` (not ``key``) as its identity field.
        return self.metadata.id