Spaces:
Runtime error
Runtime error
from datetime import datetime, timedelta | |
from io import BytesIO | |
from json import dumps | |
from pydantic import BaseModel, ConfigDict, PositiveInt, PrivateAttr | |
import re | |
from types import MappingProxyType | |
from typing import Any, Dict, Literal, Mapping, Optional, Self, Sequence | |
from webvtt import Caption, WebVTT | |
from ctp_slack_bot.models.base import Chunk, Content | |
# Splits caption text into an optional "Speaker: " prefix (group 1) and the
# remaining speech (group 2). Every part is optional, so the pattern always
# matches at position 0 and ``groups()`` always yields a 2-tuple.
# DOTALL lets group 2 span line breaks, so multi-line cues keep all of their
# text instead of being cut off at the first newline.
SPEAKER_SPEECH_CAPTION_TEXT_PATTERN = re.compile(r"(?:([^:]+): )?(.*)", re.DOTALL)
class WebVTTFrame(BaseModel):
    """Represents a WebVTT frame: one timed caption with an optional speaker.

    Instances are immutable (the model is configured with ``frozen=True``).
    """

    identifier: str
    start: timedelta
    end: timedelta
    speaker: Optional[str] = None
    speech: str

    model_config = ConfigDict(frozen=True)

    @classmethod
    def from_webvtt_caption(cls: type["WebVTTFrame"], caption: Caption) -> Self:
        """Build a frame from a parsed ``webvtt`` caption.

        The caption text is split with SPEAKER_SPEECH_CAPTION_TEXT_PATTERN into
        an optional speaker and the speech; ``speaker`` is ``None`` when the
        text carries no "Speaker: " prefix.

        Args:
            caption: The caption whose identifier, start/end times and text
                are copied into the new frame.

        Returns:
            A new frame populated from ``caption``.
        """
        # The timestamp object's attributes are forwarded as timedelta keyword
        # arguments. NOTE(review): assumes those attributes are named like
        # timedelta's parameters (hours, minutes, ...) -- confirm against the
        # installed webvtt-py version.
        start = timedelta(**caption.start_time.__dict__)
        end = timedelta(**caption.end_time.__dict__)

        # The pattern can match the empty string, so search() never returns
        # None and groups() is always exactly (speaker, speech).
        speaker, speech = SPEAKER_SPEECH_CAPTION_TEXT_PATTERN.search(caption.text).groups()

        return cls(
            identifier=caption.identifier,
            start=start,
            end=end,
            speaker=speaker,
            speech=speech,
        )
class WebVTTFile(Content): # TODO: insert a FileContent class in the object inheritance hierarchy.
    """Represents a WebVTT file held in memory as raw bytes.

    The file is parsed on demand into WebVTTFrame objects, which in turn are
    exposed as Chunk objects.
    """

    filename: str
    modification_time: datetime
    bytes: bytes

    def get_chunks(self: Self) -> Sequence[Chunk]:
        """Return one Chunk per frame of the file.

        Each chunk's text is the frame's speech. The "start" and "end"
        metadata values are the file's modification time plus the frame's
        start/end offsets, and "user" is the frame's speaker (possibly None).
        """
        return tuple(
            Chunk(
                text=frame.speech,
                parent_id=self.id,
                chunk_id=frame.identifier,
                metadata={
                    "filename": self.filename,
                    "start": self.modification_time + frame.start,
                    "end": self.modification_time + frame.end,
                    "user": frame.speaker,
                },
            )
            for frame in self.get_frames()
        )

    def get_metadata(self: Self) -> Mapping[str, Any]:
        """Return a read-only mapping with the file name and modification time."""
        return MappingProxyType({
            "filename": self.filename,
            "modificationTime": self.modification_time,
        })

    def get_text(self: Self) -> str: # TODO
        """Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        # NotImplemented is a comparison sentinel, not an exception; calling it
        # would raise a confusing TypeError instead.
        raise NotImplementedError()

    def get_bytes(self: Self) -> bytes:
        """Return the raw bytes of the file."""
        return self.bytes

    def get_frames(self: Self) -> Sequence[WebVTTFrame]:
        """Parse the file's bytes as WebVTT and return its captions as frames."""
        captions = WebVTT.from_buffer(BytesIO(self.bytes)).captions
        return tuple(map(WebVTTFrame.from_webvtt_caption, captions))

    @property
    def id(self: Self) -> str:
        """Identifier of this file, derived from its file name."""
        return f"file:{self.filename}"