Spaces:

KingZack
/

ctp-slack-bot

Runtime error

App Files Files Community

ctp-slack-bot / src /ctp_slack_bot /models /webvtt.py

LiKenun

WebVTT models and changes to support it

a60b3fc 5 months ago

raw

history blame

2.68 kB

	from datetime import datetime, timedelta
	from io import BytesIO
	from json import dumps
	from pydantic import BaseModel, ConfigDict, PositiveInt, PrivateAttr
	import re
	from types import MappingProxyType
	from typing import Any, Dict, Literal, Mapping, Optional, Self, Sequence
	from webvtt import Caption, WebVTT

	from ctp_slack_bot.models.base import Chunk, Content

	# Splits caption text into an optional "<speaker>: " prefix (group 1, None when
	# absent) and the remaining speech (group 2). re.DOTALL lets the speech group
	# run across line breaks, so text containing newlines is captured in full
	# instead of being cut off at the first newline.
	SPEAKER_SPEECH_CAPTION_TEXT_PATTERN = re.compile(r'(?:([^:]+): )?(.*)', re.DOTALL)

	class WebVTTFrame(BaseModel):
	    """Represents a WebVTT frame.

	    Instances are immutable (the model is configured as frozen).
	    """

	    # Identifier copied from the source caption.
	    identifier: str
	    # Start and end of the frame, built from the caption's start/end timestamps.
	    start: timedelta
	    end: timedelta
	    # Speaker parsed from a leading "<speaker>: " prefix; None when there is none.
	    speaker: Optional[str] = None
	    # Caption text with any speaker prefix removed.
	    speech: str

	    model_config = ConfigDict(frozen=True)

	    @classmethod
	    def from_webvtt_caption(cls: type["WebVTTFrame"], caption: Caption) -> Self:
	        """Build a frame from a ``webvtt`` caption.

	        Args:
	            caption: The caption to convert. Its ``identifier``, ``start_time``,
	                ``end_time`` and ``text`` attributes are read.

	        Returns:
	            A new frame whose ``speaker`` is ``None`` when the caption text has
	            no "<speaker>: " prefix.
	        """
	        identifier = caption.identifier
	        # The timestamp objects' instance attributes are forwarded verbatim as
	        # timedelta keyword arguments.
	        # NOTE(review): assumes those attributes are named like timedelta
	        # parameters (hours, minutes, ...) - confirm against the webvtt version.
	        start = timedelta(**caption.start_time.__dict__)
	        end = timedelta(**caption.end_time.__dict__)
	        # The pattern has exactly two groups, so groups() is always a 2-tuple;
	        # the first element is None when the optional speaker prefix is absent.
	        speaker, speech = SPEAKER_SPEECH_CAPTION_TEXT_PATTERN.search(caption.text).groups()
	        return cls(identifier=identifier, start=start, end=end, speaker=speaker, speech=speech)


	class WebVTTFile(Content): # TODO: insert a FileContent class in the object inheritance hierarchy.
	    """Represents a WebVTT file held in memory as raw bytes."""

	    # Name of the file; also used to derive ``id``.
	    filename: str
	    # Modification time of the file; frame offsets are added to it in get_chunks.
	    modification_time: datetime
	    # Raw file contents, parsed on demand by get_frames.
	    bytes: bytes

	    def get_chunks(self: Self) -> Sequence[Chunk]:
	        """Return one chunk per frame in the file.

	        Each chunk carries the frame's speech as text, this file's ``id`` as
	        parent, the frame identifier as chunk ID, and metadata holding the
	        filename, the speaker and the frame's start/end offsets added to the
	        file's modification time.
	        """
	        return tuple(Chunk(text=frame.speech,
	                           parent_id=self.id,
	                           chunk_id=frame.identifier,
	                           metadata={
	                               "filename": self.filename,
	                               "start": self.modification_time + frame.start,
	                               "end": self.modification_time + frame.end,
	                               "user": frame.speaker
	                           })
	                     for frame
	                     in self.get_frames())

	    def get_metadata(self: Self) -> Mapping[str, Any]:
	        """Return a read-only mapping with the filename and modification time."""
	        return MappingProxyType({
	            "filename": self.filename,
	            "modificationTime": self.modification_time
	        })

	    def get_text(self: Self) -> str: # TODO
	        """Not implemented yet.

	        Raises:
	            NotImplementedError: Always.
	        """
	        # NotImplemented is a non-callable constant, not an exception class;
	        # NotImplementedError is the exception meant for unimplemented methods.
	        raise NotImplementedError("WebVTTFile.get_text is not implemented yet.")

	    def get_bytes(self: Self) -> bytes:
	        """Return the raw file contents."""
	        return self.bytes

	    def get_frames(self: Self) -> Sequence[WebVTTFrame]:
	        """Parse the raw file contents and return one frame per caption."""
	        # Parse from this instance's own bytes, wrapped in an in-memory stream.
	        captions = WebVTT.from_buffer(BytesIO(self.bytes)).captions
	        return tuple(map(WebVTTFrame.from_webvtt_caption, captions))

	    @property
	    def id(self: Self) -> str:
	        """Identifier of this file, in the form ``file:<filename>``."""
	        return f"file:{self.filename}"