# WebVTT Reading and Chunking Test

In [1]:
from datetime import datetime, timedelta
from functools import partial
from html import escape
from io import BytesIO
from IPython.display import display_html
from itertools import chain
import re
from webvtt import Caption, WebVTT
from webvtt.models import Timestamp
from zoneinfo import ZoneInfo

# Rebind display_html with raw=True pre-applied, so the HTML strings built in
# the cells below are passed as already-rendered markup rather than as Python
# objects to be formatted first.
display_html = partial(display_html, raw=True)

In [None]:
# Transcript file to load.
FILE_PATH = 'GMT20250411-223535_Recording.transcript.vtt'

# Zone in which absolute caption times are displayed.
TIME_ZONE = ZoneInfo("America/New_York")

# Wall-clock instant that caption offsets are added to. The value matches the
# GMT stamp in FILE_PATH (2025-04-11 22:35:35), converted to TIME_ZONE.
BASE_TIME = datetime(
    2025, 4, 11, 22, 35, 35,
    tzinfo=ZoneInfo("GMT"),
).astimezone(TIME_ZONE)

In [3]:
# Read the transcript as raw bytes and hand it to the WebVTT parser through an
# in-memory binary buffer.
with open(FILE_PATH, mode='rb') as vtt_file:
    web_vtt = WebVTT.from_buffer(BytesIO(vtt_file.read()))

In [4]:
# Show every attribute name reported by dir() for the parsed transcript object
# as an HTML bullet list (names are HTML-escaped).
display_html('<ul>' + ''.join(f'<li>{escape(name)}</li>' for name in dir(web_vtt)) + '</ul>')

In [5]:
speaker_speech_pattern = re.compile('(?:([^:]+): )?(.*)')

match web_vtt.captions[343]:
    case Caption(identifier=identifier, start_time=start_time, end_time=end_time, text=text):
        match speaker_speech_pattern.search(text).groups():
            case (speaker, speech):
                display_html(f"""
                    <strong>Caption</strong> #{identifier}
                    <ul>
                        <li><strong>Start:</strong> {BASE_TIME + timedelta(**start_time.__dict__):%A, %B %d, %Y, %I:%M:%S %p %Z}</li>
                        <li><strong>Speaker:</strong> {escape(speaker)}</li>
                        <li><strong>Speech:</strong> {escape(speech)}</li>
                        <li><strong>End:</strong> {BASE_TIME + timedelta(**end_time.__dict__):%A, %B %d, %Y, %I:%M:%S %p %Z}</li>
                    </ul>
                """)