Spaces:

KingZack
/

ctp-slack-bot

Runtime error

File size: 4,832 Bytes

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# WebVTT Reading and Chunking Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime, timedelta\n",
    "from functools import partial\n",
    "from html import escape\n",
    "from io import BytesIO\n",
    "from IPython.display import display_html\n",
    "from itertools import chain\n",
    "import re\n",
    "from webvtt import Caption, WebVTT\n",
    "from webvtt.models import Timestamp\n",
    "from zoneinfo import ZoneInfo\n",
    "\n",
    "display_html = partial(display_html, raw=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "FILE_PATH = 'GMT20250411-223535_Recording.transcript.vtt'\n",
    "TIME_ZONE = ZoneInfo(\"America/New_York\")\n",
    "BASE_TIME = datetime(2025, 4, 11, hour=22, minute=35, second=35, tzinfo=ZoneInfo(\"GMT\")).astimezone(TIME_ZONE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the transcript as raw bytes, then hand them to webvtt through an in-memory buffer.\n",
    "with open(FILE_PATH, mode='rb') as transcript_file:\n",
    "    transcript_buffer = BytesIO(transcript_file.read())\n",
    "web_vtt = WebVTT.from_buffer(transcript_buffer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<ul><li>__class__</li><li>__delattr__</li><li>__dict__</li><li>__dir__</li><li>__doc__</li><li>__eq__</li><li>__format__</li><li>__ge__</li><li>__getattribute__</li><li>__getitem__</li><li>__getstate__</li><li>__gt__</li><li>__hash__</li><li>__init__</li><li>__init_subclass__</li><li>__le__</li><li>__len__</li><li>__lt__</li><li>__module__</li><li>__ne__</li><li>__new__</li><li>__reduce__</li><li>__reduce_ex__</li><li>__repr__</li><li>__setattr__</li><li>__sizeof__</li><li>__str__</li><li>__subclasshook__</li><li>__weakref__</li><li>_get_destination_file</li><li>_get_lines</li><li>_has_bom</li><li>captions</li><li>content</li><li>encoding</li><li>file</li><li>footer_comments</li><li>from_buffer</li><li>from_sbv</li><li>from_srt</li><li>from_string</li><li>header_comments</li><li>iter_slice</li><li>read</li><li>read_buffer</li><li>save</li><li>save_as_srt</li><li>styles</li><li>total_length</li><li>write</li></ul>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# List every attribute name of the parsed WebVTT object as an HTML bullet list.\n",
    "member_items = ''.join(f'<li>{escape(member)}</li>' for member in dir(web_vtt))\n",
    "display_html(f'<ul>{member_items}</ul>')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "                    <strong>Caption</strong> #344\n",
       "                    <ul>\n",
       "                        <li><strong>Start:</strong> Friday, April 11, 2025, 07:36:54 PM EDT</li>\n",
       "                        <li><strong>Speaker:</strong> CUNY Tech Prep (CTP)</li>\n",
       "                        <li><strong>Speech:</strong> Alright. You can pick the rooms. Now go into your rooms.</li>\n",
       "                        <li><strong>End:</strong> Friday, April 11, 2025, 07:36:57 PM EDT</li>\n",
       "                    </ul>\n",
       "                "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Optional \"<speaker>: \" prefix on the first line, then the speech, which may span several lines.\n",
    "speaker_speech_pattern = re.compile(r'(?:([^:\\n]+): )?(.*)', re.DOTALL)\n",
    "\n",
    "# Inspect one caption in detail; index 343 is the caption rendered in the output below.\n",
    "match web_vtt.captions[343]:\n",
    "    case Caption(identifier=identifier, start_time=start_time, end_time=end_time, text=text):\n",
    "        # Every part of the pattern is optional, so fullmatch always succeeds; speaker is None when the\n",
    "        # caption text has no \"<speaker>: \" prefix.\n",
    "        match speaker_speech_pattern.fullmatch(text).groups():\n",
    "            case (speaker, speech):\n",
    "                # NOTE(review): timedelta(**timestamp.__dict__) assumes the Timestamp attributes are named\n",
    "                # like timedelta's keyword arguments -- confirm when upgrading webvtt.\n",
    "                # escape() cannot take None, so captions without a speaker prefix get a placeholder label.\n",
    "                display_html(f\"\"\"\n",
    "                    <strong>Caption</strong> #{identifier}\n",
    "                    <ul>\n",
    "                        <li><strong>Start:</strong> {BASE_TIME + timedelta(**start_time.__dict__):%A, %B %d, %Y, %I:%M:%S %p %Z}</li>\n",
    "                        <li><strong>Speaker:</strong> {escape(speaker or 'Unknown')}</li>\n",
    "                        <li><strong>Speech:</strong> {escape(speech)}</li>\n",
    "                        <li><strong>End:</strong> {BASE_TIME + timedelta(**end_time.__dict__):%A, %B %d, %Y, %I:%M:%S %p %Z}</li>\n",
    "                    </ul>\n",
    "                \"\"\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}