Spaces:

KingZack
/

ctp-slack-bot

Runtime error

App Files Files Community

LiKenun committed on Apr 15

Commit

62f7cb7

1 Parent(s): 06d7b2d

WebVTT timestamp interpolation proof-of-concept

Browse files

Files changed (1) hide show

notebooks/web-vtt.ipynb +13 -6

notebooks/web-vtt.ipynb CHANGED Viewed

@@ -13,6 +13,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
     "from functools import partial\n",
     "from html import escape\n",
     "from io import BytesIO\n",
@@ -20,6 +21,8 @@
     "from itertools import chain\n",
     "import re\n",
     "from webvtt import Caption, WebVTT\n",
     "\n",
     "display_html = partial(display_html, raw=True)"
    ]
@@ -30,7 +33,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "FILE_PATH = #"
    ]
   },
   {
@@ -73,9 +78,10 @@
        "\n",
        "                    <strong>Caption</strong> #344\n",
        "                    <ul>\n",
-       "                        <li><strong>Time:</strong> 01:01:19.390–01:01:22.370</li>\n",
        "                        <li><strong>Speaker:</strong> CUNY Tech Prep (CTP)</li>\n",
-       "                        <li><strong>Speech:</strong> <em>Alright. You can pick the rooms. Now go into your rooms.</em></li>\n",
        "                    </ul>\n",
        "                "
       ]
@@ -88,15 +94,16 @@
     "speaker_speech_pattern = re.compile('(?:([^:]+): )?(.*)')\n",
     "\n",
     "match web_vtt.captions[343]:\n",
-    "    case Caption(identifier=identifier, start=start, end=end, text=text):\n",
     "        match speaker_speech_pattern.search(text).groups():\n",
     "            case (speaker, speech):\n",
     "                display_html(f\"\"\"\n",
     "                    <strong>Caption</strong> #{identifier}\n",
     "                    <ul>\n",
-    "                        <li><strong>Time:</strong> {start}–{end}</li>\n",
     "                        <li><strong>Speaker:</strong> {escape(speaker)}</li>\n",
-    "                        <li><strong>Speech:</strong> <em>{escape(speech)}</em></li>\n",
     "                    </ul>\n",
     "                \"\"\")"
    ]

    "metadata": {},
    "outputs": [],
    "source": [
+    "from datetime import datetime, timedelta\n",
     "from functools import partial\n",
     "from html import escape\n",
     "from io import BytesIO\n",
     "from itertools import chain\n",
     "import re\n",
     "from webvtt import Caption, WebVTT\n",
+    "from webvtt.models import Timestamp\n",
+    "from zoneinfo import ZoneInfo\n",
     "\n",
     "display_html = partial(display_html, raw=True)"
    ]
    "metadata": {},
    "outputs": [],
    "source": [
+    "FILE_PATH = 'GMT20250411-223535_Recording.transcript.vtt'\n",
+    "TIME_ZONE = ZoneInfo(\"America/New_York\")\n",
+    "BASE_TIME = datetime(2025, 4, 11, hour=22, minute=35, second=35, tzinfo=ZoneInfo(\"GMT\")).astimezone(TIME_ZONE)"
    ]
   },
   {
        "\n",
        "                    <strong>Caption</strong> #344\n",
        "                    <ul>\n",
+       "                        <li><strong>Start:</strong> Friday, April 11, 2025, 07:36:54 PM EDT</li>\n",
        "                        <li><strong>Speaker:</strong> CUNY Tech Prep (CTP)</li>\n",
+       "                        <li><strong>Speech:</strong> Alright. You can pick the rooms. Now go into your rooms.</li>\n",
+       "                        <li><strong>End:</strong> Friday, April 11, 2025, 07:36:57 PM EDT</li>\n",
        "                    </ul>\n",
        "                "
       ]
     "speaker_speech_pattern = re.compile('(?:([^:]+): )?(.*)')\n",
     "\n",
     "match web_vtt.captions[343]:\n",
+    "    case Caption(identifier=identifier, start_time=start_time, end_time=end_time, text=text):\n",
     "        match speaker_speech_pattern.search(text).groups():\n",
     "            case (speaker, speech):\n",
     "                display_html(f\"\"\"\n",
     "                    <strong>Caption</strong> #{identifier}\n",
     "                    <ul>\n",
+    "                        <li><strong>Start:</strong> {BASE_TIME + timedelta(**start_time.__dict__):%A, %B %d, %Y, %I:%M:%S %p %Z}</li>\n",
     "                        <li><strong>Speaker:</strong> {escape(speaker)}</li>\n",
+    "                        <li><strong>Speech:</strong> {escape(speech)}</li>\n",
+    "                        <li><strong>End:</strong> {BASE_TIME + timedelta(**end_time.__dict__):%A, %B %d, %Y, %I:%M:%S %p %Z}</li>\n",
     "                    </ul>\n",
     "                \"\"\")"
    ]