Spaces:

KingZack
/

ctp-slack-bot

Runtime error

App Files Files Community

Kevin Li committed on Apr 19

Commit

fb92766

unverified ·

2 Parent(s): deb6243 5c7c7e5

Merge pull request #4 from CUNYTechPrep/refactor-3

Browse files

Files changed (26) hide show

.env.template +11 -6
notebooks/container.ipynb +102 -0
notebooks/google_drive.ipynb +0 -0
notebooks/google_drive_web_vtt_vectorizer_and_storer.ipynb +585 -0
notebooks/web-vtt.ipynb +0 -0
notebooks/web_vtt.ipynb +355 -0
pyproject.toml +6 -3
src/ctp_slack_bot/app.py +31 -12
src/ctp_slack_bot/containers.py +4 -2
src/ctp_slack_bot/core/config.py +17 -1
src/ctp_slack_bot/core/logging.py +4 -2
src/ctp_slack_bot/core/response_rendering.py +0 -13
src/ctp_slack_bot/db/mongo_db.py +44 -49
src/ctp_slack_bot/models/__init__.py +2 -0
src/ctp_slack_bot/models/base.py +8 -15
src/ctp_slack_bot/models/google_drive.py +25 -0
src/ctp_slack_bot/models/slack.py +4 -16
src/ctp_slack_bot/models/webvtt.py +36 -39
src/ctp_slack_bot/services/__init__.py +1 -0
src/ctp_slack_bot/services/application_database_service.py +29 -0
src/ctp_slack_bot/services/content_ingestion_service.py +2 -2
src/ctp_slack_bot/services/google_drive_service.py +142 -0
src/ctp_slack_bot/services/schedule_service.py +14 -3
src/ctp_slack_bot/services/vector_database_service.py +1 -1
src/ctp_slack_bot/utils/__init__.py +1 -0
src/ctp_slack_bot/utils/secret_stripper.py +6 -0

.env.template CHANGED Viewed

@@ -3,15 +3,9 @@
 # APScheduler Configuration
 SCHEDULER_TIMEZONE=UTC
-# API Configuration
-API_HOST=0.0.0.0
-API_PORT=8000
 # Slack Configuration
 SLACK_BOT_TOKEN=🪙
-SLACK_SIGNING_SECRET=🔐
 SLACK_APP_TOKEN=🦥
-SLACK_USER_TOKEN=🦊
 # Vectorization Configuration
 EMBEDDING_MODEL=🌮
@@ -34,3 +28,14 @@ CHAT_MODEL=gpt-3.5-turbo
 MAX_TOKENS=150
 TEMPERATURE=0.8
 SYSTEM_PROMPT="You are a helpful teaching assistant for a data science class.\nBased on the student's question, you will be given context retrieved from class transcripts and materials to answer their question.\nYour responses should be:\n\n1. Accurate and based on the class content\n2. Clear and educational\n3. Concise but complete\nIf you're unsure about something, acknowledge it and suggest asking the professor."

 # APScheduler Configuration
 SCHEDULER_TIMEZONE=UTC
 # Slack Configuration
 SLACK_BOT_TOKEN=🪙
 SLACK_APP_TOKEN=🦥
 # Vectorization Configuration
 EMBEDDING_MODEL=🌮
 MAX_TOKENS=150
 TEMPERATURE=0.8
 SYSTEM_PROMPT="You are a helpful teaching assistant for a data science class.\nBased on the student's question, you will be given context retrieved from class transcripts and materials to answer their question.\nYour responses should be:\n\n1. Accurate and based on the class content\n2. Clear and educational\n3. Concise but complete\nIf you're unsure about something, acknowledge it and suggest asking the professor."
+# Google Drive Configuration
+GOOGLE_DRIVE_ROOT_ID=1NB91EcIUXbOVcdCkXOAHdmWrDfgoh9fQ
+GOOGLE_PROJECT_ID=insufferable-slacker-123456
+GOOGLE_PRIVATE_KEY_ID=1a2b3c4d5e6f748891091d21304e506674829507
+GOOGLE_PRIVATE_KEY="-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASC...\n-----END PRIVATE KEY-----\n"
+GOOGLE_CLIENT_EMAIL=botty-bot@insufferable-slacker-123456.iam.gserviceaccount.com
+GOOGLE_CLIENT_ID=123456789012345678901
+# File Monitoring Configuration
+FILE_MONITOR_ROOT_PATH=Transcripts/Friday

notebooks/container.ipynb ADDED Viewed

	@@ -0,0 +1,102 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Loading Dependency Injection Container in Jupyter Notebook"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from ctp_slack_bot.containers import Container\n",
+    "from ctp_slack_bot.services import VectorDatabaseService\n",
+    "\n",
+    "container = Container()\n",
+    "container.wire(packages=['ctp_slack_bot'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 16:43:46.927\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.core.config\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m14\u001b[0m - \u001b[34m\u001b[1mCreated Settings\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Settings(LOG_LEVEL='INFO', LOG_FORMAT='json', SCHEDULER_TIMEZONE='America/New_York', SLACK_BOT_TOKEN=SecretStr('**********'), SLACK_APP_TOKEN=SecretStr('**********'), EMBEDDING_MODEL='text-embedding-3-small', VECTOR_DIMENSION=1536, CHUNK_SIZE=1000, CHUNK_OVERLAP=200, TOP_K_MATCHES=5, MONGODB_URI=SecretStr('**********'), MONGODB_NAME='ctp_slack_bot', SCORE_THRESHOLD=0.5, HF_API_TOKEN=SecretStr('**********'), OPENAI_API_KEY=SecretStr('**********'), CHAT_MODEL='gpt-3.5-turbo', MAX_TOKENS=150, TEMPERATURE=0.8, SYSTEM_PROMPT=\"You are a helpful teaching assistant for a data science class.\\nBased on the students question, you will be given context retreived from class transcripts and materials to answer their question.\\nYour responses should be:\\n\\n1. Accurate and based on the class content\\n2. Clear and educational\\n3. Concise but complete\\nIf you're unsure about something, acknowledge it and suggest asking the professor.\", GOOGLE_PROJECT_ID='voltaic-reducer-294821', GOOGLE_PRIVATE_KEY_ID=SecretStr('**********'), GOOGLE_PRIVATE_KEY=SecretStr('**********'), GOOGLE_CLIENT_ID='102943207835073856980', GOOGLE_CLIENT_EMAIL='[email protected]', GOOGLE_AUTH_URI='https://accounts.google.com/o/oauth2/auth', GOOGLE_TOKEN_URI='https://oauth2.googleapis.com/token', GOOGLE_AUTH_PROVIDER_CERT_URL='https://www.googleapis.com/oauth2/v1/certs', GOOGLE_CLIENT_CERT_URL='https://www.googleapis.com/robot/v1/metadata/x509/ctp-slack-bot-714%40voltaic-reducer-294821.iam.gserviceaccount.com', GOOGLE_UNIVERSE_DOMAIN='googleapis.com', FILE_MONITOR_ROOT_PATH='Transcripts/Friday Building AI Applications Session')"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "container.settings()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 16:45:25.997\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.core.config\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m14\u001b[0m - \u001b[34m\u001b[1mCreated Settings\u001b[0m\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 16:45:25.999\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36minit\u001b[0m:\u001b[36m175\u001b[0m - \u001b[1mInitializing MongoDB connection for database: ctp_slack_bot\u001b[0m\n",
+      "\u001b[32m2025-04-19 16:45:25.999\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m26\u001b[0m - \u001b[34m\u001b[1mCreated MongoDB\u001b[0m\n",
+      "\u001b[32m2025-04-19 16:45:25.999\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mconnect\u001b[0m:\u001b[36m32\u001b[0m - \u001b[34m\u001b[1mConnecting to MongoDB using URI: mongodb+srv://ctp-slack-bot.xkipuvm.mongodb.net/?retryWrites=true&w=majority&appName=ctp-slack-bot\u001b[0m\n",
+      "\u001b[32m2025-04-19 16:45:26.000\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mconnect\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mMongoDB client initialized for database: ctp_slack_bot\u001b[0m\n",
+      "\u001b[32m2025-04-19 16:45:26.279\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 16:45:26.280\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36m_test_connection\u001b[0m:\u001b[36m186\u001b[0m - \u001b[1mMongoDB connection test successful!\u001b[0m\n",
+      "\u001b[32m2025-04-19 16:45:26.280\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m21\u001b[0m - \u001b[34m\u001b[1mCreated VectorDatabaseService\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "vector_database_service: VectorDatabaseService = container.vector_database_service()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

notebooks/google_drive.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

notebooks/google_drive_web_vtt_vectorizer_and_storer.ipynb ADDED Viewed

	@@ -0,0 +1,585 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Google Drive WebVTT Vectorizer and Storer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:21:27.333\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.core.config\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m14\u001b[0m - \u001b[34m\u001b[1mCreated Settings\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:27.334\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.core.config\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m14\u001b[0m - \u001b[34m\u001b[1mCreated Settings\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:27.337\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.google_drive_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreated GoogleDriveService\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:27.361\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m22\u001b[0m - \u001b[34m\u001b[1mCreated EmbeddingsModelService\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:27.362\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vectorization_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m22\u001b[0m - \u001b[34m\u001b[1mCreated VectorizationService\u001b[0m\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:21:27.364\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36minit\u001b[0m:\u001b[36m175\u001b[0m - \u001b[1mInitializing MongoDB connection for database: ctp_slack_bot\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:27.364\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m26\u001b[0m - \u001b[34m\u001b[1mCreated MongoDB\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:27.364\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mconnect\u001b[0m:\u001b[36m32\u001b[0m - \u001b[34m\u001b[1mConnecting to MongoDB using URI: mongodb+srv://ctp-slack-bot.xkipuvm.mongodb.net/?retryWrites=true&w=majority&appName=ctp-slack-bot\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:27.365\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mconnect\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mMongoDB client initialized for database: ctp_slack_bot\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:27.825\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:27.825\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36m_test_connection\u001b[0m:\u001b[36m186\u001b[0m - \u001b[1mMongoDB connection test successful!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:27.825\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m21\u001b[0m - \u001b[34m\u001b[1mCreated VectorDatabaseService\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "from datetime import datetime\n",
+    "from functools import partial\n",
+    "from html import escape\n",
+    "from IPython.display import display_html\n",
+    "from itertools import chain\n",
+    "from textwrap import wrap\n",
+    "from zoneinfo import ZoneInfo\n",
+    "\n",
+    "from ctp_slack_bot.containers import Container\n",
+    "from ctp_slack_bot.models import WebVTTContent\n",
+    "\n",
+    "display_html = partial(display_html, raw=True)\n",
+    "\n",
+    "container = Container()\n",
+    "google_drive_service = container.google_drive_service()\n",
+    "vectorization_service = container.vectorization_service()\n",
+    "vector_database_service = container.vector_database_service()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Configuration\n",
+    "\n",
+    "⚠️ Set the values below before running the remaining cells to avoid processing the wrong file type or re-uploading files that were already uploaded."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "MIME_TYPE = \"text/vtt\" # This should probably not be changed.\n",
+    "\n",
+    "MODIFICATION_TIME_CUTOFF = datetime(2024, 8, 30, tzinfo=ZoneInfo(\"UTC\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Upload"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<p>Found 7 files/folders.</p>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<ul><li>Week-03-Analytics-Friday-2024-09-13.cc.vtt</li><li>Week-07-Regressors-via-Linear-Regression-Friday-2024-10-18.transcript.vtt</li><li>Week-06-Classifiers-via-Logistic-Regression-Friday-2024-10-11.transcript.vtt</li><li>Week-09-AI-Part-1-Neural-Networks-Intro-to-HuggingFace-Friday-2024-11-01.cc.vtt</li><li>Week-08-Decision-Trees-Random-Forest-Tuesday-2024-10-22.cc.vtt</li><li>Week-02-Finding-Cleaning-Data-Friday-2024-09-06.vtt</li><li>Week-01-Setup-Pandas-Friday-2024-08-30.vtt</li></ul>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<p>7 files/folders pass the modification time (<em>2024-08-30 00:00:00+00:00</em>) cut-off.</p>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<ul><li>Week-03-Analytics-Friday-2024-09-13.cc.vtt</li><li>Week-07-Regressors-via-Linear-Regression-Friday-2024-10-18.transcript.vtt</li><li>Week-06-Classifiers-via-Logistic-Regression-Friday-2024-10-11.transcript.vtt</li><li>Week-09-AI-Part-1-Neural-Networks-Intro-to-HuggingFace-Friday-2024-11-01.cc.vtt</li><li>Week-08-Decision-Trees-Random-Forest-Tuesday-2024-10-22.cc.vtt</li><li>Week-02-Finding-Cleaning-Data-Friday-2024-09-06.vtt</li><li>Week-01-Setup-Pandas-Friday-2024-08-30.vtt</li></ul>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<p>7 files/folders pass the modification time (<em>2024-08-30 00:00:00+00:00</em>) cut-off and MIME type (<em>text/vtt</em>) criterion.</p>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<ul><li>Week-03-Analytics-Friday-2024-09-13.cc.vtt</li><li>Week-07-Regressors-via-Linear-Regression-Friday-2024-10-18.transcript.vtt</li><li>Week-06-Classifiers-via-Logistic-Regression-Friday-2024-10-11.transcript.vtt</li><li>Week-09-AI-Part-1-Neural-Networks-Intro-to-HuggingFace-Friday-2024-11-01.cc.vtt</li><li>Week-08-Decision-Trees-Random-Forest-Tuesday-2024-10-22.cc.vtt</li><li>Week-02-Finding-Cleaning-Data-Friday-2024-09-06.vtt</li><li>Week-01-Setup-Pandas-Friday-2024-08-30.vtt</li></ul>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "item_metadata = google_drive_service.list_directory(\"\")\n",
+    "display_html(f\"<p>Found {len(item_metadata)} files/folders.</p>\")\n",
+    "display_html(\"\".join(chain(\"<ul>\", (f\"<li>{escape(metadata.name)}</li>\" for metadata in item_metadata), \"</ul>\")))\n",
+    "\n",
+    "recent_metadata = tuple(filter(lambda metadata: MODIFICATION_TIME_CUTOFF <= metadata.modified_time, item_metadata))\n",
+    "display_html(f\"<p>{len(recent_metadata)} files/folders pass the modification time (<em>{MODIFICATION_TIME_CUTOFF}</em>) cut-off.</p>\")\n",
+    "display_html(\"\".join(chain(\"<ul>\", (f\"<li>{escape(metadata.name)}</li>\" for metadata in recent_metadata), \"</ul>\")))\n",
+    "\n",
+    "metadata_to_process = tuple(filter(lambda metadata: metadata.mime_type == MIME_TYPE, recent_metadata))\n",
+    "display_html(f\"<p>{len(metadata_to_process)} files/folders pass the modification time (<em>{MODIFICATION_TIME_CUTOFF}</em>) cut-off and MIME type (<em>{MIME_TYPE}</em>) criterion.</p>\")\n",
+    "display_html(\"\".join(chain(\"<ul>\", (f\"<li>{escape(metadata.name)}</li>\" for metadata in metadata_to_process), \"</ul>\")))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "Processed 7 files."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "web_vtts = tuple(WebVTTContent.from_bytes(f\"googledrive:{metadata.folder_path}/{metadata.name}\",\n",
+    "                                          {\n",
+    "                                              \"filename\": metadata.name,\n",
+    "                                              \"mimeType\": metadata.mime_type,\n",
+    "                                              \"modificationTime\": metadata.modified_time\n",
+    "                                          },\n",
+    "                                          google_drive_service.read_file_by_id(metadata.id))\n",
+    "            for metadata\n",
+    "            in metadata_to_process)\n",
+    "\n",
+    "display_html(f\"Processed {len(web_vtts)} files.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "Chunked Week-03-Analytics-Friday-2024-09-13.cc.vtt into 496 chunks."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:21:37.826\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 496 text string(s)…\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Vectorized Week-03-Analytics-Friday-2024-09-13.cc.vtt’s 496 chunks."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:21:42.297\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 496 chunks\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:42.319\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:42.320\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:42.340\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:42.341\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:42.360\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:42.360\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:42.380\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:42.500\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:42.505\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 496 documents into vectors collection\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:48.862\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 496 vector chunks in database\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Stored Week-03-Analytics-Friday-2024-09-13.cc.vtt’s 496 vectorized chunks to the database."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Chunked Week-07-Regressors-via-Linear-Regression-Friday-2024-10-18.transcript.vtt into 321 chunks."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:21:48.866\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 321 text string(s)…\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Vectorized Week-07-Regressors-via-Linear-Regression-Friday-2024-10-18.transcript.vtt’s 321 chunks."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:21:52.629\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 321 chunks\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:52.652\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:52.652\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:52.671\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:52.672\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:52.691\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:52.691\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:52.712\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:52.829\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:52.831\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 321 documents into vectors collection\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:21:58.227\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 321 vector chunks in database\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Stored Week-07-Regressors-via-Linear-Regression-Friday-2024-10-18.transcript.vtt’s 321 vectorized chunks to the database."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Chunked Week-06-Classifiers-via-Logistic-Regression-Friday-2024-10-11.transcript.vtt into 337 chunks."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:21:58.231\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 337 text string(s)…\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Vectorized Week-06-Classifiers-via-Logistic-Regression-Friday-2024-10-11.transcript.vtt’s 337 chunks."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:22:02.126\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 337 chunks\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:02.147\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:02.147\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:02.167\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:02.167\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:02.186\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:02.187\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:02.207\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:02.352\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:02.354\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 337 documents into vectors collection\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:08.520\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 337 vector chunks in database\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Stored Week-06-Classifiers-via-Logistic-Regression-Friday-2024-10-11.transcript.vtt’s 337 vectorized chunks to the database."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Chunked Week-09-AI-Part-1-Neural-Networks-Intro-to-HuggingFace-Friday-2024-11-01.cc.vtt into 341 chunks."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:22:08.524\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 341 text string(s)…\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Vectorized Week-09-AI-Part-1-Neural-Networks-Intro-to-HuggingFace-Friday-2024-11-01.cc.vtt’s 341 chunks."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:22:12.675\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 341 chunks\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:12.712\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:12.712\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:12.731\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:12.731\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:12.750\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:12.751\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:12.773\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:12.924\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:12.926\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 341 documents into vectors collection\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:18.356\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 341 vector chunks in database\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Stored Week-09-AI-Part-1-Neural-Networks-Intro-to-HuggingFace-Friday-2024-11-01.cc.vtt’s 341 vectorized chunks to the database."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Chunked Week-08-Decision-Trees-Random-Forest-Tuesday-2024-10-22.cc.vtt into 378 chunks."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:22:18.360\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 378 text string(s)…\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Vectorized Week-08-Decision-Trees-Random-Forest-Tuesday-2024-10-22.cc.vtt’s 378 chunks."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:22:21.808\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 378 chunks\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:21.841\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:21.841\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:21.873\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:21.874\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:21.894\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:21.894\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:21.914\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:22.029\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:22.035\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 378 documents into vectors collection\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:28.108\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 378 vector chunks in database\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Stored Week-08-Decision-Trees-Random-Forest-Tuesday-2024-10-22.cc.vtt’s 378 vectorized chunks to the database."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Chunked Week-02-Finding-Cleaning-Data-Friday-2024-09-06.vtt into 680 chunks."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:22:28.113\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 680 text string(s)…\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Vectorized Week-02-Finding-Cleaning-Data-Friday-2024-09-06.vtt’s 680 chunks."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:22:34.652\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 680 chunks\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:34.671\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:34.671\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:34.705\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:34.705\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:34.720\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:34.720\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:34.740\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:34.859\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:34.866\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 680 documents into vectors collection\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:43.431\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 680 vector chunks in database\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Stored Week-02-Finding-Cleaning-Data-Friday-2024-09-06.vtt’s 680 vectorized chunks to the database."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Chunked Week-01-Setup-Pandas-Friday-2024-08-30.vtt into 742 chunks."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:22:43.438\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 742 text string(s)…\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Vectorized Week-01-Setup-Pandas-Friday-2024-08-30.vtt’s 742 chunks."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[32m2025-04-19 19:22:50.402\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 742 chunks\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:50.426\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:50.426\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:50.452\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:50.452\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:50.475\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:50.475\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:50.508\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:50.617\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:22:50.626\u001b[0m | \u001b[34m\u001b[1mDEBUG   \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 742 documents into vectors collection\u001b[0m\n",
+      "\u001b[32m2025-04-19 19:23:01.166\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 742 vector chunks in database\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Stored Week-01-Setup-Pandas-Friday-2024-08-30.vtt’s 742 vectorized chunks to the database."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "for web_vtt in web_vtts:\n",
+    "    chunks = web_vtt.get_chunks()\n",
+    "    display_html(f\"Chunked {web_vtt.get_metadata().get(\"filename\")} into {len(chunks)} chunks.\")\n",
+    "    vectorized_chunks = vectorization_service.vectorize(chunks)\n",
+    "    display_html(f\"Vectorized {web_vtt.get_metadata().get(\"filename\")}’s {len(vectorized_chunks)} chunks.\")\n",
+    "    await (await vector_database_service).store(vectorized_chunks)\n",
+    "    display_html(f\"Stored {web_vtt.get_metadata().get(\"filename\")}’s {len(vectorized_chunks)} vectorized chunks to the database.\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

notebooks/web-vtt.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/web_vtt.ipynb ADDED Viewed

	@@ -0,0 +1,355 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# WebVTT Reading and Chunking Test"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Pure `webvtt-py` as Proof-of-concept"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datetime import datetime, timedelta\n",
+    "from functools import partial\n",
+    "from html import escape\n",
+    "from io import BytesIO\n",
+    "from IPython.display import display_html\n",
+    "from itertools import chain\n",
+    "import re\n",
+    "from webvtt import Caption, WebVTT\n",
+    "from webvtt.models import Timestamp\n",
+    "from zoneinfo import ZoneInfo\n",
+    "\n",
+    "display_html = partial(display_html, raw=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "FILE_PATH = \"GMT20250411-223535_Recording.transcript.vtt\"\n",
+    "TIME_ZONE = ZoneInfo(\"America/New_York\")\n",
+    "BASE_TIME = datetime(2025, 4, 11, hour=22, minute=35, second=35, tzinfo=ZoneInfo(\"GMT\")).astimezone(TIME_ZONE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open(FILE_PATH, \"rb\") as file:\n",
+    "    web_vtt = WebVTT.from_buffer(BytesIO(file.read()))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<ul><li>__class__</li><li>__delattr__</li><li>__dict__</li><li>__dir__</li><li>__doc__</li><li>__eq__</li><li>__format__</li><li>__ge__</li><li>__getattribute__</li><li>__getitem__</li><li>__getstate__</li><li>__gt__</li><li>__hash__</li><li>__init__</li><li>__init_subclass__</li><li>__le__</li><li>__len__</li><li>__lt__</li><li>__module__</li><li>__ne__</li><li>__new__</li><li>__reduce__</li><li>__reduce_ex__</li><li>__repr__</li><li>__setattr__</li><li>__sizeof__</li><li>__str__</li><li>__subclasshook__</li><li>__weakref__</li><li>_get_destination_file</li><li>_get_lines</li><li>_has_bom</li><li>captions</li><li>content</li><li>encoding</li><li>file</li><li>footer_comments</li><li>from_buffer</li><li>from_sbv</li><li>from_srt</li><li>from_string</li><li>header_comments</li><li>iter_slice</li><li>read</li><li>read_buffer</li><li>save</li><li>save_as_srt</li><li>styles</li><li>total_length</li><li>write</li></ul>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "display_html(\"\".join(chain(\"<ul>\", (f\"<li>{escape(member)}</li>\" for member in dir(web_vtt)), \"</ul>\")))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "                    <strong>Caption</strong> #344\n",
+       "                    <ul>\n",
+       "                        <li><strong>Start:</strong> Friday, April 11, 2025, 07:36:54 PM EDT</li>\n",
+       "                        <li><strong>Speaker:</strong> CUNY Tech Prep (CTP)</li>\n",
+       "                        <li><strong>Speech:</strong> Alright. You can pick the rooms. Now go into your rooms.</li>\n",
+       "                        <li><strong>End:</strong> Friday, April 11, 2025, 07:36:57 PM EDT</li>\n",
+       "                    </ul>\n",
+       "                "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "speaker_speech_pattern = re.compile(\"(?:([^:]+): )?(.*)\")\n",
+    "\n",
+    "match web_vtt.captions[343]:\n",
+    "    case Caption(identifier=identifier, start_time=start_time, end_time=end_time, text=text):\n",
+    "        match speaker_speech_pattern.search(text).groups():\n",
+    "            case (speaker, speech):\n",
+    "                display_html(f\"\"\"\n",
+    "                    <strong>Caption</strong> #{identifier}\n",
+    "                    <ul>\n",
+    "                        <li><strong>Start:</strong> {BASE_TIME + timedelta(**start_time.__dict__):%A, %B %d, %Y, %I:%M:%S %p %Z}</li>\n",
+    "                        <li><strong>Speaker:</strong> {escape(speaker)}</li>\n",
+    "                        <li><strong>Speech:</strong> {escape(speech)}</li>\n",
+    "                        <li><strong>End:</strong> {BASE_TIME + timedelta(**end_time.__dict__):%A, %B %d, %Y, %I:%M:%S %p %Z}</li>\n",
+    "                    </ul>\n",
+    "                \"\"\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Chunking\n",
+    "\n",
+    "For chunking to produce pieces with useful context, we must not use each caption (frame) on its own, but bundle it with its surrounding frames (the captions immediately before and after it)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from more_itertools import windowed"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "CHUNK_FRAMES_OVERLAP = 1\n",
+    "CHUNK_FRAMES_WINDOW = 5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table><tr><td>A</td></tr><tr><td>B</td></tr><tr><td>C</td></tr><tr><td>D</td></tr><tr><td>E</td></tr><tr><td>F</td></tr><tr><td>G</td></tr><tr><td>H</td></tr><tr><td>I</td></tr><tr><td>J</td></tr><tr><td>K</td></tr><tr><td>L</td></tr><tr><td>M</td></tr><tr><td>N</td></tr><tr><td>O</td></tr><tr><td>P</td></tr><tr><td>Q</td></tr><tr><td>R</td></tr><tr><td>S</td></tr><tr><td>T</td></tr><tr><td>U</td></tr><tr><td>V</td></tr><tr><td>W</td></tr><tr><td>X</td></tr><tr><td>Y</td></tr><tr><td>Z</td></tr></table>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "items = tuple(chr(code_point) for code_point in range(ord('A'), ord('[')))\n",
+    "display_html(f\"<table>{\"\".join(map(\"<tr><td>{}</td></tr>\".format, items))}</table>\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<table><tr><td>A</td><td>B</td><td>C</td><td>D</td><td>E</td></tr><tr><td>E</td><td>F</td><td>G</td><td>H</td><td>I</td></tr><tr><td>I</td><td>J</td><td>K</td><td>L</td><td>M</td></tr><tr><td>M</td><td>N</td><td>O</td><td>P</td><td>Q</td></tr><tr><td>Q</td><td>R</td><td>S</td><td>T</td><td>U</td></tr><tr><td>U</td><td>V</td><td>W</td><td>X</td><td>Y</td></tr><tr><td>Y</td><td>Z</td><td></td><td></td><td></td></tr></table>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "chunks = tuple(windowed(items, CHUNK_FRAMES_WINDOW, step=(CHUNK_FRAMES_WINDOW - CHUNK_FRAMES_OVERLAP)))\n",
+    "display_html(f\"<table>{\"\".join(f\"<tr>{\"\".join(f\"<td>{item if item else \"\"}</td>\" for item in chunk)}</tr>\" for chunk in chunks)}</table>\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Using the `WebVTTContent` Class"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datetime import datetime\n",
+    "from hashlib import sha256\n",
+    "from zoneinfo import ZoneInfo\n",
+    "\n",
+    "from ctp_slack_bot.models import WebVTTContent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "FILE_PATH = \"GMT20250411-223535_Recording.transcript.vtt\"\n",
+    "TIME_ZONE = ZoneInfo(\"America/New_York\")\n",
+    "MODIFICATION_TIME = datetime(2025, 4, 11, hour=22, minute=35, second=35, tzinfo=ZoneInfo(\"GMT\")).astimezone(TIME_ZONE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open(FILE_PATH, \"rb\") as file:\n",
+    "    bytes = file.read()\n",
+    "    web_vtt_content = WebVTTContent.from_bytes(sha256(bytes).hexdigest(), {\"modification_time\": MODIFICATION_TIME}, bytes)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(Chunk(text=\"iyeshia: For the workshop. We want to set you up.\\n\\niyeshia: Thank you, Kevin, for a question. We want to set you up for success in year one. And so this workshop is to help you kind of like\\n\\niyeshia: figure out, or how to adjust, as you're coming into your careers what to expect like your 30 days of work, 60 days of work, 90 days of work when you are starting your full time roles. So with that, said, let us get started.\\n\\niyeshia: So the topic, of course, is going to be discussing things of like the onboarding process of what it looks like when you start your jobs. How to maneuver or move around in your workplace environments. We'll discuss negotiating raises, because last time we didn't negotiating offers. So now we pass that you already got the offer. So now we'd be at the\\n\\niyeshia: the race card after that year. Don't try to come into your job already. 5 days in somebody to raise. Wait, and then from there we'll do activity on asking for feedback when you have, like your supervisor or manager, and you want to discuss things like that.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='1-5', metadata={'start': datetime.timedelta(0), 'end': datetime.timedelta(seconds=60, microseconds=379000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: the race card after that year. Don't try to come into your job already. 5 days in somebody to raise. Wait, and then from there we'll do activity on asking for feedback when you have, like your supervisor or manager, and you want to discuss things like that.\\n\\niyeshia: So let's kick it off with the onboarding process.\\n\\niyeshia: So with this, what you can expect ideally when you start your your job. There could be some type of welcome package. They might have a folder. They might have an email electronically or things like that. But it's gonna describe the details of like the company's environment. What your 1st day, or your 1st week or 1st month, a couple of months, might look like. As you're starting your onboarding process and the paperwork they might even show with you on the 1st day\\n\\niyeshia: work. You might be paired up with a Buddy or other people who might be hired at the same day, or maybe someone who was hired a year before, and they might be shadowing you to help you join and to get comfortable with your work environment.\\n\\niyeshia: and then also, your manager will. Hopefully, our supervisor would let you know what to expect. As you're starting your new\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='5-9', metadata={'start': datetime.timedelta(seconds=45, microseconds=930000), 'end': datetime.timedelta(seconds=108, microseconds=640000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: and then also, your manager will. Hopefully, our supervisor would let you know what to expect. As you're starting your new\\n\\niyeshia: job or career, and then from there, if you're unsure about your onboarding process as you're starting off, please ask questions to your manager or supervisor. The best part is to ask as many questions as you can. You're new, you're learning. They understand that. So they want to hear from you and your input\\n\\niyeshia: from there, I would say, I'm just looking at the\\n\\niyeshia: the chat. Yes, prepare for a lot of paperwork. Yes, I mean W. 2 W. Fours. They might have you fill out all those things. And that was 2. Okay, all right, Kevin.\\n\\niyeshia: So from there we'll kick it off. So an idea of what that could look like for you from 30 days to 60 days to 90 days to infinity and beyond like buzz light year, but from there you would hopefully to have intros with your your team, your manager, different departments. When you're starting\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='9-13', metadata={'start': datetime.timedelta(seconds=102, microseconds=82000), 'end': datetime.timedelta(seconds=166, microseconds=199000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: So from there we'll kick it off. So an idea of what that could look like for you from 30 days to 60 days to 90 days to infinity and beyond like buzz light year, but from there you would hopefully to have intros with your your team, your manager, different departments. When you're starting\\n\\niyeshia: they'll go over etiquette with you of like what you can expect. At the job that can include your attire, your desk hygiene communication, checking in with managers or teams.\\n\\niyeshia: Once you, after the 30 days we get to maybe days, 60 days, and then you're able to develop like your needs. Gain a better understanding of the company, develop plans and deliverables and outcomes. And then you go into your 90 days of being on the job where you're kind of learning your role. You're kind of getting adjust, you're being more effective and being becoming more independent.\\n\\niyeshia: And then from there you be able to understand, like, after the 90 days that you're kind of like settled in maybe months 4 to 6, or maybe the whole year. You should be settled into your role, understanding what's going on understanding how different departments move and things like that. So this is just the overview of what that looks like. It's not necessarily concrete, because every job is different.\\n\\niyeshia: But this is just to give an idea of what you can expect of that. And please just be mindful like with every workshop. I'm definitely going to send you the Powerpoint at the end. So if you want to look over that on your own time, you definitely can.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='13-17', metadata={'start': datetime.timedelta(seconds=147, microseconds=8000), 'end': datetime.timedelta(seconds=233, microseconds=730000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: But this is just to give an idea of what you can expect of that. And please just be mindful like with every workshop. I'm definitely going to send you the Powerpoint at the end. So if you want to look over that on your own time, you definitely can.\\n\\niyeshia: And so now that we've got through the onboarding process, this is probably the quickest we've done onboarding process because Kevin did it in 2 weeks. So from there we are going to move to navigating the workplace environment.\\n\\niyeshia: And so with that said, some things that are really important in your workplace environment is building relationships. Whether that's with your peers, your colleagues. Your manager. Trying to have a mentor mentee connection. All relationships are important.\\n\\niyeshia: With that I would say that when it comes to identifying your relationship needs, you want to know what you're expecting like, what? How do you need to show up in your role. What do you need from others? Understanding those type of things can help build better, I would say. Connections with your teammates and things of that nature when it's time to like cover problems or solve projects and things like that.\\n\\niyeshia: Another thing, too, you want to focus on is your Eiq. Emotional intelligence and communication that is basically pretty much helpful on the ability of recognizing your own emotions. Are you adequate enough, or know where your emotions are where you can get things done, what you need, what you don't need? Can you articulate that to your employer when you know those you can be able to identify and handle your emotions.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='17-21', metadata={'start': datetime.timedelta(seconds=220, microseconds=406000), 'end': datetime.timedelta(seconds=315, microseconds=170000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: Another thing, too, you want to focus on is your Eiq. Emotional intelligence and communication that is basically pretty much helpful on the ability of recognizing your own emotions. Are you adequate enough, or know where your emotions are where you can get things done, what you need, what you don't need? Can you articulate that to your employer when you know those you can be able to identify and handle your emotions.\\n\\niyeshia: And you can add basically help also to learn how to understand and help others. As well.\\n\\niyeshia: Another thing, as far as building relationships goes, is practicing, mindful listening. So the best way to truly listen is to talk less, and of course to understand more. And so when you learn from your teammates, listen as much as you can gain as much knowledge as you can from others, and that's gonna help you kinda conduct, or, you know, be a better team player. In your work environment.\\n\\niyeshia: And then a few things that you can do is\\n\\niyeshia: another way to help build a relationship is manager boundaries, you know, saying what is for you, scheduling time? With colleagues trying not to go over certain tasks or assignments. So that time management is gonna definitely help when you want to focus on your boundaries and you want to set schedules to maybe build connections with your team, and these are ways that you can go about it. Introduce yourself to people, whether your peers, whether it's\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='21-25', metadata={'start': datetime.timedelta(seconds=288, microseconds=600000), 'end': datetime.timedelta(seconds=376, microseconds=110000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: another way to help build a relationship is manager boundaries, you know, saying what is for you, scheduling time? With colleagues trying not to go over certain tasks or assignments. So that time management is gonna definitely help when you want to focus on your boundaries and you want to set schedules to maybe build connections with your team, and these are ways that you can go about it. Introduce yourself to people, whether your peers, whether it's\\n\\niyeshia: I don't care if it's a janitor security. The Cfo treat everybody equal and the same. And get to know. Get to know people because you just never know when you're going to need someone or work with someone. During that time.\\n\\niyeshia: And so those are the ways you can go about it. Greet people. You can invite people to coffee breaks, do quick message, check-in, and things of that nature, and then from there the 6 or 7 1, i think, are really important in the workplace environment. Some of the things you want to do is show gratitude, embrace others, give.\\n\\niyeshia: you know, credit where credit is due. Don't try to take anybody's ideas. If it comes to projects and things like that, that is a serious no-no show gratitude, and by any means necessary, try to avoid any gossip, any issues with office politics stay out of it. This is your first.st\\n\\niyeshia: This might be your 1st real like role, as far as like full time. In your career. So you just want to make sure you just keep in the peace and be respectful from there. Gossiping is kind of a big deal and a big no-no as well. So just be mindful of that.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='25-29', metadata={'start': datetime.timedelta(seconds=351, microseconds=10000), 'end': datetime.timedelta(seconds=438, microseconds=590000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: This might be your 1st real like role, as far as like full time. In your career. So you just want to make sure you just keep in the peace and be respectful from there. Gossiping is kind of a big deal and a big no-no as well. So just be mindful of that.\\n\\niyeshia: So the next thing, as far as we're talking about building relationship goals, you definitely want to also build those relationships, as I stated, with your peers. And things like that. Your coworkers? But you want to make sure you build a relationship with your manager. And just remember that it's important to have a relationship with your manager. But that's not the only relationship that's like you should focus on, you know. Like, I said before, you want to be a team play. You want to treat everybody equally because you just never know who you connect with.\\n\\niyeshia: But when it comes to that manager time, or asking for I would say, supervisions or meetings with them. You can ask questions. Those are always encouraged. You can ask them about their you know, supervisor style. Are they transformative? Are they hands on?\\n\\niyeshia: Do they like feedback directly towards them? Is everything written email? How are they? What's their work? Style? You can even ask them for the expectations of what is this like in a role like, what are your expectations, as far as how you show up in your role to them? And what are they looking for like with the measurements of success. Of course we always tell fellows to document everything that you do, as far as like when it comes to any goals that you bring any success.\\n\\niyeshia: rate, that you have many tasks that you might have brought to the table any of your accomplishments I know some people carry, or they write down like a accomplishment form of all the things that they've done, which, while they were at work to help with the ideas of what they bring to the table when it's time to come up for that, raise negotiation process. 
So just make sure you also update your resume as we go along, too.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='29-33', metadata={'start': datetime.timedelta(seconds=424, microseconds=830000), 'end': datetime.timedelta(seconds=536, microseconds=219000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: rate, that you have many tasks that you might have brought to the table any of your accomplishments I know some people carry, or they write down like a accomplishment form of all the things that they've done, which, while they were at work to help with the ideas of what they bring to the table when it's time to come up for that, raise negotiation process. So just make sure you also update your resume as we go along, too.\\n\\niyeshia: and then to talk with your manager about not only your successes and what you accomplish, but maybe areas of where you can grow and what you've been struggling to focus on so they can help support you with that as well.\\n\\niyeshia: Be observant in meetings when you're meeting with your team and other people. So that way you could learn about what else is going on, or whatever what everybody else is doing. So you can see how things work together. If you want to connect and socialize, you can ask people to lunch or coffee chats and things like that, and then always just remain proactive. You know it's always a good gesture to ask for teammate. It's like, Hey, is there anything you need before you know the end of the day? Or before I'm about to leave. You know things like that. It's always\\n\\niyeshia: helpful, too, because you never know when it's like your time, and someone is asking or offering help to you. And you're like, Oh, yeah, definitely need help with this. So it's always great to return their favor.\\n\\niyeshia: And so\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='33-37', metadata={'start': datetime.timedelta(seconds=511, microseconds=850000), 'end': datetime.timedelta(seconds=589, microseconds=330000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: And so\\n\\niyeshia: from there I would say, overall in regards of meeting with your supervisor, depending on how they do it. It could be quarterly it could be every other month. It could be 3 times throughout the year. They have a performance review. And so some companies like to start with, maybe January, you start, or maybe June, you started\\n\\niyeshia: working with them, and you track goals and what you could accomplish. With your manager until, like the next meeting, you have to go over just to make sure that you're on track with your goals throughout the throughout the year, as you've been working with your with your company.\\n\\niyeshia: That you got hired by, and so sometimes they'll do like a mid year review report to see your progress. If there's any touch points they could assist you with or support you with. You can meet with them with one on one meetings. If you feel like that's too long, and you want to make suggestions to meet with them sooner. Maybe you want to do every 3 months\\n\\niyeshia: just to see what's going on and how you can stay on track, and so I would say. Performance reviews, I guess, could be nerve wracking if it's like your 1st time, because you don't know what to expect.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='37-41', metadata={'start': datetime.timedelta(seconds=587, microseconds=800000), 'end': datetime.timedelta(seconds=654, microseconds=640000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: just to see what's going on and how you can stay on track, and so I would say. Performance reviews, I guess, could be nerve wracking if it's like your 1st time, because you don't know what to expect.\\n\\niyeshia: but of course you'll get used to it. As it progresses. But then, of course, you're still maintaining those connections with your supervisor, so you can definitely ask them questions of what you can expect from a performance review and things like that.\\n\\niyeshia: I'll pause here. If anybody has any questions about anything that I've mentioned. Anything like that?\\n\\niyeshia: Any questions? Are we all good.\\n\\nCUNY Tech Prep (CTP): Now's your chance before you forget what you wanted to ask.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='41-45', metadata={'start': datetime.timedelta(seconds=645, microseconds=172000), 'end': datetime.timedelta(seconds=682, microseconds=250000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
+       " Chunk(text=\"CUNY Tech Prep (CTP): Now's your chance before you forget what you wanted to ask.\\n\\nCUNY Tech Prep (CTP): No takers.\\n\\nCUNY Tech Prep (CTP): I have a few comments.\\n\\niyeshia: You want to go ahead, Kevin.\\n\\nCUNY Tech Prep (CTP): Well, self, I see self document as also having a secondary goal, particularly if you find yourself in\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='45-49', metadata={'start': datetime.timedelta(seconds=678, microseconds=110000), 'end': datetime.timedelta(seconds=700, microseconds=910000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
+       " Chunk(text=\"CUNY Tech Prep (CTP): Well, self, I see self document as also having a secondary goal, particularly if you find yourself in\\n\\nCUNY Tech Prep (CTP): not such a nice work environment.\\n\\nCUNY Tech Prep (CTP): It helps prevent people from gaslighting. You, for example.\\n\\nCUNY Tech Prep (CTP): And like it keeps you out of trouble. Let's say cause if you self document, then\\n\\nCUNY Tech Prep (CTP): you know exactly what was decided on.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='49-53', metadata={'start': datetime.timedelta(seconds=693, microseconds=509000), 'end': datetime.timedelta(seconds=720, microseconds=809000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
+       " Chunk(text=\"CUNY Tech Prep (CTP): you know exactly what was decided on.\\n\\nCUNY Tech Prep (CTP): And you're just following exactly what was said.\\n\\niyeshia: That is correct.\\n\\nCUNY Tech Prep (CTP): And then the setting boundaries right.\\n\\nCUNY Tech Prep (CTP): and there are some. There are some\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='53-57', metadata={'start': datetime.timedelta(seconds=717, microseconds=970000), 'end': datetime.timedelta(seconds=732, microseconds=590000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
+       " Chunk(text=\"CUNY Tech Prep (CTP): and there are some. There are some\\n\\nCUNY Tech Prep (CTP): bosses who will push your boundaries. Try to get you to like\\n\\nCUNY Tech Prep (CTP): do overtime. Stay longer than like\\n\\nCUNY Tech Prep (CTP): your stay longer than what's on like the contract, or whatever.\\n\\nCUNY Tech Prep (CTP): If you give an inch sometimes they'll take a mile, so\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='57-61', metadata={'start': datetime.timedelta(seconds=729, microseconds=400000), 'end': datetime.timedelta(seconds=749, microseconds=960000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
+       " Chunk(text=\"CUNY Tech Prep (CTP): If you give an inch sometimes they'll take a mile, so\\n\\nCUNY Tech Prep (CTP): you should be very clear on\\n\\nCUNY Tech Prep (CTP): your time. Your time limits, like.\\n\\nCUNY Tech Prep (CTP): you know, have always have an out, for\\n\\nCUNY Tech Prep (CTP): when too much is being requested of you.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='61-65', metadata={'start': datetime.timedelta(seconds=745, microseconds=275000), 'end': datetime.timedelta(seconds=767, microseconds=120000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
+       " Chunk(text=\"CUNY Tech Prep (CTP): when too much is being requested of you.\\n\\nCUNY Tech Prep (CTP): My usual go to is like, Oh, I I have like I have a meeting for Ctp, or like I have class.\\n\\niyeshia: Very good. That's good to good to know. And I know. David. Put in the chat like for an example of documentation. On March 16, th at 4, 35, you said, and I quote that is, that is exactly.\\n\\nCUNY Tech Prep (CTP): Under my lap.\\n\\niyeshia: But if you're in that situation, you definitely, it's so fresh, and it's so like truthful, like someone's like, no, I'm not going to doubt that someone made that.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='65-69', metadata={'start': datetime.timedelta(seconds=764, microseconds=400000), 'end': datetime.timedelta(seconds=803, microseconds=550000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: But if you're in that situation, you definitely, it's so fresh, and it's so like truthful, like someone's like, no, I'm not going to doubt that someone made that.\\n\\nCUNY Tech Prep (CTP): Yeah.\\n\\niyeshia: We wrote that and gave them the time so absolutely documentation goals for the good and for the bad. So definitely. Thank you for sharing that Kevin and David?\\n\\niyeshia: And so with that said, We'll go on to the the next slide. Which is a question of is my manager the same as having a mentor. Does anybody want to come off the come off mute and say yes or no?\\n\\niyeshia: I can just call on Kyle.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='69-73', metadata={'start': datetime.timedelta(seconds=795, microseconds=400000), 'end': datetime.timedelta(seconds=831, microseconds=790000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: I can just call on Kyle.\\n\\nCUNY Tech Prep (CTP): Kyle, you there.\\n\\nKyle Schoenhardt: No, it's not.\\n\\niyeshia: Okay, let's see.\\n\\niyeshia: Yay, good job, PAL. The answer is, no.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='73-77', metadata={'start': datetime.timedelta(seconds=828, microseconds=820000), 'end': datetime.timedelta(seconds=844, microseconds=930000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'Kyle Schoenhardt', 'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: Yay, good job, PAL. The answer is, no.\\n\\niyeshia: Did you want to give more input?\\n\\nKyle Schoenhardt: Yeah. Sure.\\n\\niyeshia: Yeah.\\n\\nKyle Schoenhardt: Well, I mean, sometimes you can just have really bad managers who are there to cover their own self, make themselves look good sometimes at your expense, or they micromanage, or you just don't click well with that person. For whatever reason a mentor is akin to a leader, I think they are there to lift you up and show you\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='77-81', metadata={'start': datetime.timedelta(seconds=841, microseconds=340000), 'end': datetime.timedelta(seconds=869, microseconds=440000), 'speakers': frozenset({'Kyle Schoenhardt', 'iyeshia'})}),\n",
+       " Chunk(text=\"Kyle Schoenhardt: Well, I mean, sometimes you can just have really bad managers who are there to cover their own self, make themselves look good sometimes at your expense, or they micromanage, or you just don't click well with that person. For whatever reason a mentor is akin to a leader, I think they are there to lift you up and show you\\n\\nKyle Schoenhardt: how you can improve on yourself like a coach.\\n\\nKyle Schoenhardt: Constantly giving you feedback, whether positive or negative.\\n\\nKyle Schoenhardt: I would say someone you would\\n\\nKyle Schoenhardt: go to immediately like. If the 1st person you think of that you need help with something is not your manager, then that's\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='81-85', metadata={'start': datetime.timedelta(seconds=850, microseconds=340000), 'end': datetime.timedelta(seconds=885, microseconds=510000), 'speakers': frozenset({'Kyle Schoenhardt'})}),\n",
+       " Chunk(text=\"Kyle Schoenhardt: go to immediately like. If the 1st person you think of that you need help with something is not your manager, then that's\\n\\nKyle Schoenhardt: a good indicator, that that person is not a mentor, or, if you need help with something, your your 1st go to person to that you think of is\\n\\nKyle Schoenhardt: someone else that is probably who your mentor is most likely to be, could be a coworker. It could be a manager, but it's not always.\\n\\niyeshia: Got it. Thank you, Kevin. I mean. Thank you, Kyle, said Kevin. Thank you. Kyle. Appreciate that. With that, said, I don't feel like I need to add any more. I feel like Kyle took that. So I'm gonna move on to the day.\\n\\niyeshia: So the next question is, should my manager, be my mentor.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='85-89', metadata={'start': datetime.timedelta(seconds=879, microseconds=360000), 'end': datetime.timedelta(seconds=919, microseconds=30000), 'speakers': frozenset({'Kyle Schoenhardt', 'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: So the next question is, should my manager, be my mentor.\\n\\niyeshia: Alison.\\n\\nAllison Lee: Well, you you can't force a mentor mentee relationship if that's not how it's going to work.\\n\\nAllison Lee: But it is possible for your manager to be some kind of mentor figure.\\n\\niyeshia: Thank you.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='89-93', metadata={'start': datetime.timedelta(seconds=914, microseconds=565000), 'end': datetime.timedelta(seconds=945, microseconds=810000), 'speakers': frozenset({'iyeshia', 'Allison Lee'})}),\n",
+       " Chunk(text=\"iyeshia: Thank you.\\n\\niyeshia: So with that, said.\\n\\niyeshia: that depends. So I appreciate Allison. Your response. It definitely depends. Can't force them. But of course, if you do get along with your supervisor, and you want to ask them that\\n\\niyeshia: by all means. But good, answers everyone.\\n\\niyeshia: So now we go more in depth of what can good mentorship look like? And so from there I would say, mentors, as\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='93-97', metadata={'start': datetime.timedelta(seconds=944, microseconds=920000), 'end': datetime.timedelta(seconds=975, microseconds=362000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: So now we go more in depth of what can good mentorship look like? And so from there I would say, mentors, as\\n\\niyeshia: Kyle touched on was that they provide support, wisdom to help you succeed in certain examples are, this is pretty much sharing any ideas you might have with them from paying program with you on a code base providing feedback, maybe on a slide deck to helping you remind that it's impossible to know everything. So they're kind of reassuring you in your in your role as you're starting your career.\\n\\niyeshia: and then you want to make sure your mentor is a is a safe space for you at the time. Sometimes your mentor. You can talk to your mentor about your manager sometimes if they are difficult or not, and so from there it's a form of trust\\n\\niyeshia: with your with your mentor. So if you have, if you are blessed to have a supervisor who can be both roles, a manager and a mentor. Go for it, if you're like. I'm still learning. I'm only 3, 30 days in 60 days, 90 days. Take your time, then. So that is definitely something to to know from that.\\n\\niyeshia: And then questions of Where can I find? A mentor? And so, before I even answer this question, who can tell me what erg stands for\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='97-101', metadata={'start': datetime.timedelta(seconds=964, microseconds=630000), 'end': datetime.timedelta(seconds=1046, microseconds=430000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: And then questions of Where can I find? A mentor? And so, before I even answer this question, who can tell me what erg stands for\\n\\niyeshia: anyone?\\n\\niyeshia: Go ahead, Devon, please.\\n\\nDevin Xie (no cam): Employee resource groups.\\n\\niyeshia: Thank you so much, Devin. I appreciate you and blouse right there. Next to erg. So the examples of that can be any groups that they have at your job related to Lgbtq. It could be groups related to race and identity. It could be anything from parenthood. I wish they had groups related for auntiehood and things of that nature. But it's all about finding your community and resources for things to help support you while you're working\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='101-105', metadata={'start': datetime.timedelta(seconds=1035, microseconds=839000), 'end': datetime.timedelta(seconds=1085, microseconds=780000), 'speakers': frozenset({'iyeshia', 'Devin Xie (no cam)'})}),\n",
+       " Chunk(text=\"iyeshia: Thank you so much, Devin. I appreciate you and blouse right there. Next to erg. So the examples of that can be any groups that they have at your job related to Lgbtq. It could be groups related to race and identity. It could be anything from parenthood. I wish they had groups related for auntiehood and things of that nature. But it's all about finding your community and resources for things to help support you while you're working\\n\\niyeshia: in some of your environments. And then, when you have your community, you can always reflect on interests related to tech.\\n\\niyeshia: or maybe research on your company like, who's in your area. And you could always reach out to some people for informational interviews. If you're really trying to seek this mentor Mentee relationship from people who are at your company. So just to keep that in mind.\\n\\niyeshia: I think I saw something.\\n\\niyeshia: Auntie Hood. Yes, and then I think, Mingle, said Manager supervisors are not your friend. Their one and only job is to find a person that can get the job done. Okay, come on, now, very good. And so\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='105-109', metadata={'start': datetime.timedelta(seconds=1057, microseconds=780000), 'end': datetime.timedelta(seconds=1131, microseconds=240000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: Auntie Hood. Yes, and then I think, Mingle, said Manager supervisors are not your friend. Their one and only job is to find a person that can get the job done. Okay, come on, now, very good. And so\\n\\niyeshia: with that, said, I think y'all know the roles between manager and mentor, and I appreciate that.\\n\\niyeshia: So now the next part is negotiating raises. So the last workshop we did was negotiating offers, as I stated before. So this one's gonna be a little different. You got the job. So now, after that whole success in your 1st year you want to start discussing maybe time for a raise. So let's get into that.\\n\\niyeshia: So you did a great job.\\n\\niyeshia: 1st year you knocked it out. You got outcomes, you got successes. You're amazing. On the 1st year what happens now?\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='109-113', metadata={'start': datetime.timedelta(seconds=1114, microseconds=170000), 'end': datetime.timedelta(seconds=1167, microseconds=119000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: 1st year you knocked it out. You got outcomes, you got successes. You're amazing. On the 1st year what happens now?\\n\\niyeshia: Your success is going to be measured by achievements, contributions into your organization, and that could be rewarded with\\n\\niyeshia: money or something else you value that could be related to time. Things of that nature. You want to go up based off your benefits. As we stated before, in the last workshop, you might wanna negotiate that. But if you want to talk about money first.st That's okay, too.\\n\\niyeshia: And these are gonna help you, too, as well with your I would say. Manager or supervisor. Meetings\\n\\niyeshia: from there.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='113-117', metadata={'start': datetime.timedelta(seconds=1160, microseconds=790000), 'end': datetime.timedelta(seconds=1199, microseconds=450000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: from there.\\n\\niyeshia: So just remember that it's okay when you when you flex those negotiating offers or flex those muscles during conversations around raises. It's not bragging. If you're talking about your achievements and things like that. It's okay to to talk about your successes, you know, especially during a raise time, because you're trying to show your manager or prove what you brought to the to the table. So keep that in mind.\\n\\niyeshia: So how does it look.\\n\\nCUNY Tech Prep (CTP): Comments, sorry.\\n\\niyeshia: Yeah, that is.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='117-121', metadata={'start': datetime.timedelta(seconds=1198, microseconds=703000), 'end': datetime.timedelta(seconds=1228, microseconds=390000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
+       " Chunk(text='iyeshia: Yeah, that is.\\n\\nCUNY Tech Prep (CTP): Something you would also document. If your manager praises you, you document that.\\n\\niyeshia: That.\\n\\nCUNY Tech Prep (CTP): Is evidence you can use in your negotiations.\\n\\niyeshia: That is such a fact.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='121-125', metadata={'start': datetime.timedelta(seconds=1227, microseconds=350000), 'end': datetime.timedelta(seconds=1240, microseconds=380000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: That is such a fact.\\n\\niyeshia: I literally just copy to paste everything, my manager said. Yep, one of my negotiation days. Yep, so thank you, Kevin, for saying that? So with that said, if you have those those meetings with them, document not only what you say, but what they said, as Kevin mentioned.\\n\\niyeshia: That was great in the negotiating offer. So how else do we prepare for this?\\n\\niyeshia: You're going to research? Yes, you're going to gather all your feedback, whether it's from your colleagues and meetings, whether it's from the success that you hear from your manager or tips from people that you work with, you're going to make sure you learn about your role. What's going on in the market. Just research is going to be your best.\\n\\niyeshia: Put input on this as well. When you're talking about your salary. The next thing you want to do is list the accomplishments. Keep those documents. Don't wait to the last minute you get to the end of the year. You're like, what did I do? It's been 12 months, like.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='125-129', metadata={'start': datetime.timedelta(seconds=1238, microseconds=990000), 'end': datetime.timedelta(seconds=1296, microseconds=189000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: Put input on this as well. When you're talking about your salary. The next thing you want to do is list the accomplishments. Keep those documents. Don't wait to the last minute you get to the end of the year. You're like, what did I do? It's been 12 months, like.\\n\\niyeshia: yeah, document everything, because you might forget some stuff. So that's definitely gonna help, too.\\n\\niyeshia: With that, said, you want to make sure you remind everyone. Maybe you save a bunch of money for the company. Oh, maybe you help them with other accomplishments, or maybe you spend off a project that's done really well. For your department. Share it. So please feel free to do that.\\n\\niyeshia: and then that will also help you keep your resume updated as well. So you don't have to worry about trying to\\n\\niyeshia: scatter or get all your thoughts together at the last minute.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='129-133', metadata={'start': datetime.timedelta(seconds=1281, microseconds=940000), 'end': datetime.timedelta(seconds=1331, microseconds=399000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: scatter or get all your thoughts together at the last minute.\\n\\niyeshia: And then with that status also, your manager needs to have the facts, too, to convince their boss to approve you for a raise. So if your manager is giving you the praises already, they're like, yeah, I did say that like\\n\\niyeshia: as well. Even if they make a joke like saying to you like, Hey, you deserve a raise document that you could go right back to like, you know. April 11th at 5, at 6 58 pm. You said, I deserve a raise this time like it. Just everything will just work for you in your favor for that, so please feel free to do that.\\n\\niyeshia: And so now you did the you did the raise. You had the meeting with your your manager. They're proposing it to the Supervisor, or things of that nature. I know different companies work in different ways, so they might have you go directly to your boss's boss to talk about the raise, or whoever is in charge of that\\n\\niyeshia: common, to negotiate that with them. But every company is different. But if they say yes, that's great job all done. Now, what if you get to a conversation where they say, No, what do you do, then? Well, there are alternatives for that. You can ask to work on, maybe towards a promotion. You know what I'm saying as far as if they say based off your level. We can't go any higher than that\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='133-137', metadata={'start': datetime.timedelta(seconds=1328, microseconds=370000), 'end': datetime.timedelta(seconds=1410, microseconds=720000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: common, to negotiate that with them. But every company is different. But if they say yes, that's great job all done. Now, what if you get to a conversation where they say, No, what do you do, then? Well, there are alternatives for that. You can ask to work on, maybe towards a promotion. You know what I'm saying as far as if they say based off your level. We can't go any higher than that\\n\\niyeshia: negotiate for promotion which would include maybe getting a title change, or better money that comes with it. This is why we say research, because you can definitely research what's going on in the market saying, Hey, that's my job. But the title is different.\\n\\niyeshia: Look that up and like definitely propose that if you want to. You can even ask for a faster review cycle. If they say something like, Hey, we can't give that to you. Just yet today. But let's revisit this topic on the 6 months, maybe, like, hey? Can we meet sooner, maybe in 3 months, to discuss more about how I can go about this\\n\\niyeshia: and then you could simply, if they say no. Ask why? Because you don't want to hear anything as far as like knowing that period. No, they should give you an explanation for it. So always ask questions with that to help like what's driving? That? Was it bad timing? Is there a gap? Is there their cap? Is there certain budgets. Did I miss anything that could help? So they can definitely\\n\\niyeshia: share with you and tell you that information of why they might have done. It could be a whole timing thing. It could be a budget thing. But just keep in mind to keep so just to keep in mind you could ask for like. Go around it 3 these ways, let's say 3 different ways. You can go about the answer and no from there. 
With that, said, does anyone have any questions so far?\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='137-141', metadata={'start': datetime.timedelta(seconds=1386, microseconds=520000), 'end': datetime.timedelta(seconds=1487, microseconds=429000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: share with you and tell you that information of why they might have done. It could be a whole timing thing. It could be a budget thing. But just keep in mind to keep so just to keep in mind you could ask for like. Go around it 3 these ways, let's say 3 different ways. You can go about the answer and no from there. With that, said, does anyone have any questions so far?\\n\\niyeshia: Nobody. Okay. Devin.\\n\\nCUNY Tech Prep (CTP): Devin does Devon.\\n\\nDevin Xie (no cam): Just curious. So like, say, we\\n\\nDevin Xie (no cam): find some opportunity after we graduate from Cuny Tech fair.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='141-145', metadata={'start': datetime.timedelta(seconds=1467, microseconds=260000), 'end': datetime.timedelta(seconds=1503, microseconds=140000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia', 'Devin Xie (no cam)'})}),\n",
+       " Chunk(text=\"Devin Xie (no cam): find some opportunity after we graduate from Cuny Tech fair.\\n\\nDevin Xie (no cam): And then we have questions about this stuff like.\\n\\nDevin Xie (no cam): let's say we work there for like a year. And we\\n\\nDevin Xie (no cam): we stop. We we want to ask for some advice. Can we still hit you guys up.\\n\\niyeshia: Yeah, but you become alumni. You're not just gonna drop you all off in May and be like, bye. No, you can definitely you'll be invited. May like, after the graduation, I want to say in the summertime you'll get an invite to the alumni slack channel and you can join\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='145-149', metadata={'start': datetime.timedelta(seconds=1499, microseconds=630000), 'end': datetime.timedelta(seconds=1531, microseconds=469000), 'speakers': frozenset({'iyeshia', 'Devin Xie (no cam)'})}),\n",
+       " Chunk(text=\"iyeshia: Yeah, but you become alumni. You're not just gonna drop you all off in May and be like, bye. No, you can definitely you'll be invited. May like, after the graduation, I want to say in the summertime you'll get an invite to the alumni slack channel and you can join\\n\\niyeshia: that, and I will be gladly to assist you. There. We have a career coach there, but usually all the the staff is on the Ctv team is on the alumni channel. So yeah, definitely. But we also like, I said before, Devin, save the Powerpoint, too.\\n\\niyeshia: Just putting that out there? So yeah, good question.\\n\\niyeshia: Okay?\\n\\niyeshia: And so the next part is after the conversation for the the raise. You want to make sure. The conversation goes well, timing is going to be a part of that. So clarifying the process, asking them like, you know, when should I expect the raise? You know that's not being thirsty. That's that's your money. You can ask questions about it. And what's the next step for that?\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='149-153', metadata={'start': datetime.timedelta(seconds=1513, microseconds=30000), 'end': datetime.timedelta(seconds=1577, microseconds=890000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: And so the next part is after the conversation for the the raise. You want to make sure. The conversation goes well, timing is going to be a part of that. So clarifying the process, asking them like, you know, when should I expect the raise? You know that's not being thirsty. That's that's your money. You can ask questions about it. And what's the next step for that?\\n\\niyeshia: You can always confirm with your manager? Like. If the reason they said no, was it because there's certain maybe I would say physical years of like, how they what deadline they have for the New Year or the new budget. Time or deadline, was it? Did I miss it when I asked for a salary? Or when's the next time I should ask for a salary. Increase, and things like that. Cause your your department, or you would hope the team that you're on will show you throughout the year of like what's coming up and what you can expect.\\n\\niyeshia: So you definitely want to plan ahead next time. If they say no, and then review the work and the feedback asking for feedback. Was it my, the way that I would propose the raise? Is there anything I could do to get? You know better on that? That would help with the mentor, of course.\\n\\niyeshia: Cause the person you're proposing it to might not give the input. But definitely, a mentor is gonna help you with that as well to see what's going on. You could definitely check in with your manager. If they had any feedback they might tell your manager to like, let them know like this is why they might have said No or this? Why, they might have said, Not yet, or they'll say yes later. So keep that in mind.\\n\\niyeshia: and then let's see right\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='153-157', metadata={'start': datetime.timedelta(seconds=1553, microseconds=290000), 'end': datetime.timedelta(seconds=1648, microseconds=679000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: and then let's see right\\n\\niyeshia: from there we'll go to the activity.\\n\\niyeshia: And so from there, this is an activity of asking for feedback.\\n\\niyeshia: And we're gonna do a scenario of you want to ask for feedback from your manager.\\n\\niyeshia: and you previously had passed up for raise and want to learn more about how you can ensure success earning one in the next review cycle.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='157-161', metadata={'start': datetime.timedelta(seconds=1641, microseconds=970000), 'end': datetime.timedelta(seconds=1673, microseconds=539000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: and you previously had passed up for raise and want to learn more about how you can ensure success earning one in the next review cycle.\\n\\niyeshia: So this part is, how would you start that conversation in your weekly check in?\\n\\niyeshia: So since we're virtual, we're gonna have, I'm gonna give you about 30 seconds to come up with your own answer, and then type it in the chat.\\n\\niyeshia: So review the scenario now and then we'll start in 30 seconds.\\n\\niyeshia: So\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='161-165', metadata={'start': datetime.timedelta(seconds=1665, microseconds=550000), 'end': datetime.timedelta(seconds=1692, microseconds=620000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text='iyeshia: So\\n\\niyeshia: we set the timer for 30.\\n\\niyeshia: Okay?\\n\\niyeshia: Goes now\\n\\niyeshia: 10 seconds.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='165-169', metadata={'start': datetime.timedelta(seconds=1691, microseconds=890000), 'end': datetime.timedelta(seconds=1727, microseconds=70000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text='iyeshia: 10 seconds.\\n\\niyeshia: Okay, time is up.\\n\\niyeshia: Okay, nice.\\n\\niyeshia: And look for a raise on to guarantee a raise in this performance. Review. Awesome. Thank you. Ty\\n\\niyeshia: and Mckenzie. Thank you.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='169-173', metadata={'start': datetime.timedelta(seconds=1725, microseconds=970000), 'end': datetime.timedelta(seconds=1767, microseconds=160000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text='iyeshia: and Mckenzie. Thank you.\\n\\niyeshia: 13.\\n\\niyeshia: Some feedback to see what I can build. Awesome.\\n\\niyeshia: Hey, boys!\\n\\niyeshia: Oh, my God this time to reach out a bit. Okay, okay for me.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='173-177', metadata={'start': datetime.timedelta(seconds=1765, microseconds=20000), 'end': datetime.timedelta(seconds=1785, microseconds=509000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text='iyeshia: Oh, my God this time to reach out a bit. Okay, okay for me.\\n\\niyeshia: No.\\n\\niyeshia: Okay.\\n\\niyeshia: Any improvement that you see that I cannot. Okay, thank you.\\n\\niyeshia: Let me check in with you.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='177-181', metadata={'start': datetime.timedelta(seconds=1780, microseconds=400000), 'end': datetime.timedelta(seconds=1810, microseconds=859000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: Let me check in with you.\\n\\niyeshia: There we go.\\n\\niyeshia: Okay, perfect.\\n\\niyeshia: So what I can make for the next recycle. Awesome. Thank you all for sharing so far, I'm gonna move on to the the next part. I think I kind of skipped\\n\\niyeshia: ahead.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='181-185', metadata={'start': datetime.timedelta(seconds=1807, microseconds=139000), 'end': datetime.timedelta(seconds=1830, microseconds=670000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: ahead.\\n\\niyeshia: Okay.\\n\\niyeshia: so right now, we have a role play example between a manager and you. Let's say you would.\\n\\niyeshia: it could be data science. Related. Right? So from here, I'm going to\\n\\niyeshia: probably volunteer, because I'm not sure if people will volunteer to be the manager and someone be you\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='185-189', metadata={'start': datetime.timedelta(seconds=1829, microseconds=480000), 'end': datetime.timedelta(seconds=1857, microseconds=657000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: probably volunteer, because I'm not sure if people will volunteer to be the manager and someone be you\\n\\niyeshia: So let me see who I can get.\\n\\niyeshia: Okay, I'll go with David for manager, and I'll go for\\n\\niyeshia: Let's try, Kevin for you.\\n\\niyeshia: If you have to read this role, play example.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='189-193', metadata={'start': datetime.timedelta(seconds=1850, microseconds=520000), 'end': datetime.timedelta(seconds=1877, microseconds=689000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text='iyeshia: If you have to read this role, play example.\\n\\nDavid Rodriguez: Should I start now?\\n\\nCUNY Tech Prep (CTP): Kevin, you there?\\n\\nCUNY Tech Prep (CTP): Kevin? Chen.\\n\\nKevin Zheng: Right, right.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='193-197', metadata={'start': datetime.timedelta(seconds=1874, microseconds=660000), 'end': datetime.timedelta(seconds=1892, microseconds=270000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'David Rodriguez', 'iyeshia', 'Kevin Zheng'})}),\n",
+       " Chunk(text=\"Kevin Zheng: Right, right.\\n\\nCUNY Tech Prep (CTP): Alright!\\n\\nDavid Rodriguez: Great I'll start.\\n\\nDavid Rodriguez: Is there anything else you'd like to talk about?\\n\\nKevin Zheng: Yes, as you know, I've been taking on additional responsibilities since we used the team, and I'd like to speak to you about my conversation package.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='197-201', metadata={'start': datetime.timedelta(seconds=1891, microseconds=450000), 'end': datetime.timedelta(seconds=1910, microseconds=499000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'David Rodriguez', 'Kevin Zheng'})}),\n",
+       " Chunk(text=\"Kevin Zheng: Yes, as you know, I've been taking on additional responsibilities since we used the team, and I'd like to speak to you about my conversation package.\\n\\nDavid Rodriguez: We really appreciate your hard work.\\n\\nDavid Rodriguez: but it's still a tough economy, and we're not really in a position to give you anything more than a 2% raise. We can talk about a raise at your next review in about 6 months.\\n\\nKevin Zheng: I do understand that the economy has made things difficult. Can we set a time to discuss my compensation again before my next schedule Review.\\n\\nKevin Zheng: I appreciate an opportunity to talk in more detail on the additional work I've taken on, and its impact.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='201-205', metadata={'start': datetime.timedelta(seconds=1901, microseconds=690000), 'end': datetime.timedelta(seconds=1938, microseconds=959000), 'speakers': frozenset({'David Rodriguez', 'Kevin Zheng'})}),\n",
+       " Chunk(text=\"Kevin Zheng: I appreciate an opportunity to talk in more detail on the additional work I've taken on, and its impact.\\n\\nDavid Rodriguez: Sure that makes sense.\\n\\nDavid Rodriguez: I want to make sure you heard how about a month.\\n\\nKevin Zheng: Great. Thank you. I'll find some time on your calendar for us to meet.\\n\\niyeshia: Thank you. So with that, said, I. Just want to open up the the floor. To everyone. What did you notice?\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='205-209', metadata={'start': datetime.timedelta(seconds=1933, microseconds=720000), 'end': datetime.timedelta(seconds=1967, microseconds=303000), 'speakers': frozenset({'David Rodriguez', 'iyeshia', 'Kevin Zheng'})}),\n",
+       " Chunk(text=\"iyeshia: Thank you. So with that, said, I. Just want to open up the the floor. To everyone. What did you notice?\\n\\niyeshia: that during the the role play. That the let's say the data scientists who was played by Kevin,\\n\\niyeshia: did as far as like, maybe something different from your responses that you put in the chat. Did y'all notice anything differently?\\n\\niyeshia: Hey, Devin?\\n\\nDevin Xie (no cam): I don't know if I'm correct. But I think\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='209-213', metadata={'start': datetime.timedelta(seconds=1957, microseconds=300000), 'end': datetime.timedelta(seconds=2005, microseconds=496000), 'speakers': frozenset({'iyeshia', 'Devin Xie (no cam)'})}),\n",
+       " Chunk(text=\"Devin Xie (no cam): I don't know if I'm correct. But I think\\n\\nDevin Xie (no cam): the data scientists or us in this situation, we try to like Scheduler, a review like\\n\\nDevin Xie (no cam): in a later time.\\n\\niyeshia: absolutely. Thank you. He took initiative and be like, you know, hey, let me, let me get on your calendar for next time, instead of just like waiting around, you know, people be like, Oh, I'll get back to you and things like that. He's like, no, we can. We can discuss later, like, what's your schedule like? So that\\n\\niyeshia: that forwardness of just, you know, following up and seeing it through is definitely helpful.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='213-217', metadata={'start': datetime.timedelta(seconds=2002, microseconds=950000), 'end': datetime.timedelta(seconds=2041, microseconds=590000), 'speakers': frozenset({'iyeshia', 'Devin Xie (no cam)'})}),\n",
+       " Chunk(text=\"iyeshia: that forwardness of just, you know, following up and seeing it through is definitely helpful.\\n\\niyeshia: So and so, for now I would say this would take about maybe\\n\\niyeshia: so final reflection. We could talk about this for like maybe 3\\xa0min, or anybody could just like popcorn it out unless I just call on them. But for today's learning from the workshop what are some things you can generally expect when you 1st join a company? What is a manager's role in your success? And how do you find out your measures of success? Does anyone want to\\n\\niyeshia: volunteer and answer any of the any of the 3 questions that are of their choice\\n\\niyeshia: before I call on someone.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='217-221', metadata={'start': datetime.timedelta(seconds=2035, microseconds=850000), 'end': datetime.timedelta(seconds=2087, microseconds=550000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: before I call on someone.\\n\\niyeshia: Okay, anybody but Devin.\\n\\niyeshia: See, I'm gonna go with anthony.\\n\\nAnthony Jerez: Yes, I'm here.\\n\\niyeshia: Which question would you like to answer? You had to reflect.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='221-225', metadata={'start': datetime.timedelta(seconds=2086, microseconds=20000), 'end': datetime.timedelta(seconds=2122, microseconds=210000), 'speakers': frozenset({'Anthony Jerez', 'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: Which question would you like to answer? You had to reflect.\\n\\nAnthony Jerez: On, I would say the 1st one.\\n\\niyeshia: Okay, go for it.\\n\\nAnthony Jerez: So some major things that I would expect would be we're going through like sessions like orientation, and like onboarding\\n\\nAnthony Jerez: also knowledge about like some some resources resources that we would have access to at any point.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='225-229', metadata={'start': datetime.timedelta(seconds=2119, microseconds=390000), 'end': datetime.timedelta(seconds=2147, microseconds=390000), 'speakers': frozenset({'Anthony Jerez', 'iyeshia'})}),\n",
+       " Chunk(text=\"Anthony Jerez: also knowledge about like some some resources resources that we would have access to at any point.\\n\\nAnthony Jerez: And yeah, stuff like that. I would say.\\n\\niyeshia: Thank you, Anthony, for sharing.\\n\\niyeshia: and then let me see, trying to see who's not making eye contact. Oh, oh, not everybody looks okay. So let's go with\\n\\niyeshia: Ibrahim.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='229-233', metadata={'start': datetime.timedelta(seconds=2139, microseconds=43000), 'end': datetime.timedelta(seconds=2167, microseconds=810000), 'speakers': frozenset({'Anthony Jerez', 'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: Ibrahim.\\n\\nIbrahim Faruquee: Yeah, I'll answer question, too.\\n\\nIbrahim Faruquee: So your manager's role is mainly like for the company to manage like people and make sure that the right persons for the right job, but they can be like a mentor figure for you. So like, if there can be like good mentors who like help you throughout the process and help you with a raise, or they could also like, be difficult and make that like harder for you. But they're kind of. It's not like there's nothing to be, I guess, expected from a manager. It's just like\\n\\nIbrahim Faruquee: what they like. What do you, I guess. What do you end up with.\\n\\nIbrahim Faruquee: or what do you make the most of.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='233-237', metadata={'start': datetime.timedelta(seconds=2166, microseconds=780000), 'end': datetime.timedelta(seconds=2208, microseconds=880000), 'speakers': frozenset({'iyeshia', 'Ibrahim Faruquee'})}),\n",
+       " Chunk(text=\"Ibrahim Faruquee: or what do you make the most of.\\n\\niyeshia: Awesome. Thank you.\\n\\niyeshia: And then for the 3rd question.\\n\\niyeshia: and we're gonna go for Isabel.\\n\\nIsabel Loçi: Hello!\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='237-241', metadata={'start': datetime.timedelta(seconds=2207, microseconds=390000), 'end': datetime.timedelta(seconds=2223, microseconds=750000), 'speakers': frozenset({'Isabel Loçi', 'iyeshia', 'Ibrahim Faruquee'})}),\n",
+       " Chunk(text=\"Isabel Loçi: Hello!\\n\\niyeshia: Hello!\\n\\nIsabel Loçi: Sorry. My Internet's horrible, and might I might disconnect?\\n\\nIsabel Loçi: I'll see if I can answer the 3rd one. How do you find your measures of success.\\n\\nIsabel Loçi: I would say, ask for feedback from other people elsewhere, from other colleagues, from your manager.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='241-245', metadata={'start': datetime.timedelta(seconds=2222, microseconds=900000), 'end': datetime.timedelta(seconds=2245, microseconds=189000), 'speakers': frozenset({'Isabel Loçi', 'iyeshia'})}),\n",
+       " Chunk(text=\"Isabel Loçi: I would say, ask for feedback from other people elsewhere, from other colleagues, from your manager.\\n\\nIsabel Loçi: That way you get a better understanding of where you are right now. And also I would say to also look back on the goals that you've set for yourself, and see if you've reached those goals as well, and that would be a good measure of success.\\n\\niyeshia: Okay, very good. All right.\\n\\niyeshia: So yeah, definitely helped make my life easier with this presentation. So thank you. I'm glad things are sticking and so with that said, We will go and launch Kahoo. But before I do that I definitely want to say just be mindful of these things.\\n\\niyeshia: When you are starting in your 1st year, in your career. As it was stated in one of the slides, you don't have to have it all figured out is the perfect time to ask questions. You're gonna make mistakes, or you're not. But if you do, it's okay. Because it's all gonna be a learning process. For your 1st year, and your managers expect that.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='245-249', metadata={'start': datetime.timedelta(seconds=2238, microseconds=660000), 'end': datetime.timedelta(seconds=2306, microseconds=319000), 'speakers': frozenset({'Isabel Loçi', 'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: When you are starting in your 1st year, in your career. As it was stated in one of the slides, you don't have to have it all figured out is the perfect time to ask questions. You're gonna make mistakes, or you're not. But if you do, it's okay. Because it's all gonna be a learning process. For your 1st year, and your managers expect that.\\n\\niyeshia: So just keep that in mind.\\n\\niyeshia: And then, if you are going to seek, you know, support, I think. It was great that it's a bell, stated asking for feedback from your manager, but you could also ask for feedback from your teammates, too. Cause they, if you work with them closely. If you have a team to see, like what your areas of strengths are your areas of growth.\\n\\niyeshia: and things that you're learning. That could be helpful. Towards that process if you're going up for a raise. But sometimes people could see our strengths stronger or clearer, or even faster than we can, and we don't even realize it.\\n\\niyeshia: And then even asking your mentors, too, as well, can be helpful. And then.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='249-253', metadata={'start': datetime.timedelta(seconds=2282, microseconds=771000), 'end': datetime.timedelta(seconds=2345, microseconds=799000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: And then even asking your mentors, too, as well, can be helpful. And then.\\n\\niyeshia: if you are going to negotiate, remember to keep for raise, to keep that documented focus on your skills. Make sure you do your research on the market and definitely, just try to figure out if you can negotiate other things.\\n\\niyeshia: And when it comes to relationships, at work, you wanna make sure to treat everybody equally so I hope that that helps. If you didn't get anything else. I hope that's what helps you with them\\n\\niyeshia: with your 1st year? As you enter into your careers. And so with that said, we'll go into Kahoot.\\n\\niyeshia: and so I'm going to launch it now.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='253-257', metadata={'start': datetime.timedelta(seconds=2341, microseconds=80000), 'end': datetime.timedelta(seconds=2390, microseconds=330000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: and so I'm going to launch it now.\\n\\niyeshia: Let's get it started.\\n\\niyeshia: I don't think my headphones died so\\n\\niyeshia: got 33 people on here, and only 16.\\n\\niyeshia: Okay.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='257-261', metadata={'start': datetime.timedelta(seconds=2387, microseconds=420000), 'end': datetime.timedelta(seconds=2445, microseconds=90000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: Okay.\\n\\niyeshia: sound. Good.\\n\\niyeshia: 33.\\n\\niyeshia: Well, I didn't cut myself. That's Kevin. You're playing too.\\n\\niyeshia: Figure out how to be successful on my own.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='261-265', metadata={'start': datetime.timedelta(seconds=2444, microseconds=230000), 'end': datetime.timedelta(seconds=2550, microseconds=965000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: Figure out how to be successful on my own.\\n\\niyeshia: Oh, you do not have to figure that out.\\n\\niyeshia: That's why we tell you, have mentors, extra peers and things of that nature.\\n\\niyeshia: Well, yeah, shout out to the 22. It's okay. One. I'll take the 22 others, you know. Wow!\\n\\niyeshia: Your boss. My goodness, okay, is in the lead.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='265-269', metadata={'start': datetime.timedelta(seconds=2547, microseconds=780000), 'end': datetime.timedelta(seconds=2583, microseconds=779000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: Your boss. My goodness, okay, is in the lead.\\n\\niyeshia: So let's go ahead\\n\\niyeshia: who should not go to\\n\\niyeshia: thank you definitely. The worst thing you could do is talk to no one. If you need support with something.\\n\\niyeshia: So I hope.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='269-273', metadata={'start': datetime.timedelta(seconds=2578, microseconds=507000), 'end': datetime.timedelta(seconds=2624, microseconds=130000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: So I hope.\\n\\nCUNY Tech Prep (CTP): I am shocked.\\n\\niyeshia: That one should you not go to? So yeah.\\n\\niyeshia: let's see. Okay, Jamie is in the name.\\n\\niyeshia: Okay, let's go.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='273-277', metadata={'start': datetime.timedelta(seconds=2622, microseconds=675000), 'end': datetime.timedelta(seconds=2641, microseconds=959000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: Okay, let's go.\\n\\niyeshia: 3rd question, what are not considerations to mention when providing reasons for a salary increase.\\n\\niyeshia: There aren't enough.\\n\\niyeshia: Okay? 18. Yes, the cost of living. That is correct. You should not consider that\\n\\niyeshia: They don't, they don't. They don't care so definitely the other ones. You could do that on your own when you're doing your negotiating your your budget. But don't come out and say, like, Hey, the cost of living in this city? They're like\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='277-281', metadata={'start': datetime.timedelta(seconds=2640, microseconds=140000), 'end': datetime.timedelta(seconds=2695, microseconds=309000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: They don't, they don't. They don't care so definitely the other ones. You could do that on your own when you're doing your negotiating your your budget. But don't come out and say, like, Hey, the cost of living in this city? They're like\\n\\niyeshia: or virtual.\\n\\niyeshia: our office in California, we have no idea. So yeah, just just keep that in mind. So good job to the the cost of living folks.\\n\\niyeshia: Okay, David Rv is in the lead.\\n\\niyeshia: Okay, let's go to the next question.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='281-285', metadata={'start': datetime.timedelta(seconds=2680, microseconds=250000), 'end': datetime.timedelta(seconds=2715, microseconds=419000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: Okay, let's go to the next question.\\n\\niyeshia: what is a thoughtful way to actually negotiate?\\n\\niyeshia: So we can negotiate? Very good. It's a thoughtful way to act\\n\\niyeshia: and I think most of y'all got that in the chat. I saw some other answers. I'm gonna leave that questionable. But for the ones who did shout out to y'all.\\n\\niyeshia: So I think this is the last question.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='285-289', metadata={'start': datetime.timedelta(seconds=2712, microseconds=460000), 'end': datetime.timedelta(seconds=2758, microseconds=389000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: So I think this is the last question.\\n\\niyeshia: But Kyle is in the lead now, and so shouts to Kyle. So here goes the last question.\\n\\niyeshia: The most important relationship at work is with my manager.\\n\\niyeshia: Shout out to the people who said, False I said, it is important, but not the most important. Yeah, there's team this\\n\\niyeshia: Ceos, what about yourself? You know, things like that? So I just want to keep that in mind. So\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='289-293', metadata={'start': datetime.timedelta(seconds=2755, microseconds=680000), 'end': datetime.timedelta(seconds=2795, microseconds=579000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: Ceos, what about yourself? You know, things like that? So I just want to keep that in mind. So\\n\\niyeshia: yeah, let's always about that. So let's go to the windows.\\n\\niyeshia: Okay, let's okay.\\n\\niyeshia: Number one.\\n\\niyeshia: Okay, at the bottom.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='293-297', metadata={'start': datetime.timedelta(seconds=2788, microseconds=670000), 'end': datetime.timedelta(seconds=2827, microseconds=966000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: Okay, at the bottom.\\n\\niyeshia: Okay, with that, said\\n\\niyeshia: the last thing I will do. These are some follow up questions that you can ask your career coach. If I'm your career coach, you could definitely ask me that.\\n\\niyeshia: But how much of a raise. Can you ask for? When do you? Should you start a retirement fund? I would say, Asap, how long should you take to figure out if your company is a good fit, and how do you approach a conflict with a manager or coworker? So if you have any questions about those, please feel free to reach out to me or your career coach, if you would like to discuss further details, and I do want to be mindful of time.\\n\\niyeshia: And so I want to thank you for your time, and just want to let you know. This is the feedback form that really helps me with this presentation\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='297-301', metadata={'start': datetime.timedelta(seconds=2822, microseconds=600000), 'end': datetime.timedelta(seconds=2879, microseconds=310000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: And so I want to thank you for your time, and just want to let you know. This is the feedback form that really helps me with this presentation\\n\\niyeshia: and help me to deliver it better or worse. So if I did a good job, that's great. But I'm going to put this in the chat.\\n\\niyeshia: So you could fill that out now and then. Also want to invite you all to Rsvp. For Ctp's graduation.\\n\\niyeshia: So I would say, you can do that right now as well\\n\\niyeshia: and please register as a student. For those who can attend. You're more than welcome for the I believe the May 20th ones. If you cannot attend because you have a final, you have an internship. It is okay. There's no pressure. We're not going to be like, Hey, you can't you got to make it? No, we totally get it, I mean, we understand. So blessings on your finals\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='301-305', metadata={'start': datetime.timedelta(seconds=2870, microseconds=460000), 'end': datetime.timedelta(seconds=2919, microseconds=640000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: and please register as a student. For those who can attend. You're more than welcome for the I believe the May 20th ones. If you cannot attend because you have a final, you have an internship. It is okay. There's no pressure. We're not going to be like, Hey, you can't you got to make it? No, we totally get it, I mean, we understand. So blessings on your finals\\n\\niyeshia: and your projects. But for those who can't attend come through. It's going to be great to see your projects to see each other one last time, like Demo Night. And it's gonna be it's going to be a great time as we close out the the cohort in in May. So, and also to Devin's question, just one more time. We won't leave you hanging you will get an invite to be alumni\\n\\niyeshia: for Ctp, and that way you'll be with everybody who did the cohorts before your cohorts, one through 9 and so it'll be one through 10 now. And so that'll be like over a thousand people in that slack channel. So you can definitely network with your peers and the people who came before you. So yeah, just keep that in mind.\\n\\niyeshia: So thank you all. And I will stop sharing.\\n\\niyeshia: And yeah, please. Rsvp for the graduation. And please fill out that feedback form. It is greatly appreciative. I want to thank you for your time lessons on your projects. And yeah, if any of my fellows have any questions about the presentation, you can highlight me on slack. I am there to support you, and other than that. I want to thank you. And, Kevin, I think it's all yours now.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='305-309', metadata={'start': datetime.timedelta(seconds=2901, microseconds=130000), 'end': datetime.timedelta(seconds=2988, microseconds=469000), 'speakers': frozenset({'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: And yeah, please. Rsvp for the graduation. And please fill out that feedback form. It is greatly appreciative. I want to thank you for your time lessons on your projects. And yeah, if any of my fellows have any questions about the presentation, you can highlight me on slack. I am there to support you, and other than that. I want to thank you. And, Kevin, I think it's all yours now.\\n\\nCUNY Tech Prep (CTP): Definitely. Thank you, Aisha, for the valuable tips. I think. A lot of students, a lot of the students I've spoken to, at least are.\\n\\nCUNY Tech Prep (CTP): have got recently gotten jobs or are very close to getting them, and\\n\\nCUNY Tech Prep (CTP): they will find this material very useful. I'm actually kind of glad I remember to click record at the beginning, because some of them are like in traffic right now.\\n\\niyeshia: Got it. Okay.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='309-313', metadata={'start': datetime.timedelta(seconds=2964, microseconds=60000), 'end': datetime.timedelta(seconds=3011, microseconds=947000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
+       " Chunk(text=\"iyeshia: Got it. Okay.\\n\\niyeshia: I'm glad.\\n\\nCUNY Tech Prep (CTP): Okay, thank you. So I'm gonna give you all 10\\xa0min to fill this out. Since you got 2 things to fill out. One is the inviting yourself to the graduation, and then 2 is the survey.\\n\\nCUNY Tech Prep (CTP): Alright, so we will come back at 7, 35.\\n\\nCUNY Tech Prep (CTP): Oh, yes, there's good news for those of you who missed it.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='313-317', metadata={'start': datetime.timedelta(seconds=3010, microseconds=980000), 'end': datetime.timedelta(seconds=3063, microseconds=720000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
+       " Chunk(text=\"CUNY Tech Prep (CTP): Oh, yes, there's good news for those of you who missed it.\\n\\nCUNY Tech Prep (CTP): There's no homework for the next 2 weeks, and there's spring break. So which means.\\n\\nCUNY Tech Prep (CTP): after this class, I'll be seeing you the second Friday from now.\\n\\nCUNY Tech Prep (CTP): Not next Friday.\\n\\nCUNY Tech Prep (CTP): No, a break is not exactly a break, so you have projects.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='317-321', metadata={'start': datetime.timedelta(seconds=3060, microseconds=740000), 'end': datetime.timedelta(seconds=3115, microseconds=180000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
+       " Chunk(text='CUNY Tech Prep (CTP): No, a break is not exactly a break, so you have projects.\\n\\nCUNY Tech Prep (CTP): This is time to do your projects.\\n\\nCUNY Tech Prep (CTP): Alright, so just as a gift to all the people who are in class.\\n\\nCUNY Tech Prep (CTP): If you check the homework sheet.\\n\\nCUNY Tech Prep (CTP): there is actually a column where you can grade yourselves. You can give yourself any emoji you want.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='321-325', metadata={'start': datetime.timedelta(seconds=3110, microseconds=350000), 'end': datetime.timedelta(seconds=3275, microseconds=10000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
+       " Chunk(text=\"CUNY Tech Prep (CTP): there is actually a column where you can grade yourselves. You can give yourself any emoji you want.\\n\\nCUNY Tech Prep (CTP): I'll let you figure out which one that is\\n\\nCUNY Tech Prep (CTP): alright. We're back.\\n\\nCUNY Tech Prep (CTP): So go for the rest of this day. So we're gonna I'm gonna put you in breakout rooms\\n\\nCUNY Tech Prep (CTP): for your projects.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='325-329', metadata={'start': datetime.timedelta(seconds=3269, microseconds=390000), 'end': datetime.timedelta(seconds=3591, microseconds=359000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
+       " Chunk(text='CUNY Tech Prep (CTP): for your projects.\\n\\nCUNY Tech Prep (CTP): And what I want you to do is I need to think about the state of the project. You, the the state the project is in.\\n\\nCUNY Tech Prep (CTP): I will be coming around to check in\\n\\nCUNY Tech Prep (CTP): because you have 2 weeks and no homework.\\n\\nCUNY Tech Prep (CTP): I want you to put your all into the project. So', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='329-333', metadata={'start': datetime.timedelta(seconds=3589, microseconds=600000), 'end': datetime.timedelta(seconds=3613, microseconds=269000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
+       " Chunk(text='CUNY Tech Prep (CTP): I want you to put your all into the project. So\\n\\nCUNY Tech Prep (CTP): let me make the breakout rooms first.st\\n\\nCUNY Tech Prep (CTP): Basically, what I want you to do is plan out the next 2 weeks. Okay, what do you want? What? What is missing from\\n\\nCUNY Tech Prep (CTP): your project that you need to complete it?\\n\\nCUNY Tech Prep (CTP): And how are you going to get there in the next 2 weeks?', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='333-337', metadata={'start': datetime.timedelta(seconds=3609, microseconds=440000), 'end': datetime.timedelta(seconds=3646, microseconds=619000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
+       " Chunk(text=\"CUNY Tech Prep (CTP): And how are you going to get there in the next 2 weeks?\\n\\nCUNY Tech Prep (CTP): Because after the next 2 weeks you literally have only 2 weeks left.\\n\\nCUNY Tech Prep (CTP): There's class. There's week 11, and then there's week 12\\n\\nCUNY Tech Prep (CTP): week. 13 is like May May 10th or May 9, th\\n\\nCUNY Tech Prep (CTP): and then the week after that, I believe, is\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='337-341', metadata={'start': datetime.timedelta(seconds=3643, microseconds=720000), 'end': datetime.timedelta(seconds=3672, microseconds=696000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
+       " Chunk(text=\"CUNY Tech Prep (CTP): and then the week after that, I believe, is\\n\\nCUNY Tech Prep (CTP): when you're going to do Demos.\\n\\nCUNY Tech Prep (CTP): I could be wrong.\\n\\nCUNY Tech Prep (CTP): Alright. You can pick the rooms. Now go into your rooms.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='341-344', metadata={'start': datetime.timedelta(seconds=3670, microseconds=320000), 'end': datetime.timedelta(seconds=3682, microseconds=370000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}))"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "web_vtt_content.get_chunks()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

pyproject.toml CHANGED Viewed

@@ -21,10 +21,10 @@ classifiers = [
 dependencies = [
     "pydantic>=2.11.2",
     "pydantic-settings>=2.8.1",
     "more-itertools>=10.6.0",
     "python-dotenv>=1.1.0",
     "loguru>=0.7.3",
-    "fastapi>=0.115.12",
     "dependency-injector>=4.46.0",
     "pytz>=2025.2",
     "apscheduler>=3.11.0",
@@ -36,7 +36,10 @@ dependencies = [
     "slack_bolt>=1.23.0",
     "pymongo>=4.11.3 ",
     "motor>=3.7.0",
-    "openai>=1.70.0"
 ]
 [project.optional-dependencies]
@@ -47,7 +50,7 @@ dev = [
     "types-pytz>=2025.2",
     "black>=25.1.0",
     "isort>=6.0.1",
-    "ruff>=0.11.4",
 ]
 [project.urls]

 dependencies = [
     "pydantic>=2.11.2",
     "pydantic-settings>=2.8.1",
+    "cachetools>=5.5.2",
     "more-itertools>=10.6.0",
     "python-dotenv>=1.1.0",
     "loguru>=0.7.3",
     "dependency-injector>=4.46.0",
     "pytz>=2025.2",
     "apscheduler>=3.11.0",
     "slack_bolt>=1.23.0",
     "pymongo>=4.11.3 ",
     "motor>=3.7.0",
+    "openai>=1.70.0",
+    "google-api-python-client>=2.167.0",
+    "google-auth>=2.39.0",
+    "google-auth-oauthlib>=1.2.1"
 ]
 [project.optional-dependencies]
     "types-pytz>=2025.2",
     "black>=25.1.0",
     "isort>=6.0.1",
+    "ruff>=0.11.4"
 ]
 [project.urls]

src/ctp_slack_bot/app.py CHANGED Viewed

@@ -1,9 +1,24 @@
-from asyncio import run
 from loguru import logger
 from ctp_slack_bot.containers import Container
 from ctp_slack_bot.core.logging import setup_logging
 async def main() -> None:
     # Setup logging.
     setup_logging()
@@ -16,19 +31,23 @@ async def main() -> None:
     # Kick off services which should be active from the start.
     container.content_ingestion_service()
     container.question_dispatch_service()
-    # Start the scheduler.
-    schedule_service = container.schedule_service()
-    schedule_service.start()
-    # Start the Slack socket mode handler in a background thread.
     socket_mode_handler = container.socket_mode_handler()
-    logger.info("Starting Slack Socket Mode handler…")
-    await socket_mode_handler.start_async()
-    # Shutdown. (This will never execute, because the socket mode handler never returns.)
-    logger.info("Shutting down application…")
-    schedule_service.stop()
 if __name__ == "__main__":
     run(main())

+from asyncio import all_tasks, CancelledError, create_task, current_task, get_running_loop, run
 from loguru import logger
+from signal import SIGINT, SIGTERM
+from typing import Any, Callable
 from ctp_slack_bot.containers import Container
 from ctp_slack_bot.core.logging import setup_logging
+async def handle_shutdown_signal() -> None:
+    """Cancel every other unfinished task so the application can shut down.
+
+    The task running this coroutine is left alone so that it can finish
+    logging; all remaining tasks reported by ``all_tasks()`` are cancelled.
+    """
+    logger.info("Received shutdown signal.")
+    # No await happens below, so the current task cannot change mid-loop.
+    this_task = current_task()
+    for candidate in all_tasks():
+        if candidate is this_task or candidate.done():
+            continue
+        candidate.cancel()
+        logger.trace("Cancelled task {}.", candidate.get_name())
+    logger.info("Cancelled all tasks.")
+def create_shutdown_signal_handler() -> Callable[[], None]:
+    """Build a synchronous callback that schedules the async shutdown routine.
+
+    Returns:
+        A zero-argument callable that, when invoked from within a running
+        event loop, schedules ``handle_shutdown_signal()`` as a task.
+    """
+    # The event loop only keeps weak references to tasks, so hold a strong
+    # reference here until each shutdown task completes; otherwise it could
+    # be garbage-collected before it has cancelled anything.
+    pending_shutdown_tasks = set()
+    def shutdown_signal_handler() -> None:
+        task = create_task(handle_shutdown_signal())
+        pending_shutdown_tasks.add(task)
+        task.add_done_callback(pending_shutdown_tasks.discard)
+    return shutdown_signal_handler
 async def main() -> None:
     # Setup logging.
     setup_logging()
     # Kick off services which should be active from the start.
     container.content_ingestion_service()
     container.question_dispatch_service()
+    container.schedule_service()
+    # Start the Slack socket mode handler in the background.
     socket_mode_handler = container.socket_mode_handler()
+    slack_bolt_task = create_task(socket_mode_handler.start_async())
+    shutdown_signal_handler = create_shutdown_signal_handler()
+    loop = get_running_loop()
+    loop.add_signal_handler(SIGINT, shutdown_signal_handler)
+    loop.add_signal_handler(SIGTERM, shutdown_signal_handler)
+    try:
+        logger.info("Starting Slack Socket Mode handler…")
+        await slack_bolt_task
+    except CancelledError:
+        logger.info("Shutting down application…")
+    finally:
+        await socket_mode_handler.close_async()
+        await container.shutdown_resources()
 if __name__ == "__main__":
     run(main())

src/ctp_slack_bot/containers.py CHANGED Viewed

@@ -11,9 +11,10 @@ from ctp_slack_bot.services.content_ingestion_service import ContentIngestionSer
 from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
 from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
 from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
 from ctp_slack_bot.services.language_model_service import LanguageModelService
 from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
-from ctp_slack_bot.services.schedule_service import ScheduleService
 from ctp_slack_bot.services.slack_service import SlackServiceResource
 from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
 from ctp_slack_bot.services.vectorization_service import VectorizationService
@@ -22,7 +23,7 @@ from ctp_slack_bot.services.vectorization_service import VectorizationService
 class Container(DeclarativeContainer):
     settings = Singleton(Settings)
     event_brokerage_service = Singleton(EventBrokerageService)
-    schedule_service = Singleton(ScheduleService, settings=settings)
     mongo_db = Resource(MongoDBResource, settings=settings) # TODO: generalize to any database.
     vectorized_chunk_repository = Singleton(MongoVectorizedChunkRepository, mongo_db=mongo_db)
     vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db)
@@ -36,3 +37,4 @@ class Container(DeclarativeContainer):
     slack_bolt_app = Singleton(AsyncApp, token=settings.provided.SLACK_BOT_TOKEN().get_secret_value())
     slack_service = Resource(SlackServiceResource, event_brokerage_service=event_brokerage_service, slack_bolt_app=slack_bolt_app)
     socket_mode_handler = Singleton(lambda _, app, app_token: AsyncSocketModeHandler(app, app_token), slack_service, slack_bolt_app, settings.provided.SLACK_APP_TOKEN().get_secret_value())

 from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
 from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
 from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
+from ctp_slack_bot.services.google_drive_service import GoogleDriveService
 from ctp_slack_bot.services.language_model_service import LanguageModelService
 from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
+from ctp_slack_bot.services.schedule_service import ScheduleServiceResource
 from ctp_slack_bot.services.slack_service import SlackServiceResource
 from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
 from ctp_slack_bot.services.vectorization_service import VectorizationService
 class Container(DeclarativeContainer):
     settings = Singleton(Settings)
     event_brokerage_service = Singleton(EventBrokerageService)
+    schedule_service = Resource(ScheduleServiceResource, settings=settings)
     mongo_db = Resource(MongoDBResource, settings=settings) # TODO: generalize to any database.
     vectorized_chunk_repository = Singleton(MongoVectorizedChunkRepository, mongo_db=mongo_db)
     vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db)
     slack_bolt_app = Singleton(AsyncApp, token=settings.provided.SLACK_BOT_TOKEN().get_secret_value())
     slack_service = Resource(SlackServiceResource, event_brokerage_service=event_brokerage_service, slack_bolt_app=slack_bolt_app)
     socket_mode_handler = Singleton(lambda _, app, app_token: AsyncSocketModeHandler(app, app_token), slack_service, slack_bolt_app, settings.provided.SLACK_APP_TOKEN().get_secret_value())
+    google_drive_service = Singleton(GoogleDriveService, settings=settings)

src/ctp_slack_bot/core/config.py CHANGED Viewed

@@ -39,7 +39,7 @@ class Settings(BaseSettings):
     SCORE_THRESHOLD: NonNegativeFloat
     # Hugging Face Configuration
-    HF_API_TOKEN: Optional[SecretStr] = None
     # OpenAI Configuration
     OPENAI_API_KEY: SecretStr
@@ -48,6 +48,22 @@ class Settings(BaseSettings):
     TEMPERATURE: NonNegativeFloat
     SYSTEM_PROMPT: str
     model_config = SettingsConfigDict(
         env_file=".env",
         env_file_encoding="utf-8",

     SCORE_THRESHOLD: NonNegativeFloat
     # Hugging Face Configuration
+    HF_API_TOKEN: Optional[SecretStr] = None # TODO: Currently, this is unused.
     # OpenAI Configuration
     OPENAI_API_KEY: SecretStr
     TEMPERATURE: NonNegativeFloat
     SYSTEM_PROMPT: str
+    # Google Drive Configuration
+    GOOGLE_DRIVE_ROOT_ID: str
+    GOOGLE_PROJECT_ID: str
+    GOOGLE_PRIVATE_KEY_ID: SecretStr
+    GOOGLE_PRIVATE_KEY: SecretStr
+    GOOGLE_CLIENT_ID: str
+    GOOGLE_CLIENT_EMAIL: str
+    GOOGLE_AUTH_URI: str = "https://accounts.google.com/o/oauth2/auth"
+    GOOGLE_TOKEN_URI: str = "https://oauth2.googleapis.com/token"
+    GOOGLE_AUTH_PROVIDER_CERT_URL: str = "https://www.googleapis.com/oauth2/v1/certs"
+    GOOGLE_CLIENT_CERT_URL: str = "https://www.googleapis.com/robot/v1/metadata/x509/ctp-slack-bot-714%40voltaic-reducer-294821.iam.gserviceaccount.com"
+    GOOGLE_UNIVERSE_DOMAIN: str = "googleapis.com"
+    # File Monitoring Configuration
+    FILE_MONITOR_ROOT_PATH: Optional[str] = None
     model_config = SettingsConfigDict(
         env_file=".env",
         env_file_encoding="utf-8",

src/ctp_slack_bot/core/logging.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord
 from loguru import logger
 from os import getenv
 from sys import stderr
@@ -90,7 +90,9 @@ def setup_logging() -> None:
     basicConfig(handlers=[InterceptHandler()], level=0, force=True)
     # Update logging levels for some noisy libraries.
-    for logger_name in ("uvicorn", "uvicorn.error", "fastapi", "httpx", "apscheduler", "pymongo"):
         getLogger(logger_name).setLevel(INFO)
     logger.info(f"Logging configured with level {log_level}")

+from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord, WARNING
 from loguru import logger
 from os import getenv
 from sys import stderr
     basicConfig(handlers=[InterceptHandler()], level=0, force=True)
     # Update logging levels for some noisy libraries.
+    for logger_name in ("uvicorn", "uvicorn.error", "fastapi", "httpx", "pymongo"):
         getLogger(logger_name).setLevel(INFO)
+    # APScheduler is chattier than the rest, so only let warnings through.
+    # The trailing comma is required: ("apscheduler") is just a string, and
+    # iterating it would configure loggers named "a", "p", "s", … instead.
+    for logger_name in ("apscheduler",):
+        getLogger(logger_name).setLevel(WARNING)
     logger.info(f"Logging configured with level {log_level}")

src/ctp_slack_bot/core/response_rendering.py DELETED Viewed

@@ -1,13 +0,0 @@
-from json import dumps
-from starlette.responses import JSONResponse
-from typing import Any, Self
-class PrettyJSONResponse(JSONResponse):
-    def render(self: Self, content: Any) -> bytes:
-        return dumps(
-            content,
-            ensure_ascii=False,
-            allow_nan=False,
-            indent=4,
-            separators=(", ", ": "),
-        ).encode()

src/ctp_slack_bot/db/mongo_db.py CHANGED Viewed

@@ -1,13 +1,14 @@
-from dependency_injector.resources import Resource
 from motor.motor_asyncio import AsyncIOMotorClient
 from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
 from pymongo.operations import SearchIndexModel
 from loguru import logger
 from pydantic import BaseModel, PrivateAttr
 from typing import Any, Dict, Optional, Self
-import asyncio
 from ctp_slack_bot.core.config import Settings
 class MongoDB(BaseModel):
     """
@@ -16,23 +17,20 @@ class MongoDB(BaseModel):
     settings: Settings
     _client: PrivateAttr = PrivateAttr()
     _db: PrivateAttr = PrivateAttr()
     class Config:
         arbitrary_types_allowed = True
     def __init__(self: Self, **data: Dict[str, Any]) -> None:
         super().__init__(**data)
         logger.debug("Created {}", self.__class__.__name__)
     def connect(self: Self) -> None:
         """Initialize MongoDB client with settings."""
         try:
             connection_string = self.settings.MONGODB_URI.get_secret_value()
-            logger.debug("Connecting to MongoDB using URI: {}", connection_string.replace(
-                connection_string.split('@')[-1].split('/')[0] if '@' in connection_string else '',
-                '[REDACTED]'
-            ))
             # Create client with appropriate settings
             self._client = AsyncIOMotorClient(
                 connection_string,
@@ -43,48 +41,48 @@ class MongoDB(BaseModel):
                 retryWrites=True,
                 w="majority"
             )
             # Set database
             db_name = self.settings.MONGODB_NAME
             self._db = self._client[db_name]
             logger.debug("MongoDB client initialized for database: {}", db_name)
         except Exception as e:
             logger.error("Failed to initialize MongoDB client: {}", e)
             self._client = None
             self._db = None
             raise
     @property
     def client(self: Self) -> AsyncIOMotorClient:
         """Get the MongoDB client instance."""
         if not hasattr(self, '_client') or self._client is None:
-            logger.warning("MongoDB client not initialized. Attempting to initialize.")
             self.connect()
             if not hasattr(self, '_client') or self._client is None:
-                raise ConnectionError("Failed to initialize MongoDB client")
         return self._client
     @property
     def db(self: Self) -> Any:
         """Get the MongoDB database instance."""
         if not hasattr(self, '_db') or self._db is None:
-            logger.warning("MongoDB database not initialized. Attempting to initialize client.")
             self.connect()
             if not hasattr(self, '_db') or self._db is None:
-                raise ConnectionError("Failed to initialize MongoDB database")
         return self._db
     async def ping(self: Self) -> bool:
         """Check if MongoDB connection is alive."""
         try:
             # Get client to ensure we're connected
             client = self.client
             # Try a simple ping command
             await client.admin.command('ping')
-            logger.debug("MongoDB connection is active")
             return True
         except (ConnectionFailure, ServerSelectionTimeoutError) as e:
             logger.error("MongoDB connection failed: {}", e)
@@ -92,7 +90,7 @@ class MongoDB(BaseModel):
         except Exception as e:
             logger.error("Unexpected error during MongoDB ping: {}", e)
             return False
     async def get_collection(self: Self, name: str) -> Any:
         """
         Get a collection by name with validation.
@@ -100,29 +98,29 @@ class MongoDB(BaseModel):
         """
         # First ensure we can connect at all
         if not await self.ping():
-            logger.error("Cannot get collection '{}' - MongoDB connection is not available", name)
-            raise ConnectionError("MongoDB connection is not available")
         try:
             # Get all collection names to check if this one exists
-            logger.debug("Checking if collection '{}' exists", name)
             collection_names = await self.db.list_collection_names()
             if name not in collection_names:
-                logger.info("Collection '{}' does not exist. Creating it.", name)
                 # Create the collection
                 await self.db.create_collection(name)
-                logger.debug("Successfully created collection '{}'", name)
             else:
-                logger.debug("Collection '{}' already exists", name)
             # Get and return the collection
             collection = self.db[name]
             return collection
         except Exception as e:
             logger.error("Error accessing collection '{}': {}", name, e)
             raise
     async def create_indexes(self: Self, collection_name: str) -> None:
         """
         Create a vector search index on a collection.
@@ -131,7 +129,7 @@ class MongoDB(BaseModel):
             collection_name: Name of the collection
         """
         collection = await self.get_collection(collection_name)
         try:
             # Create search index model using MongoDB's recommended approach
             search_index_model = SearchIndexModel(
@@ -149,41 +147,38 @@ class MongoDB(BaseModel):
                 name=f"{collection_name}_vector_index",
                 type="vectorSearch"
             )
             # Create the search index using the motor collection
             result = await collection.create_search_index(search_index_model)
-            logger.info("Vector search index '{}' created for collection {}", result, collection_name)
         except Exception as e:
             if "command not found" in str(e).lower():
                 logger.warning("Vector search not supported by this MongoDB instance. Some functionality may be limited.")
                 # Create a fallback standard index on embedding field
                 await collection.create_index("embedding")
-                logger.info("Created standard index on 'embedding' field as fallback")
             else:
                 logger.error("Failed to create vector index: {}", e)
                 raise
     async def close(self: Self) -> None:
         """Close MongoDB connection."""
         if self._client:
             self._client.close()
-            logger.info("MongoDB connection closed")
             self._client = None
             self._db = None
-class MongoDBResource(Resource):
-    def init(self: Self, settings: Settings) -> MongoDB:
         logger.info("Initializing MongoDB connection for database: {}", settings.MONGODB_NAME)
         mongo_db = MongoDB(settings=settings)
         mongo_db.connect()
-        # Test the connection asynchronously - this will run after init returns
-        asyncio.create_task(self._test_connection(mongo_db))
         return mongo_db
-    async def _test_connection(self, mongo_db: MongoDB) -> None:
         """Test MongoDB connection and log the result."""
         try:
             is_connected = await mongo_db.ping()
@@ -193,11 +188,11 @@ class MongoDBResource(Resource):
                 logger.error("MongoDB connection test failed!")
         except Exception as e:
             logger.error("Error testing MongoDB connection: {}", e)
     async def shutdown(self: Self, mongo_db: MongoDB) -> None:
         """Close MongoDB connection on shutdown."""
         try:
-            logger.info("Closing MongoDB connection...")
             await mongo_db.close()
         except Exception as e:
             logger.error("Error closing MongoDB connection: {}", e)

+from asyncio import create_task
+from dependency_injector.resources import AsyncResource
 from motor.motor_asyncio import AsyncIOMotorClient
 from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
 from pymongo.operations import SearchIndexModel
 from loguru import logger
 from pydantic import BaseModel, PrivateAttr
 from typing import Any, Dict, Optional, Self
 from ctp_slack_bot.core.config import Settings
+from ctp_slack_bot.utils import sanitize_mongo_db_uri
 class MongoDB(BaseModel):
     """
     settings: Settings
     _client: PrivateAttr = PrivateAttr()
     _db: PrivateAttr = PrivateAttr()
     class Config:
         arbitrary_types_allowed = True
     def __init__(self: Self, **data: Dict[str, Any]) -> None:
         super().__init__(**data)
         logger.debug("Created {}", self.__class__.__name__)
     def connect(self: Self) -> None:
         """Initialize MongoDB client with settings."""
         try:
             connection_string = self.settings.MONGODB_URI.get_secret_value()
+            logger.debug("Connecting to MongoDB using URI: {}", sanitize_mongo_db_uri(connection_string))
             # Create client with appropriate settings
             self._client = AsyncIOMotorClient(
                 connection_string,
                 retryWrites=True,
                 w="majority"
             )
             # Set database
             db_name = self.settings.MONGODB_NAME
             self._db = self._client[db_name]
             logger.debug("MongoDB client initialized for database: {}", db_name)
         except Exception as e:
             logger.error("Failed to initialize MongoDB client: {}", e)
             self._client = None
             self._db = None
             raise
     @property
     def client(self: Self) -> AsyncIOMotorClient:
         """Get the MongoDB client instance."""
         if not hasattr(self, '_client') or self._client is None:
+            logger.warning("MongoDB client not initialized. Attempting to initialize…")
             self.connect()
             if not hasattr(self, '_client') or self._client is None:
+                raise ConnectionError("Failed to initialize MongoDB client.")
         return self._client
     @property
     def db(self: Self) -> Any:
         """Get the MongoDB database instance."""
         if not hasattr(self, '_db') or self._db is None:
+            logger.warning("MongoDB database not initialized. Attempting to initialize client…")
             self.connect()
             if not hasattr(self, '_db') or self._db is None:
+                raise ConnectionError("Failed to initialize MongoDB database.")
         return self._db
     async def ping(self: Self) -> bool:
         """Check if MongoDB connection is alive."""
         try:
             # Get client to ensure we're connected
             client = self.client
             # Try a simple ping command
             await client.admin.command('ping')
+            logger.debug("MongoDB connection is active!")
             return True
         except (ConnectionFailure, ServerSelectionTimeoutError) as e:
             logger.error("MongoDB connection failed: {}", e)
         except Exception as e:
             logger.error("Unexpected error during MongoDB ping: {}", e)
             return False
     async def get_collection(self: Self, name: str) -> Any:
         """
         Get a collection by name with validation.
         """
         # First ensure we can connect at all
         if not await self.ping():
+            logger.error("Cannot get collection '{}' because a MongoDB connection is not available.", name)
+            raise ConnectionError("MongoDB connection is not available.")
         try:
             # Get all collection names to check if this one exists
+            logger.debug("Checking if collection '{}' exists…", name)
             collection_names = await self.db.list_collection_names()
             if name not in collection_names:
+                logger.info("Collection '{}' does not exist. Creating it…", name)
                 # Create the collection
                 await self.db.create_collection(name)
+                logger.debug("Successfully created collection: {}", name)
             else:
+                logger.debug("Collection '{}' already exists!", name)
             # Get and return the collection
             collection = self.db[name]
             return collection
         except Exception as e:
             logger.error("Error accessing collection '{}': {}", name, e)
             raise
     async def create_indexes(self: Self, collection_name: str) -> None:
         """
         Create a vector search index on a collection.
             collection_name: Name of the collection
         """
         collection = await self.get_collection(collection_name)
         try:
             # Create search index model using MongoDB's recommended approach
             search_index_model = SearchIndexModel(
                 name=f"{collection_name}_vector_index",
                 type="vectorSearch"
             )
             # Create the search index using the motor collection
             result = await collection.create_search_index(search_index_model)
+            logger.info("Vector search index '{}' created for collection {}.", result, collection_name)
         except Exception as e:
             if "command not found" in str(e).lower():
                 logger.warning("Vector search not supported by this MongoDB instance. Some functionality may be limited.")
                 # Create a fallback standard index on embedding field
                 await collection.create_index("embedding")
+                logger.info("Created standard index on 'embedding' field as fallback.")
             else:
                 logger.error("Failed to create vector index: {}", e)
                 raise
     async def close(self: Self) -> None:
         """Close MongoDB connection."""
         if self._client:
             self._client.close()
+            logger.info("Closed MongoDB connection.")
             self._client = None
             self._db = None
+class MongoDBResource(AsyncResource):
+    async def init(self: Self, settings: Settings) -> MongoDB:
         logger.info("Initializing MongoDB connection for database: {}", settings.MONGODB_NAME)
         mongo_db = MongoDB(settings=settings)
         mongo_db.connect()
+        await self._test_connection(mongo_db)
         return mongo_db
+    async def _test_connection(self: Self, mongo_db: MongoDB) -> None:
         """Test MongoDB connection and log the result."""
         try:
             is_connected = await mongo_db.ping()
                 logger.error("MongoDB connection test failed!")
         except Exception as e:
             logger.error("Error testing MongoDB connection: {}", e)
+            raise
     async def shutdown(self: Self, mongo_db: MongoDB) -> None:
         """Close MongoDB connection on shutdown."""
         try:
             await mongo_db.close()
         except Exception as e:
             logger.error("Error closing MongoDB connection: {}", e)

src/ctp_slack_bot/models/__init__.py CHANGED Viewed

@@ -1,2 +1,4 @@
 from ctp_slack_bot.models.base import Chunk, Content, VectorizedChunk, VectorQuery
 from ctp_slack_bot.models.slack import SlackEventPayload, SlackMessage, SlackReaction, SlackResponse, SlackUserTimestampPair

 from ctp_slack_bot.models.base import Chunk, Content, VectorizedChunk, VectorQuery
+from ctp_slack_bot.models.google_drive import GoogleDriveMetadata
 from ctp_slack_bot.models.slack import SlackEventPayload, SlackMessage, SlackReaction, SlackResponse, SlackUserTimestampPair
+from ctp_slack_bot.models.webvtt import WebVTTContent, WebVTTFrame

src/ctp_slack_bot/models/base.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from abc import ABC, abstractmethod
 from pydantic import BaseModel, ConfigDict, Field
-from types import MappingProxyType
-from typing import Any, Dict, final, Mapping, Self, Sequence, Optional
 class Chunk(BaseModel):
@@ -14,6 +13,7 @@ class Chunk(BaseModel):
     model_config = ConfigDict(frozen=True)
 @final
 class VectorQuery(BaseModel):
     """Model for vector database similarity search queries.
@@ -30,12 +30,14 @@ class VectorQuery(BaseModel):
     score_threshold: float = Field(default=0.7)
     filter_metadata: Optional[Mapping[str, Any]] = None
 @final
 class VectorizedChunk(Chunk):
     """A class representing a vectorized chunk of content."""
-    embedding: Sequence[float]  # The vector representation
 class Content(ABC, BaseModel):
@@ -44,22 +46,13 @@ class Content(ABC, BaseModel):
     model_config = ConfigDict(frozen=True)
     @abstractmethod
-    def get_chunks(self: Self) -> Sequence[Chunk]:
         pass
     @abstractmethod
-    def get_metadata(self: Self) -> Mapping[str, Any]:
-        pass
-    @abstractmethod
-    def get_text(self: Self) -> str:
-        pass
-    @abstractmethod
-    def get_bytes(self: Self) -> bytes:
         pass
-    @property
     @abstractmethod
-    def id(self: Self) -> str:
         pass

 from abc import ABC, abstractmethod
 from pydantic import BaseModel, ConfigDict, Field
+from typing import Any, final, Mapping, Self, Sequence, Optional
 class Chunk(BaseModel):
     model_config = ConfigDict(frozen=True)
 @final
 class VectorQuery(BaseModel):
     """Model for vector database similarity search queries.
     score_threshold: float = Field(default=0.7)
     filter_metadata: Optional[Mapping[str, Any]] = None
+    model_config = ConfigDict(frozen=True)
 @final
 class VectorizedChunk(Chunk):
     """A class representing a vectorized chunk of content."""
+    embedding: Sequence[float] # The vector representation
 class Content(ABC, BaseModel):
     model_config = ConfigDict(frozen=True)
     @abstractmethod
+    def get_id(self: Self) -> str:
         pass
     @abstractmethod
+    def get_chunks(self: Self) -> Sequence[Chunk]:
         pass
     @abstractmethod
+    def get_metadata(self: Self) -> Mapping[str, Any]:
         pass

src/ctp_slack_bot/models/google_drive.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from datetime import datetime
+from pydantic import BaseModel, ConfigDict
+from typing import Self
+from ctp_slack_bot.models import FileContent
+class GoogleDriveMetadata(BaseModel):
+    """Immutable metadata record for a Google Drive file or folder."""
+    id: str  # Taken from the "id" key of the source dictionary.
+    name: str  # Taken from the "name" key.
+    modified_time: datetime  # Parsed from the "modifiedTime" key with datetime.fromisoformat.
+    mime_type: str  # Taken from the "mimeType" key.
+    folder_path: str  # Path of the containing folder, exactly as supplied by the caller.
+    model_config = ConfigDict(frozen=True)
+    @classmethod
+    def from_folder_path_and_dict(cls: type["GoogleDriveMetadata"], folder_path: str, dict: dict) -> Self:
+        """Build an instance from a folder path and a dictionary holding "id", "name", "modifiedTime" and "mimeType".
+        Raises KeyError when one of those keys is missing and ValueError when "modifiedTime" cannot be parsed by datetime.fromisoformat.
+        """
+        # Instantiate through cls rather than the concrete class name so the declared Self return type also holds for subclasses.
+        return cls(id=dict["id"],
+                   name=dict["name"],
+                   modified_time=datetime.fromisoformat(dict["modifiedTime"]),
+                   mime_type=dict["mimeType"],
+                   folder_path=folder_path)

src/ctp_slack_bot/models/slack.py CHANGED Viewed

@@ -63,31 +63,19 @@ class SlackMessage(Content):
     is_starred: Optional[bool] = None
     pinned_to: Optional[Sequence[str]] = None
     reactions: Optional[Sequence[SlackReaction]] = None
-    _canonical_json: PrivateAttr
-    def __init__(self: Self, **data: Dict[str, Any]) -> None:
-        super().__init__(**data)
-        self._canonical_json = PrivateAttr(default_factory=lambda: dumps(data, sort_keys=True).encode())
     def get_chunks(self: Self) -> Sequence[Chunk]:
-        return (Chunk(text=self.text, parent_id=self.id, chunk_id="", metadata=self.get_metadata()), )
     def get_metadata(self: Self) -> Mapping[str, Any]:
         return MappingProxyType({
             "modificationTime": datetime.fromtimestamp(float(self.ts))
         })
-    def get_text(self: Self) -> str:
-        return self.text
-    def get_bytes(self: Self) -> bytes:
-        return self._canonical_json
-    @property
-    def id(self: Self) -> str:
-        """Unique identifier for this message."""
-        return f"slack-message:{self.channel}:{self.ts}"
 class SlackResponse(BaseModel): # TODO: This should also be based on Content as it is a SlackMessage―just not one for which we know the identity yet.
     """Represents a response message to be sent to Slack."""

     is_starred: Optional[bool] = None
     pinned_to: Optional[Sequence[str]] = None
     reactions: Optional[Sequence[SlackReaction]] = None
+    def get_id(self: Self) -> str:
+        """Unique identifier for this message."""
+        return f"slack-message:{self.channel}:{self.ts}"
     def get_chunks(self: Self) -> Sequence[Chunk]:
+        return (Chunk(text=self.text, parent_id=self.get_id(), chunk_id="", metadata=self.get_metadata()), )
     def get_metadata(self: Self) -> Mapping[str, Any]:
         return MappingProxyType({
             "modificationTime": datetime.fromtimestamp(float(self.ts))
         })
 class SlackResponse(BaseModel): # TODO: This should also be based on Content as it is a SlackMessage―just not one for which we know the identity yet.
     """Represents a response message to be sent to Slack."""

src/ctp_slack_bot/models/webvtt.py CHANGED Viewed

@@ -1,15 +1,18 @@
 from datetime import datetime, timedelta
 from io import BytesIO
 from json import dumps
-from pydantic import BaseModel, ConfigDict, PositiveInt, PrivateAttr
-import re
 from types import MappingProxyType
 from typing import Any, Dict, Literal, Mapping, Optional, Self, Sequence
 from webvtt import Caption, WebVTT
 from ctp_slack_bot.models.base import Chunk, Content
-SPEAKER_SPEECH_CAPTION_TEXT_PATTERN = re.compile('(?:([^:]+): )?(.*)')
 class WebVTTFrame(BaseModel):
     """Represents a WebVTT frame"""
@@ -23,54 +26,48 @@ class WebVTTFrame(BaseModel):
     model_config = ConfigDict(frozen=True)
     @classmethod
-    def from_webvtt_caption(cls: type["WebVTTFrame"], caption: Caption) -> Self:
-        identifier = caption.identifier
         start = timedelta(**caption.start_time.__dict__)
         end = timedelta(**caption.end_time.__dict__)
-        speech = caption.text
-        match SPEAKER_SPEECH_CAPTION_TEXT_PATTERN.search(speech).groups():
-            case (speaker, speech):
                 return cls(identifier=identifier, start=start, end=end, speaker=speaker, speech=speech)
-            case _:
                 return cls(identifier=identifier, start=start, end=end, speech=speech)
-class WebVTTFile(Content): # TODO: insert a FileContent class in the object inheritance hierarchy.
-    """Represents a message from Slack after adaptation."""
-    filename: str
-    modification_time: datetime
-    bytes: bytes
     def get_chunks(self: Self) -> Sequence[Chunk]:
-        return tuple(Chunk(text=frame.speech,
-                           parent_id=self.id,
-                           chunk_id=frame.identifier,
                            metadata={
-                               "filename": self.filename,
-                               "start": self.modification_time + frame.start,
-                               "end": self.modification_time + frame.end,
-                               "user": frame.speaker
                            })
-                     for frame
-                     in self.get_frames())
     def get_metadata(self: Self) -> Mapping[str, Any]:
-        return MappingProxyType({
-            "filename": self.filename,
-            "modificationTime": self.modification_time
-        })
-    def get_text(self: Self) -> str: # TODO
-        raise NotImplemented()
-    def get_bytes(self: Self) -> bytes:
-        return self.bytes
-    def get_frames(self: Self) -> Sequence[WebVTTFrame]:
-        return tuple(map(WebVTTFrame.from_webvtt_caption, WebVTT.from_buffer(BytesIO(buffer)).captions))
-    @property
-    def id(self: Self) -> str:
-        return f"file:{self.filename}"

 from datetime import datetime, timedelta
 from io import BytesIO
+from itertools import starmap
 from json import dumps
+from more_itertools import windowed
+from pydantic import BaseModel, ConfigDict, Field, PositiveInt, PrivateAttr
 from types import MappingProxyType
 from typing import Any, Dict, Literal, Mapping, Optional, Self, Sequence
 from webvtt import Caption, WebVTT
 from ctp_slack_bot.models.base import Chunk, Content
+CHUNK_FRAMES_OVERLAP = 1
+CHUNK_FRAMES_WINDOW = 5
+SPEAKER_SPEECH_TEXT_SEPARATOR = ": "
 class WebVTTFrame(BaseModel):
     """Represents a WebVTT frame"""
     model_config = ConfigDict(frozen=True)
     @classmethod
+    def from_webvtt_caption(cls: type["WebVTTFrame"], index: int, caption: Caption) -> Self:
+        identifier = caption.identifier if caption.identifier else str(index)
         start = timedelta(**caption.start_time.__dict__)
         end = timedelta(**caption.end_time.__dict__)
+        match caption.text.split(SPEAKER_SPEECH_TEXT_SEPARATOR, 1):
+            case [speaker, speech]:
                 return cls(identifier=identifier, start=start, end=end, speaker=speaker, speech=speech)
+            case [speech]:
                 return cls(identifier=identifier, start=start, end=end, speech=speech)
+class WebVTTContent(Content):
+    """Represents parsed WebVTT content."""
+    id: str
+    metadata: Mapping[str, Any] = Field(default_factory=dict)
+    frames: Sequence[WebVTTFrame]
+    def get_id(self: Self) -> str:
+        return self.id
     def get_chunks(self: Self) -> Sequence[Chunk]:
+        windows = (tuple(filter(None, window))
+                   for window
+                   in windowed(self.frames, CHUNK_FRAMES_WINDOW, step=CHUNK_FRAMES_WINDOW-CHUNK_FRAMES_OVERLAP))
+        return tuple(Chunk(text="\n\n".join(": ".join(filter(None, (frame.speaker, frame.speech)))
+                                            for frame
+                                            in frames),
+                           parent_id=self.get_id(),
+                           chunk_id=f"{frames[0].identifier}-{frames[-1].identifier}",
                            metadata={
+                               "start": str(frames[0].start), # TODO: This is a harder problem: to get the offsets to become real datetimes so that they can be queryable using MongoDB.
+                               "end": str(frames[-1].end),
+                               "speakers": [frame.speaker for frame in frames if frame.speaker]
                            })
+                     for frames
+                     in windows)
     def get_metadata(self: Self) -> Mapping[str, Any]:
+        return MappingProxyType(self.metadata)
+    @classmethod
+    def from_bytes(cls: type["WebVTTContent"], id: str, metadata: Mapping[str, Any], buffer: bytes) -> Self:
+        frames = tuple(starmap(WebVTTFrame.from_webvtt_caption, enumerate(WebVTT.from_buffer(BytesIO(buffer)).captions, 1)))
+        return WebVTTContent(id=id, metadata=MappingProxyType(metadata), frames=frames)

src/ctp_slack_bot/services/__init__.py CHANGED Viewed

@@ -3,6 +3,7 @@ from ctp_slack_bot.services.content_ingestion_service import ContentIngestionSer
 from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
 from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
 from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
 from ctp_slack_bot.services.language_model_service import LanguageModelService
 from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
 from ctp_slack_bot.services.slack_service import SlackService

 from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
 from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
 from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
+from ctp_slack_bot.services.google_drive_service import GoogleDriveService
 from ctp_slack_bot.services.language_model_service import LanguageModelService
 from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
 from ctp_slack_bot.services.slack_service import SlackService

src/ctp_slack_bot/services/application_database_service.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from datetime import datetime
+from loguru import logger
+from pydantic import BaseModel, PrivateAttr
+from typing import Iterable, Mapping, Self
+from ctp_slack_bot.core import Settings
+from ctp_slack_bot.db import MongoDB
+class ApplicationDatabaseService(BaseModel):
+    """Service for application state; both persistence operations below are still unimplemented stubs."""
+    settings: Settings
+    mongo_db: MongoDB # TODO: This should be replaced following the repository pattern―one repository class per collection.
+    class Config:
+        frozen=True
+    def __init__(self: Self, **data) -> None:
+        """Validate the supplied fields, then log that the service was created."""
+        super().__init__(**data)
+        class_name = self.__class__.__name__
+        logger.debug("Created {}", class_name)
+    async def get_last_modification_times_by_file_paths(self: Self, file_paths: Iterable[str]) -> Mapping[str, datetime]:
+        """Look up the last modification time recorded for each given file path (not implemented yet)."""
+        raise NotImplementedError # TODO
+    async def set_last_modification_time_by_file_path(self: Self, file_path: str, modification_time: datetime) -> None:
+        """Record the last modification time of a single file path (not implemented yet)."""
+        raise NotImplementedError # TODO

src/ctp_slack_bot/services/content_ingestion_service.py CHANGED Viewed

@@ -30,8 +30,8 @@ class ContentIngestionService(BaseModel):
     async def process_incoming_content(self: Self, content: Content) -> None:
         logger.debug("Content ingestion service received content with metadata: {}", content.get_metadata())
-        # if self.vector_database_service.has_content(content.id) # TODO
-        #    logger.debug("Ignored content with ID {} because it already exists in the database.", content.id)
         #    return
         chunks = content.get_chunks()
         await self.__vectorize_and_store_chunks_in_database(chunks)

     async def process_incoming_content(self: Self, content: Content) -> None:
         logger.debug("Content ingestion service received content with metadata: {}", content.get_metadata())
+        # if self.vector_database_service.has_content(content.get_id()) # TODO
+        #    logger.debug("Ignored content with ID {} because it already exists in the database.", content.get_id())
         #    return
         chunks = content.get_chunks()
         await self.__vectorize_and_store_chunks_in_database(chunks)

src/ctp_slack_bot/services/google_drive_service.py ADDED Viewed

	@@ -0,0 +1,142 @@

+from datetime import datetime
+from cachetools import TTLCache
+from google.oauth2 import service_account
+from googleapiclient.discovery import build
+from googleapiclient.http import MediaIoBaseDownload
+from googleapiclient.errors import HttpError
+from io import BytesIO
+from loguru import logger
+from pydantic import BaseModel, PrivateAttr
+from typing import Collection, Dict, List, Optional, Self
+from ctp_slack_bot.core import Settings
+from ctp_slack_bot.models import GoogleDriveMetadata
+FOLDER_MIME_TYPE: str = "application/vnd.google-apps.folder"
+PATH_SEPARATOR: str = "/"
+class GoogleDriveService(BaseModel):
+    """Service for interacting with Google Drive through a service account.
+    Paths are PATH_SEPARATOR-delimited and resolved relative to settings.GOOGLE_DRIVE_ROOT_ID; the empty path denotes that root folder.
+    """
+    settings: Settings
+    _google_drive_client: PrivateAttr = PrivateAttr()
+    _folder_cache: PrivateAttr = PrivateAttr(default_factory=lambda: TTLCache(maxsize=256, ttl=60))
+    class Config:
+        frozen=True
+    def __init__(self: Self, **data) -> None:
+        """Validate the fields and build the Drive v3 client from the service-account values in settings."""
+        super().__init__(**data)
+        credentials = service_account.Credentials.from_service_account_info({
+            "type": "service_account",
+            "project_id": self.settings.GOOGLE_PROJECT_ID,
+            "private_key_id": self.settings.GOOGLE_PRIVATE_KEY_ID.get_secret_value(),
+            "private_key": self.settings.GOOGLE_PRIVATE_KEY.get_secret_value(),
+            "client_email": self.settings.GOOGLE_CLIENT_EMAIL,
+            "client_id": self.settings.GOOGLE_CLIENT_ID,
+            "token_uri": self.settings.GOOGLE_TOKEN_URI,
+        }, scopes=["https://www.googleapis.com/auth/drive"])
+        self._google_drive_client = build('drive', 'v3', credentials=credentials)
+        logger.debug("Created {}", self.__class__.__name__)
+    @staticmethod
+    def _escape_query_literal(value: str) -> str:
+        """Escape backslashes and single quotes so value can sit inside a single-quoted Drive query string."""
+        # Backslashes must be doubled first; otherwise the backslash added in front of each quote would be doubled too.
+        return value.replace("\\", "\\\\").replace("'", "\\'")
+    def _resolve_folder_id(self: Self, folder_path: str) -> Optional[str]:
+        """Resolve a folder path to a Google Drive ID.
+        Returns the root ID for an empty path, and None when a path segment does not match exactly one folder or the API call fails.
+        Successful resolutions are remembered in the TTL cache.
+        """
+        if not folder_path:
+            return self.settings.GOOGLE_DRIVE_ROOT_ID
+        # A single get() avoids the KeyError that a membership test followed by indexing can hit when the entry expires in between.
+        cached_id = self._folder_cache.get(folder_path)
+        if cached_id is not None:
+            return cached_id
+        current_id = self.settings.GOOGLE_DRIVE_ROOT_ID
+        try:
+            for part in folder_path.split(PATH_SEPARATOR):
+                # Escaping outside the f-string keeps the expression valid on interpreters older than Python 3.12.
+                escaped_part = self._escape_query_literal(part)
+                results = self._google_drive_client.files().list(
+                    q=f"name='{escaped_part}' and mimeType='{FOLDER_MIME_TYPE}' and '{current_id}' in parents",
+                    fields="files(id,name)",
+                    supportsAllDrives=True,
+                    includeItemsFromAllDrives=True
+                ).execute()
+                match results:
+                    case {"files": [ {"id": child_id} ]}:
+                        current_id = child_id
+                    case _:
+                        logger.debug("Folder not found by path: {}", folder_path)
+                        return None
+        except HttpError as e:
+            # Include the exception so the failure can be diagnosed from the log.
+            logger.error("Error resolving folder path, {}: {}", folder_path, e)
+            return None
+        self._folder_cache[folder_path] = current_id
+        return current_id
+    def list_directory(self: Self, folder_path: str) -> Collection[GoogleDriveMetadata]:
+        """List the contents of a directory with basic metadata.
+        Follows nextPageToken until the listing is complete. Returns an empty tuple when the folder cannot be resolved or an API call fails.
+        """
+        folder_id = self._resolve_folder_id(folder_path)
+        if not folder_id:
+            logger.debug("Folder not found by path: {}", folder_path)
+            return ()
+        items = []
+        page_token = None
+        try:
+            while True:
+                results = self._google_drive_client.files().list(
+                    q=f"'{folder_id}' in parents",
+                    fields="nextPageToken,files(id,name,mimeType,modifiedTime)",
+                    supportsAllDrives=True,
+                    includeItemsFromAllDrives=True,
+                    pageSize=1000,
+                    pageToken=page_token
+                ).execute()
+                items.extend(GoogleDriveMetadata.from_folder_path_and_dict(folder_path, result)
+                             for result
+                             in results.get('files', ()))
+                page_token = results.get('nextPageToken')
+                if not page_token:
+                    break
+        except HttpError as e:
+            logger.error("Error listing folder by path, {}: {}", folder_path, e)
+            return ()
+        return tuple(items)
+    def get_metadata(self: Self, item_path: str) -> Optional[GoogleDriveMetadata]:
+        """Get metadata for a specific file/folder by path, or None when it is not found or the API call fails."""
+        match item_path.rsplit(PATH_SEPARATOR, 1):
+            case [item_name]:
+                # No separator: the item lives directly in the root folder.
+                folder_path = ""
+                folder_id = self.settings.GOOGLE_DRIVE_ROOT_ID
+            case [folder_path, item_name]:
+                folder_id = self._resolve_folder_id(folder_path)
+        if not folder_id:
+            logger.debug("Folder not found by path: {}", folder_path)
+            return None
+        try:
+            # The item name is escaped like folder names are, so quotes or backslashes in it cannot break the query.
+            results = self._google_drive_client.files().list(
+                q=f"name='{self._escape_query_literal(item_name)}' and '{folder_id}' in parents",
+                fields="files(id,name,mimeType,modifiedTime)",
+                supportsAllDrives=True,
+                includeItemsFromAllDrives=True,
+                pageSize=1
+            ).execute()
+            match results:
+                case {"files": [result]}:
+                    return GoogleDriveMetadata.from_folder_path_and_dict(folder_path, result)
+        except HttpError as e:
+            logger.error("Error getting metadata for item by path, {}: {}", item_path, e)
+        logger.debug("Item not found by path: {}", item_path)
+        return None
+    def read_file_by_id(self: Self, file_id: str) -> Optional[bytes]:
+        """Read the contents of a file by its unique identifier, or return None when the download fails."""
+        try:
+            request = self._google_drive_client.files().get_media(fileId=file_id)
+            buffer = BytesIO()
+            downloader = MediaIoBaseDownload(buffer, request)
+            done = False
+            # next_chunk() returns a (status, done) pair; keep downloading until done is true.
+            while not done:
+                _, done = downloader.next_chunk()
+            return buffer.getvalue()
+        except HttpError as e:
+            logger.error("Error reading file by ID, {}: {}", file_id, e)
+            return None

src/ctp_slack_bot/services/schedule_service.py CHANGED Viewed

@@ -2,6 +2,7 @@ from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from apscheduler.triggers.cron import CronTrigger
 from asyncio import create_task, iscoroutinefunction, to_thread
 from datetime import datetime
 from loguru import logger
 from pydantic import BaseModel, PrivateAttr
 from pytz import timezone
@@ -47,11 +48,21 @@ class ScheduleService(BaseModel):
     def start(self: Self) -> None:
         self._scheduler.start()
-        logger.info("Started scheduler.")
     def stop(self: Self) -> None:
         if self._scheduler.running:
-            self._scheduler.shutdown(wait=False)
-            logger.info("Shut down scheduler.")
         else:
             logger.debug("The scheduler is not running. There is no scheduler to shut down.")

 from apscheduler.triggers.cron import CronTrigger
 from asyncio import create_task, iscoroutinefunction, to_thread
 from datetime import datetime
+from dependency_injector.resources import Resource
 from loguru import logger
 from pydantic import BaseModel, PrivateAttr
 from pytz import timezone
     def start(self: Self) -> None:
         self._scheduler.start()
     def stop(self: Self) -> None:
         if self._scheduler.running:
+            self._scheduler.shutdown()
         else:
             logger.debug("The scheduler is not running. There is no scheduler to shut down.")
+class ScheduleServiceResource(Resource):
+    """Resource that starts a ScheduleService in init and stops it in shutdown."""
+    def init(self: Self, settings: Settings) -> ScheduleService:
+        """Create a ScheduleService from settings, start it and return it."""
+        logger.info("Starting scheduler…")
+        service = ScheduleService(settings=settings)
+        service.start()
+        return service
+    def shutdown(self: Self, schedule_service: ScheduleService) -> None:
+        """Stop the given scheduler service and log that it was stopped."""
+        schedule_service.stop()
+        logger.info("Stopped scheduler.")

src/ctp_slack_bot/services/vector_database_service.py CHANGED Viewed

@@ -172,4 +172,4 @@ class VectorDatabaseService(BaseModel): # TODO: this should not rely specificall
                          bool(self.settings.MONGODB_URI), self.settings.MONGODB_NAME)
             logger.debug("Query details: k={}, dimension={}",
                          query.k, len(query.query_embeddings) if query.query_embeddings else "None")
-            raise

                          bool(self.settings.MONGODB_URI), self.settings.MONGODB_NAME)
             logger.debug("Query details: k={}, dimension={}",
                          query.k, len(query.query_embeddings) if query.query_embeddings else "None")
+            raise

src/ctp_slack_bot/utils/__init__.py CHANGED Viewed

	@@ -0,0 +1 @@


1	+ from ctp_slack_bot.utils.secret_stripper import sanitize_mongo_db_uri

src/ctp_slack_bot/utils/secret_stripper.py ADDED Viewed

	@@ -0,0 +1,6 @@

+from urllib.parse import urlparse, urlunparse
+def sanitize_mongo_db_uri(uri: str) -> str:
+    """Return uri with any user name and password removed so that it is safe to log.
+    Scheme, host list (including ports and IPv6 brackets), path, parameters, query and fragment are kept as given.
+    """
+    parts = urlparse(uri)
+    # The user info is everything before the last "@" of the network location. Cutting it off there keeps
+    # ports as text (joining the integer parts.port raised TypeError) and also works for multi-host URIs,
+    # for which parts.port raises ValueError.
+    sanitized_netloc = parts.netloc.rpartition("@")[2]
+    return urlunparse((parts.scheme, sanitized_netloc, parts.path, parts.params, parts.query, parts.fragment))