Kevin Li committed on
Commit
fb92766
·
unverified ·
2 Parent(s): deb6243 5c7c7e5

Merge pull request #4 from CUNYTechPrep/refactor-3

Browse files
.env.template CHANGED
@@ -3,15 +3,9 @@
3
  # APScheduler Configuration
4
  SCHEDULER_TIMEZONE=UTC
5
 
6
- # API Configuration
7
- API_HOST=0.0.0.0
8
- API_PORT=8000
9
-
10
  # Slack Configuration
11
  SLACK_BOT_TOKEN=🪙
12
- SLACK_SIGNING_SECRET=🔐
13
  SLACK_APP_TOKEN=🦥
14
- SLACK_USER_TOKEN=🦊
15
 
16
  # Vectorization Configuration
17
  EMBEDDING_MODEL=🌮
@@ -34,3 +28,14 @@ CHAT_MODEL=gpt-3.5-turbo
34
  MAX_TOKENS=150
35
  TEMPERATURE=0.8
36
  SYSTEM_PROMPT="You are a helpful teaching assistant for a data science class.\nBased on the student's question, you will be given context retrieved from class transcripts and materials to answer their question.\nYour responses should be:\n\n1. Accurate and based on the class content\n2. Clear and educational\n3. Concise but complete\nIf you're unsure about something, acknowledge it and suggest asking the professor."
 
 
 
 
 
 
 
 
 
 
 
 
3
  # APScheduler Configuration
4
  SCHEDULER_TIMEZONE=UTC
5
 
 
 
 
 
6
  # Slack Configuration
7
  SLACK_BOT_TOKEN=🪙
 
8
  SLACK_APP_TOKEN=🦥
 
9
 
10
  # Vectorization Configuration
11
  EMBEDDING_MODEL=🌮
 
28
  MAX_TOKENS=150
29
  TEMPERATURE=0.8
30
  SYSTEM_PROMPT="You are a helpful teaching assistant for a data science class.\nBased on the student's question, you will be given context retrieved from class transcripts and materials to answer their question.\nYour responses should be:\n\n1. Accurate and based on the class content\n2. Clear and educational\n3. Concise but complete\nIf you're unsure about something, acknowledge it and suggest asking the professor."
31
+
32
+ # Google Drive Configuration
33
+ GOOGLE_DRIVE_ROOT_ID=1NB91EcIUXbOVcdCkXOAHdmWrDfgoh9fQ
34
+ GOOGLE_PROJECT_ID=insufferable-slacker-123456
35
+ GOOGLE_PRIVATE_KEY_ID=1a2b3c4d5e6f748891091d21304e506674829507
36
+ GOOGLE_PRIVATE_KEY="-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASC...\n-----END PRIVATE KEY-----\n"
37
+ GOOGLE_CLIENT_EMAIL=botty-bot@insufferable-slacker-123456.iam.gserviceaccount.com
38
+ GOOGLE_CLIENT_ID=123456789012345678901
39
+
40
+ # File Monitoring Configuration
41
+ FILE_MONITOR_ROOT_PATH=Transcripts/Friday
notebooks/container.ipynb ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Loading Dependency Injection Container in Jupyter Notebook"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": 4,
13
+ "metadata": {},
14
+ "outputs": [],
15
+ "source": [
16
+ "from ctp_slack_bot.containers import Container\n",
17
+ "from ctp_slack_bot.services import VectorDatabaseService\n",
18
+ "\n",
19
+ "container = Container()\n",
20
+ "container.wire(packages=['ctp_slack_bot'])"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 2,
26
+ "metadata": {},
27
+ "outputs": [
28
+ {
29
+ "name": "stderr",
30
+ "output_type": "stream",
31
+ "text": [
32
+ "\u001b[32m2025-04-19 16:43:46.927\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.core.config\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m14\u001b[0m - \u001b[34m\u001b[1mCreated Settings\u001b[0m\n"
33
+ ]
34
+ },
35
+ {
36
+ "data": {
37
+ "text/plain": [
38
+ "Settings(LOG_LEVEL='INFO', LOG_FORMAT='json', SCHEDULER_TIMEZONE='America/New_York', SLACK_BOT_TOKEN=SecretStr('**********'), SLACK_APP_TOKEN=SecretStr('**********'), EMBEDDING_MODEL='text-embedding-3-small', VECTOR_DIMENSION=1536, CHUNK_SIZE=1000, CHUNK_OVERLAP=200, TOP_K_MATCHES=5, MONGODB_URI=SecretStr('**********'), MONGODB_NAME='ctp_slack_bot', SCORE_THRESHOLD=0.5, HF_API_TOKEN=SecretStr('**********'), OPENAI_API_KEY=SecretStr('**********'), CHAT_MODEL='gpt-3.5-turbo', MAX_TOKENS=150, TEMPERATURE=0.8, SYSTEM_PROMPT=\"You are a helpful teaching assistant for a data science class.\\nBased on the students question, you will be given context retreived from class transcripts and materials to answer their question.\\nYour responses should be:\\n\\n1. Accurate and based on the class content\\n2. Clear and educational\\n3. Concise but complete\\nIf you're unsure about something, acknowledge it and suggest asking the professor.\", GOOGLE_PROJECT_ID='voltaic-reducer-294821', GOOGLE_PRIVATE_KEY_ID=SecretStr('**********'), GOOGLE_PRIVATE_KEY=SecretStr('**********'), GOOGLE_CLIENT_ID='102943207835073856980', GOOGLE_CLIENT_EMAIL='[email protected]', GOOGLE_AUTH_URI='https://accounts.google.com/o/oauth2/auth', GOOGLE_TOKEN_URI='https://oauth2.googleapis.com/token', GOOGLE_AUTH_PROVIDER_CERT_URL='https://www.googleapis.com/oauth2/v1/certs', GOOGLE_CLIENT_CERT_URL='https://www.googleapis.com/robot/v1/metadata/x509/ctp-slack-bot-714%40voltaic-reducer-294821.iam.gserviceaccount.com', GOOGLE_UNIVERSE_DOMAIN='googleapis.com', FILE_MONITOR_ROOT_PATH='Transcripts/Friday Building AI Applications Session')"
39
+ ]
40
+ },
41
+ "execution_count": 2,
42
+ "metadata": {},
43
+ "output_type": "execute_result"
44
+ }
45
+ ],
46
+ "source": [
47
+ "container.settings()"
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": null,
53
+ "metadata": {},
54
+ "outputs": [
55
+ {
56
+ "name": "stderr",
57
+ "output_type": "stream",
58
+ "text": [
59
+ "\u001b[32m2025-04-19 16:45:25.997\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.core.config\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m14\u001b[0m - \u001b[34m\u001b[1mCreated Settings\u001b[0m\n"
60
+ ]
61
+ },
62
+ {
63
+ "name": "stderr",
64
+ "output_type": "stream",
65
+ "text": [
66
+ "\u001b[32m2025-04-19 16:45:25.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36minit\u001b[0m:\u001b[36m175\u001b[0m - \u001b[1mInitializing MongoDB connection for database: ctp_slack_bot\u001b[0m\n",
67
+ "\u001b[32m2025-04-19 16:45:25.999\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m26\u001b[0m - \u001b[34m\u001b[1mCreated MongoDB\u001b[0m\n",
68
+ "\u001b[32m2025-04-19 16:45:25.999\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mconnect\u001b[0m:\u001b[36m32\u001b[0m - \u001b[34m\u001b[1mConnecting to MongoDB using URI: mongodb+srv://ctp-slack-bot.xkipuvm.mongodb.net/?retryWrites=true&w=majority&appName=ctp-slack-bot\u001b[0m\n",
69
+ "\u001b[32m2025-04-19 16:45:26.000\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mconnect\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mMongoDB client initialized for database: ctp_slack_bot\u001b[0m\n",
70
+ "\u001b[32m2025-04-19 16:45:26.279\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
71
+ "\u001b[32m2025-04-19 16:45:26.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36m_test_connection\u001b[0m:\u001b[36m186\u001b[0m - \u001b[1mMongoDB connection test successful!\u001b[0m\n",
72
+ "\u001b[32m2025-04-19 16:45:26.280\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m21\u001b[0m - \u001b[34m\u001b[1mCreated VectorDatabaseService\u001b[0m\n"
73
+ ]
74
+ }
75
+ ],
76
+ "source": [
77
+ "vector_database_service: VectorDatabaseService = container.vector_database_service()"
78
+ ]
79
+ }
80
+ ],
81
+ "metadata": {
82
+ "kernelspec": {
83
+ "display_name": ".venv",
84
+ "language": "python",
85
+ "name": "python3"
86
+ },
87
+ "language_info": {
88
+ "codemirror_mode": {
89
+ "name": "ipython",
90
+ "version": 3
91
+ },
92
+ "file_extension": ".py",
93
+ "mimetype": "text/x-python",
94
+ "name": "python",
95
+ "nbconvert_exporter": "python",
96
+ "pygments_lexer": "ipython3",
97
+ "version": "3.12.3"
98
+ }
99
+ },
100
+ "nbformat": 4,
101
+ "nbformat_minor": 2
102
+ }
notebooks/google_drive.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/google_drive_web_vtt_vectorizer_and_storer.ipynb ADDED
@@ -0,0 +1,585 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Google Drive WebVTT Vectorizer and Storer"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": null,
13
+ "metadata": {},
14
+ "outputs": [
15
+ {
16
+ "name": "stderr",
17
+ "output_type": "stream",
18
+ "text": [
19
+ "\u001b[32m2025-04-19 19:21:27.333\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.core.config\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m14\u001b[0m - \u001b[34m\u001b[1mCreated Settings\u001b[0m\n",
20
+ "\u001b[32m2025-04-19 19:21:27.334\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.core.config\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m14\u001b[0m - \u001b[34m\u001b[1mCreated Settings\u001b[0m\n",
21
+ "\u001b[32m2025-04-19 19:21:27.337\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.google_drive_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreated GoogleDriveService\u001b[0m\n",
22
+ "\u001b[32m2025-04-19 19:21:27.361\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m22\u001b[0m - \u001b[34m\u001b[1mCreated EmbeddingsModelService\u001b[0m\n",
23
+ "\u001b[32m2025-04-19 19:21:27.362\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vectorization_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m22\u001b[0m - \u001b[34m\u001b[1mCreated VectorizationService\u001b[0m\n"
24
+ ]
25
+ },
26
+ {
27
+ "name": "stderr",
28
+ "output_type": "stream",
29
+ "text": [
30
+ "\u001b[32m2025-04-19 19:21:27.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36minit\u001b[0m:\u001b[36m175\u001b[0m - \u001b[1mInitializing MongoDB connection for database: ctp_slack_bot\u001b[0m\n",
31
+ "\u001b[32m2025-04-19 19:21:27.364\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m26\u001b[0m - \u001b[34m\u001b[1mCreated MongoDB\u001b[0m\n",
32
+ "\u001b[32m2025-04-19 19:21:27.364\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mconnect\u001b[0m:\u001b[36m32\u001b[0m - \u001b[34m\u001b[1mConnecting to MongoDB using URI: mongodb+srv://ctp-slack-bot.xkipuvm.mongodb.net/?retryWrites=true&w=majority&appName=ctp-slack-bot\u001b[0m\n",
33
+ "\u001b[32m2025-04-19 19:21:27.365\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mconnect\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mMongoDB client initialized for database: ctp_slack_bot\u001b[0m\n",
34
+ "\u001b[32m2025-04-19 19:21:27.825\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
35
+ "\u001b[32m2025-04-19 19:21:27.825\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36m_test_connection\u001b[0m:\u001b[36m186\u001b[0m - \u001b[1mMongoDB connection test successful!\u001b[0m\n",
36
+ "\u001b[32m2025-04-19 19:21:27.825\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m21\u001b[0m - \u001b[34m\u001b[1mCreated VectorDatabaseService\u001b[0m\n"
37
+ ]
38
+ }
39
+ ],
40
+ "source": [
41
+ "from datetime import datetime\n",
42
+ "from functools import partial\n",
43
+ "from html import escape\n",
44
+ "from IPython.display import display_html\n",
45
+ "from itertools import chain\n",
46
+ "from textwrap import wrap\n",
47
+ "from zoneinfo import ZoneInfo\n",
48
+ "\n",
49
+ "from ctp_slack_bot.containers import Container\n",
50
+ "from ctp_slack_bot.models import WebVTTContent\n",
51
+ "\n",
52
+ "display_html = partial(display_html, raw=True)\n",
53
+ "\n",
54
+ "container = Container()\n",
55
+ "google_drive_service = container.google_drive_service()\n",
56
+ "vectorization_service = container.vectorization_service()\n",
57
+ "vector_database_service = container.vector_database_service()"
58
+ ]
59
+ },
60
+ {
61
+ "cell_type": "markdown",
62
+ "metadata": {},
63
+ "source": [
64
+ "## Configuration\n",
65
+ "\n",
66
+ "⚠️ Configure before running the code to avoid processing the wrong file type or re-uploading past files which were already uploaded."
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": 2,
72
+ "metadata": {},
73
+ "outputs": [],
74
+ "source": [
75
+ "MIME_TYPE = \"text/vtt\" # This should probably not be changed.\n",
76
+ "\n",
77
+ "MODIFICATION_TIME_CUTOFF = datetime(2024, 8, 30, tzinfo=ZoneInfo(\"UTC\"))"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "markdown",
82
+ "metadata": {},
83
+ "source": [
84
+ "## Upload"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "code",
89
+ "execution_count": 3,
90
+ "metadata": {},
91
+ "outputs": [
92
+ {
93
+ "data": {
94
+ "text/html": [
95
+ "<p>Found 7 files/folders.</p>"
96
+ ]
97
+ },
98
+ "metadata": {},
99
+ "output_type": "display_data"
100
+ },
101
+ {
102
+ "data": {
103
+ "text/html": [
104
+ "<ul><li>Week-03-Analytics-Friday-2024-09-13.cc.vtt</li><li>Week-07-Regressors-via-Linear-Regression-Friday-2024-10-18.transcript.vtt</li><li>Week-06-Classifiers-via-Logistic-Regression-Friday-2024-10-11.transcript.vtt</li><li>Week-09-AI-Part-1-Neural-Networks-Intro-to-HuggingFace-Friday-2024-11-01.cc.vtt</li><li>Week-08-Decision-Trees-Random-Forest-Tuesday-2024-10-22.cc.vtt</li><li>Week-02-Finding-Cleaning-Data-Friday-2024-09-06.vtt</li><li>Week-01-Setup-Pandas-Friday-2024-08-30.vtt</li></ul>"
105
+ ]
106
+ },
107
+ "metadata": {},
108
+ "output_type": "display_data"
109
+ },
110
+ {
111
+ "data": {
112
+ "text/html": [
113
+ "<p>7 files/folders pass the modification time (<em>2024-08-30 00:00:00+00:00</em>) cut-off.</p>"
114
+ ]
115
+ },
116
+ "metadata": {},
117
+ "output_type": "display_data"
118
+ },
119
+ {
120
+ "data": {
121
+ "text/html": [
122
+ "<ul><li>Week-03-Analytics-Friday-2024-09-13.cc.vtt</li><li>Week-07-Regressors-via-Linear-Regression-Friday-2024-10-18.transcript.vtt</li><li>Week-06-Classifiers-via-Logistic-Regression-Friday-2024-10-11.transcript.vtt</li><li>Week-09-AI-Part-1-Neural-Networks-Intro-to-HuggingFace-Friday-2024-11-01.cc.vtt</li><li>Week-08-Decision-Trees-Random-Forest-Tuesday-2024-10-22.cc.vtt</li><li>Week-02-Finding-Cleaning-Data-Friday-2024-09-06.vtt</li><li>Week-01-Setup-Pandas-Friday-2024-08-30.vtt</li></ul>"
123
+ ]
124
+ },
125
+ "metadata": {},
126
+ "output_type": "display_data"
127
+ },
128
+ {
129
+ "data": {
130
+ "text/html": [
131
+ "<p>7 files/folders pass the modification time (<em>2024-08-30 00:00:00+00:00</em>) cut-off and MIME type (<em>text/vtt</em>) criterion.</p>"
132
+ ]
133
+ },
134
+ "metadata": {},
135
+ "output_type": "display_data"
136
+ },
137
+ {
138
+ "data": {
139
+ "text/html": [
140
+ "<ul><li>Week-03-Analytics-Friday-2024-09-13.cc.vtt</li><li>Week-07-Regressors-via-Linear-Regression-Friday-2024-10-18.transcript.vtt</li><li>Week-06-Classifiers-via-Logistic-Regression-Friday-2024-10-11.transcript.vtt</li><li>Week-09-AI-Part-1-Neural-Networks-Intro-to-HuggingFace-Friday-2024-11-01.cc.vtt</li><li>Week-08-Decision-Trees-Random-Forest-Tuesday-2024-10-22.cc.vtt</li><li>Week-02-Finding-Cleaning-Data-Friday-2024-09-06.vtt</li><li>Week-01-Setup-Pandas-Friday-2024-08-30.vtt</li></ul>"
141
+ ]
142
+ },
143
+ "metadata": {},
144
+ "output_type": "display_data"
145
+ }
146
+ ],
147
+ "source": [
148
+ "item_metadata = google_drive_service.list_directory(\"\")\n",
149
+ "display_html(f\"<p>Found {len(item_metadata)} files/folders.</p>\")\n",
150
+ "display_html(\"\".join(chain(\"<ul>\", (f\"<li>{escape(metadata.name)}</li>\" for metadata in item_metadata), \"</ul>\")))\n",
151
+ "\n",
152
+ "recent_metadata = tuple(filter(lambda metadata: MODIFICATION_TIME_CUTOFF <= metadata.modified_time, item_metadata))\n",
153
+ "display_html(f\"<p>{len(recent_metadata)} files/folders pass the modification time (<em>{MODIFICATION_TIME_CUTOFF}</em>) cut-off.</p>\")\n",
154
+ "display_html(\"\".join(chain(\"<ul>\", (f\"<li>{escape(metadata.name)}</li>\" for metadata in recent_metadata), \"</ul>\")))\n",
155
+ "\n",
156
+ "metadata_to_process = tuple(filter(lambda metadata: metadata.mime_type == MIME_TYPE, recent_metadata))\n",
157
+ "display_html(f\"<p>{len(metadata_to_process)} files/folders pass the modification time (<em>{MODIFICATION_TIME_CUTOFF}</em>) cut-off and MIME type (<em>{MIME_TYPE}</em>) criterion.</p>\")\n",
158
+ "display_html(\"\".join(chain(\"<ul>\", (f\"<li>{escape(metadata.name)}</li>\" for metadata in metadata_to_process), \"</ul>\")))"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": 4,
164
+ "metadata": {},
165
+ "outputs": [
166
+ {
167
+ "data": {
168
+ "text/html": [
169
+ "Processed 7 files."
170
+ ]
171
+ },
172
+ "metadata": {},
173
+ "output_type": "display_data"
174
+ }
175
+ ],
176
+ "source": [
177
+ "web_vtts = tuple(WebVTTContent.from_bytes(f\"googledrive:{metadata.folder_path}/{metadata.name}\",\n",
178
+ " {\n",
179
+ " \"filename\": metadata.name,\n",
180
+ " \"mimeType\": metadata.mime_type,\n",
181
+ " \"modificationTime\": metadata.modified_time\n",
182
+ " },\n",
183
+ " google_drive_service.read_file_by_id(metadata.id))\n",
184
+ " for metadata\n",
185
+ " in metadata_to_process)\n",
186
+ "\n",
187
+ "display_html(f\"Processed {len(web_vtts)} files.\")"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "code",
192
+ "execution_count": 5,
193
+ "metadata": {},
194
+ "outputs": [
195
+ {
196
+ "data": {
197
+ "text/html": [
198
+ "Chunked Week-03-Analytics-Friday-2024-09-13.cc.vtt into 496 chunks."
199
+ ]
200
+ },
201
+ "metadata": {},
202
+ "output_type": "display_data"
203
+ },
204
+ {
205
+ "name": "stderr",
206
+ "output_type": "stream",
207
+ "text": [
208
+ "\u001b[32m2025-04-19 19:21:37.826\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 496 text string(s)…\u001b[0m\n"
209
+ ]
210
+ },
211
+ {
212
+ "data": {
213
+ "text/html": [
214
+ "Vectorized Week-03-Analytics-Friday-2024-09-13.cc.vtt’s 496 chunks."
215
+ ]
216
+ },
217
+ "metadata": {},
218
+ "output_type": "display_data"
219
+ },
220
+ {
221
+ "name": "stderr",
222
+ "output_type": "stream",
223
+ "text": [
224
+ "\u001b[32m2025-04-19 19:21:42.297\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 496 chunks\u001b[0m\n",
225
+ "\u001b[32m2025-04-19 19:21:42.319\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
226
+ "\u001b[32m2025-04-19 19:21:42.320\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
227
+ "\u001b[32m2025-04-19 19:21:42.340\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
228
+ "\u001b[32m2025-04-19 19:21:42.341\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
229
+ "\u001b[32m2025-04-19 19:21:42.360\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
230
+ "\u001b[32m2025-04-19 19:21:42.360\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
231
+ "\u001b[32m2025-04-19 19:21:42.380\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
232
+ "\u001b[32m2025-04-19 19:21:42.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
233
+ "\u001b[32m2025-04-19 19:21:42.505\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 496 documents into vectors collection\u001b[0m\n",
234
+ "\u001b[32m2025-04-19 19:21:48.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 496 vector chunks in database\u001b[0m\n"
235
+ ]
236
+ },
237
+ {
238
+ "data": {
239
+ "text/html": [
240
+ "Stored Week-03-Analytics-Friday-2024-09-13.cc.vtt’s 496 vectorized chunks to the database."
241
+ ]
242
+ },
243
+ "metadata": {},
244
+ "output_type": "display_data"
245
+ },
246
+ {
247
+ "data": {
248
+ "text/html": [
249
+ "Chunked Week-07-Regressors-via-Linear-Regression-Friday-2024-10-18.transcript.vtt into 321 chunks."
250
+ ]
251
+ },
252
+ "metadata": {},
253
+ "output_type": "display_data"
254
+ },
255
+ {
256
+ "name": "stderr",
257
+ "output_type": "stream",
258
+ "text": [
259
+ "\u001b[32m2025-04-19 19:21:48.866\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 321 text string(s)…\u001b[0m\n"
260
+ ]
261
+ },
262
+ {
263
+ "data": {
264
+ "text/html": [
265
+ "Vectorized Week-07-Regressors-via-Linear-Regression-Friday-2024-10-18.transcript.vtt’s 321 chunks."
266
+ ]
267
+ },
268
+ "metadata": {},
269
+ "output_type": "display_data"
270
+ },
271
+ {
272
+ "name": "stderr",
273
+ "output_type": "stream",
274
+ "text": [
275
+ "\u001b[32m2025-04-19 19:21:52.629\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 321 chunks\u001b[0m\n",
276
+ "\u001b[32m2025-04-19 19:21:52.652\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
277
+ "\u001b[32m2025-04-19 19:21:52.652\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
278
+ "\u001b[32m2025-04-19 19:21:52.671\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
279
+ "\u001b[32m2025-04-19 19:21:52.672\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
280
+ "\u001b[32m2025-04-19 19:21:52.691\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
281
+ "\u001b[32m2025-04-19 19:21:52.691\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
282
+ "\u001b[32m2025-04-19 19:21:52.712\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
283
+ "\u001b[32m2025-04-19 19:21:52.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
284
+ "\u001b[32m2025-04-19 19:21:52.831\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 321 documents into vectors collection\u001b[0m\n",
285
+ "\u001b[32m2025-04-19 19:21:58.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 321 vector chunks in database\u001b[0m\n"
286
+ ]
287
+ },
288
+ {
289
+ "data": {
290
+ "text/html": [
291
+ "Stored Week-07-Regressors-via-Linear-Regression-Friday-2024-10-18.transcript.vtt’s 321 vectorized chunks to the database."
292
+ ]
293
+ },
294
+ "metadata": {},
295
+ "output_type": "display_data"
296
+ },
297
+ {
298
+ "data": {
299
+ "text/html": [
300
+ "Chunked Week-06-Classifiers-via-Logistic-Regression-Friday-2024-10-11.transcript.vtt into 337 chunks."
301
+ ]
302
+ },
303
+ "metadata": {},
304
+ "output_type": "display_data"
305
+ },
306
+ {
307
+ "name": "stderr",
308
+ "output_type": "stream",
309
+ "text": [
310
+ "\u001b[32m2025-04-19 19:21:58.231\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 337 text string(s)…\u001b[0m\n"
311
+ ]
312
+ },
313
+ {
314
+ "data": {
315
+ "text/html": [
316
+ "Vectorized Week-06-Classifiers-via-Logistic-Regression-Friday-2024-10-11.transcript.vtt’s 337 chunks."
317
+ ]
318
+ },
319
+ "metadata": {},
320
+ "output_type": "display_data"
321
+ },
322
+ {
323
+ "name": "stderr",
324
+ "output_type": "stream",
325
+ "text": [
326
+ "\u001b[32m2025-04-19 19:22:02.126\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 337 chunks\u001b[0m\n",
327
+ "\u001b[32m2025-04-19 19:22:02.147\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
328
+ "\u001b[32m2025-04-19 19:22:02.147\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
329
+ "\u001b[32m2025-04-19 19:22:02.167\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
330
+ "\u001b[32m2025-04-19 19:22:02.167\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
331
+ "\u001b[32m2025-04-19 19:22:02.186\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
332
+ "\u001b[32m2025-04-19 19:22:02.187\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
333
+ "\u001b[32m2025-04-19 19:22:02.207\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
334
+ "\u001b[32m2025-04-19 19:22:02.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
335
+ "\u001b[32m2025-04-19 19:22:02.354\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 337 documents into vectors collection\u001b[0m\n",
336
+ "\u001b[32m2025-04-19 19:22:08.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 337 vector chunks in database\u001b[0m\n"
337
+ ]
338
+ },
339
+ {
340
+ "data": {
341
+ "text/html": [
342
+ "Stored Week-06-Classifiers-via-Logistic-Regression-Friday-2024-10-11.transcript.vtt’s 337 vectorized chunks to the database."
343
+ ]
344
+ },
345
+ "metadata": {},
346
+ "output_type": "display_data"
347
+ },
348
+ {
349
+ "data": {
350
+ "text/html": [
351
+ "Chunked Week-09-AI-Part-1-Neural-Networks-Intro-to-HuggingFace-Friday-2024-11-01.cc.vtt into 341 chunks."
352
+ ]
353
+ },
354
+ "metadata": {},
355
+ "output_type": "display_data"
356
+ },
357
+ {
358
+ "name": "stderr",
359
+ "output_type": "stream",
360
+ "text": [
361
+ "\u001b[32m2025-04-19 19:22:08.524\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 341 text string(s)…\u001b[0m\n"
362
+ ]
363
+ },
364
+ {
365
+ "data": {
366
+ "text/html": [
367
+ "Vectorized Week-09-AI-Part-1-Neural-Networks-Intro-to-HuggingFace-Friday-2024-11-01.cc.vtt’s 341 chunks."
368
+ ]
369
+ },
370
+ "metadata": {},
371
+ "output_type": "display_data"
372
+ },
373
+ {
374
+ "name": "stderr",
375
+ "output_type": "stream",
376
+ "text": [
377
+ "\u001b[32m2025-04-19 19:22:12.675\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 341 chunks\u001b[0m\n",
378
+ "\u001b[32m2025-04-19 19:22:12.712\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
379
+ "\u001b[32m2025-04-19 19:22:12.712\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
380
+ "\u001b[32m2025-04-19 19:22:12.731\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
381
+ "\u001b[32m2025-04-19 19:22:12.731\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
382
+ "\u001b[32m2025-04-19 19:22:12.750\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
383
+ "\u001b[32m2025-04-19 19:22:12.751\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
384
+ "\u001b[32m2025-04-19 19:22:12.773\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
385
+ "\u001b[32m2025-04-19 19:22:12.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
386
+ "\u001b[32m2025-04-19 19:22:12.926\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 341 documents into vectors collection\u001b[0m\n",
387
+ "\u001b[32m2025-04-19 19:22:18.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 341 vector chunks in database\u001b[0m\n"
388
+ ]
389
+ },
390
+ {
391
+ "data": {
392
+ "text/html": [
393
+ "Stored Week-09-AI-Part-1-Neural-Networks-Intro-to-HuggingFace-Friday-2024-11-01.cc.vtt’s 341 vectorized chunks to the database."
394
+ ]
395
+ },
396
+ "metadata": {},
397
+ "output_type": "display_data"
398
+ },
399
+ {
400
+ "data": {
401
+ "text/html": [
402
+ "Chunked Week-08-Decision-Trees-Random-Forest-Tuesday-2024-10-22.cc.vtt into 378 chunks."
403
+ ]
404
+ },
405
+ "metadata": {},
406
+ "output_type": "display_data"
407
+ },
408
+ {
409
+ "name": "stderr",
410
+ "output_type": "stream",
411
+ "text": [
412
+ "\u001b[32m2025-04-19 19:22:18.360\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 378 text string(s)…\u001b[0m\n"
413
+ ]
414
+ },
415
+ {
416
+ "data": {
417
+ "text/html": [
418
+ "Vectorized Week-08-Decision-Trees-Random-Forest-Tuesday-2024-10-22.cc.vtt’s 378 chunks."
419
+ ]
420
+ },
421
+ "metadata": {},
422
+ "output_type": "display_data"
423
+ },
424
+ {
425
+ "name": "stderr",
426
+ "output_type": "stream",
427
+ "text": [
428
+ "\u001b[32m2025-04-19 19:22:21.808\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 378 chunks\u001b[0m\n",
429
+ "\u001b[32m2025-04-19 19:22:21.841\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
430
+ "\u001b[32m2025-04-19 19:22:21.841\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
431
+ "\u001b[32m2025-04-19 19:22:21.873\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
432
+ "\u001b[32m2025-04-19 19:22:21.874\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
433
+ "\u001b[32m2025-04-19 19:22:21.894\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
434
+ "\u001b[32m2025-04-19 19:22:21.894\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
435
+ "\u001b[32m2025-04-19 19:22:21.914\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
436
+ "\u001b[32m2025-04-19 19:22:22.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
437
+ "\u001b[32m2025-04-19 19:22:22.035\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 378 documents into vectors collection\u001b[0m\n",
438
+ "\u001b[32m2025-04-19 19:22:28.108\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 378 vector chunks in database\u001b[0m\n"
439
+ ]
440
+ },
441
+ {
442
+ "data": {
443
+ "text/html": [
444
+ "Stored Week-08-Decision-Trees-Random-Forest-Tuesday-2024-10-22.cc.vtt’s 378 vectorized chunks to the database."
445
+ ]
446
+ },
447
+ "metadata": {},
448
+ "output_type": "display_data"
449
+ },
450
+ {
451
+ "data": {
452
+ "text/html": [
453
+ "Chunked Week-02-Finding-Cleaning-Data-Friday-2024-09-06.vtt into 680 chunks."
454
+ ]
455
+ },
456
+ "metadata": {},
457
+ "output_type": "display_data"
458
+ },
459
+ {
460
+ "name": "stderr",
461
+ "output_type": "stream",
462
+ "text": [
463
+ "\u001b[32m2025-04-19 19:22:28.113\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 680 text string(s)…\u001b[0m\n"
464
+ ]
465
+ },
466
+ {
467
+ "data": {
468
+ "text/html": [
469
+ "Vectorized Week-02-Finding-Cleaning-Data-Friday-2024-09-06.vtt’s 680 chunks."
470
+ ]
471
+ },
472
+ "metadata": {},
473
+ "output_type": "display_data"
474
+ },
475
+ {
476
+ "name": "stderr",
477
+ "output_type": "stream",
478
+ "text": [
479
+ "\u001b[32m2025-04-19 19:22:34.652\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 680 chunks\u001b[0m\n",
480
+ "\u001b[32m2025-04-19 19:22:34.671\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
481
+ "\u001b[32m2025-04-19 19:22:34.671\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
482
+ "\u001b[32m2025-04-19 19:22:34.705\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
483
+ "\u001b[32m2025-04-19 19:22:34.705\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
484
+ "\u001b[32m2025-04-19 19:22:34.720\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
485
+ "\u001b[32m2025-04-19 19:22:34.720\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
486
+ "\u001b[32m2025-04-19 19:22:34.740\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
487
+ "\u001b[32m2025-04-19 19:22:34.859\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
488
+ "\u001b[32m2025-04-19 19:22:34.866\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 680 documents into vectors collection\u001b[0m\n",
489
+ "\u001b[32m2025-04-19 19:22:43.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 680 vector chunks in database\u001b[0m\n"
490
+ ]
491
+ },
492
+ {
493
+ "data": {
494
+ "text/html": [
495
+ "Stored Week-02-Finding-Cleaning-Data-Friday-2024-09-06.vtt’s 680 vectorized chunks to the database."
496
+ ]
497
+ },
498
+ "metadata": {},
499
+ "output_type": "display_data"
500
+ },
501
+ {
502
+ "data": {
503
+ "text/html": [
504
+ "Chunked Week-01-Setup-Pandas-Friday-2024-08-30.vtt into 742 chunks."
505
+ ]
506
+ },
507
+ "metadata": {},
508
+ "output_type": "display_data"
509
+ },
510
+ {
511
+ "name": "stderr",
512
+ "output_type": "stream",
513
+ "text": [
514
+ "\u001b[32m2025-04-19 19:22:43.438\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 742 text string(s)…\u001b[0m\n"
515
+ ]
516
+ },
517
+ {
518
+ "data": {
519
+ "text/html": [
520
+ "Vectorized Week-01-Setup-Pandas-Friday-2024-08-30.vtt’s 742 chunks."
521
+ ]
522
+ },
523
+ "metadata": {},
524
+ "output_type": "display_data"
525
+ },
526
+ {
527
+ "name": "stderr",
528
+ "output_type": "stream",
529
+ "text": [
530
+ "\u001b[32m2025-04-19 19:22:50.402\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 742 chunks\u001b[0m\n",
531
+ "\u001b[32m2025-04-19 19:22:50.426\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
532
+ "\u001b[32m2025-04-19 19:22:50.426\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
533
+ "\u001b[32m2025-04-19 19:22:50.452\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
534
+ "\u001b[32m2025-04-19 19:22:50.452\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
535
+ "\u001b[32m2025-04-19 19:22:50.475\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
536
+ "\u001b[32m2025-04-19 19:22:50.475\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
537
+ "\u001b[32m2025-04-19 19:22:50.508\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
538
+ "\u001b[32m2025-04-19 19:22:50.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
539
+ "\u001b[32m2025-04-19 19:22:50.626\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 742 documents into vectors collection\u001b[0m\n",
540
+ "\u001b[32m2025-04-19 19:23:01.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 742 vector chunks in database\u001b[0m\n"
541
+ ]
542
+ },
543
+ {
544
+ "data": {
545
+ "text/html": [
546
+ "Stored Week-01-Setup-Pandas-Friday-2024-08-30.vtt’s 742 vectorized chunks to the database."
547
+ ]
548
+ },
549
+ "metadata": {},
550
+ "output_type": "display_data"
551
+ }
552
+ ],
553
+ "source": [
554
+ "for web_vtt in web_vtts:\n",
555
+ " chunks = web_vtt.get_chunks()\n",
556
+ " display_html(f\"Chunked {web_vtt.get_metadata().get(\"filename\")} into {len(chunks)} chunks.\")\n",
557
+ " vectorized_chunks = vectorization_service.vectorize(chunks)\n",
558
+ " display_html(f\"Vectorized {web_vtt.get_metadata().get(\"filename\")}’s {len(vectorized_chunks)} chunks.\")\n",
559
+ " await (await vector_database_service).store(vectorized_chunks)\n",
560
+ " display_html(f\"Stored {web_vtt.get_metadata().get(\"filename\")}’s {len(vectorized_chunks)} vectorized chunks to the database.\")"
561
+ ]
562
+ }
563
+ ],
564
+ "metadata": {
565
+ "kernelspec": {
566
+ "display_name": ".venv",
567
+ "language": "python",
568
+ "name": "python3"
569
+ },
570
+ "language_info": {
571
+ "codemirror_mode": {
572
+ "name": "ipython",
573
+ "version": 3
574
+ },
575
+ "file_extension": ".py",
576
+ "mimetype": "text/x-python",
577
+ "name": "python",
578
+ "nbconvert_exporter": "python",
579
+ "pygments_lexer": "ipython3",
580
+ "version": "3.12.3"
581
+ }
582
+ },
583
+ "nbformat": 4,
584
+ "nbformat_minor": 2
585
+ }
notebooks/web-vtt.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/web_vtt.ipynb ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# WebVTT Reading and Chunking Test"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "markdown",
12
+ "metadata": {},
13
+ "source": [
14
+ "## Pure `webvtt-py` as Proof-of-concept"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": 1,
20
+ "metadata": {},
21
+ "outputs": [],
22
+ "source": [
23
+ "from datetime import datetime, timedelta\n",
24
+ "from functools import partial\n",
25
+ "from html import escape\n",
26
+ "from io import BytesIO\n",
27
+ "from IPython.display import display_html\n",
28
+ "from itertools import chain\n",
29
+ "import re\n",
30
+ "from webvtt import Caption, WebVTT\n",
31
+ "from webvtt.models import Timestamp\n",
32
+ "from zoneinfo import ZoneInfo\n",
33
+ "\n",
34
+ "display_html = partial(display_html, raw=True)"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": null,
40
+ "metadata": {},
41
+ "outputs": [],
42
+ "source": [
43
+ "FILE_PATH = \"GMT20250411-223535_Recording.transcript.vtt\"\n",
44
+ "TIME_ZONE = ZoneInfo(\"America/New_York\")\n",
45
+ "BASE_TIME = datetime(2025, 4, 11, hour=22, minute=35, second=35, tzinfo=ZoneInfo(\"GMT\")).astimezone(TIME_ZONE)"
46
+ ]
47
+ },
48
+ {
49
+ "cell_type": "code",
50
+ "execution_count": 3,
51
+ "metadata": {},
52
+ "outputs": [],
53
+ "source": [
54
+ "with open(FILE_PATH, \"rb\") as file:\n",
55
+ " web_vtt = WebVTT.from_buffer(BytesIO(file.read()))"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": 4,
61
+ "metadata": {},
62
+ "outputs": [
63
+ {
64
+ "data": {
65
+ "text/html": [
66
+ "<ul><li>__class__</li><li>__delattr__</li><li>__dict__</li><li>__dir__</li><li>__doc__</li><li>__eq__</li><li>__format__</li><li>__ge__</li><li>__getattribute__</li><li>__getitem__</li><li>__getstate__</li><li>__gt__</li><li>__hash__</li><li>__init__</li><li>__init_subclass__</li><li>__le__</li><li>__len__</li><li>__lt__</li><li>__module__</li><li>__ne__</li><li>__new__</li><li>__reduce__</li><li>__reduce_ex__</li><li>__repr__</li><li>__setattr__</li><li>__sizeof__</li><li>__str__</li><li>__subclasshook__</li><li>__weakref__</li><li>_get_destination_file</li><li>_get_lines</li><li>_has_bom</li><li>captions</li><li>content</li><li>encoding</li><li>file</li><li>footer_comments</li><li>from_buffer</li><li>from_sbv</li><li>from_srt</li><li>from_string</li><li>header_comments</li><li>iter_slice</li><li>read</li><li>read_buffer</li><li>save</li><li>save_as_srt</li><li>styles</li><li>total_length</li><li>write</li></ul>"
67
+ ]
68
+ },
69
+ "metadata": {},
70
+ "output_type": "display_data"
71
+ }
72
+ ],
73
+ "source": [
74
+ "display_html(\"\".join(chain(\"<ul>\", (f\"<li>{escape(member)}</li>\" for member in dir(web_vtt)), \"</ul>\")))"
75
+ ]
76
+ },
77
+ {
78
+ "cell_type": "code",
79
+ "execution_count": 5,
80
+ "metadata": {},
81
+ "outputs": [
82
+ {
83
+ "data": {
84
+ "text/html": [
85
+ "\n",
86
+ " <strong>Caption</strong> #344\n",
87
+ " <ul>\n",
88
+ " <li><strong>Start:</strong> Friday, April 11, 2025, 07:36:54 PM EDT</li>\n",
89
+ " <li><strong>Speaker:</strong> CUNY Tech Prep (CTP)</li>\n",
90
+ " <li><strong>Speech:</strong> Alright. You can pick the rooms. Now go into your rooms.</li>\n",
91
+ " <li><strong>End:</strong> Friday, April 11, 2025, 07:36:57 PM EDT</li>\n",
92
+ " </ul>\n",
93
+ " "
94
+ ]
95
+ },
96
+ "metadata": {},
97
+ "output_type": "display_data"
98
+ }
99
+ ],
100
+ "source": [
101
+ "speaker_speech_pattern = re.compile(\"(?:([^:]+): )?(.*)\")\n",
102
+ "\n",
103
+ "match web_vtt.captions[343]:\n",
104
+ " case Caption(identifier=identifier, start_time=start_time, end_time=end_time, text=text):\n",
105
+ " match speaker_speech_pattern.search(text).groups():\n",
106
+ " case (speaker, speech):\n",
107
+ " display_html(f\"\"\"\n",
108
+ " <strong>Caption</strong> #{identifier}\n",
109
+ " <ul>\n",
110
+ " <li><strong>Start:</strong> {BASE_TIME + timedelta(**start_time.__dict__):%A, %B %d, %Y, %I:%M:%S %p %Z}</li>\n",
111
+ " <li><strong>Speaker:</strong> {escape(speaker)}</li>\n",
112
+ " <li><strong>Speech:</strong> {escape(speech)}</li>\n",
113
+ " <li><strong>End:</strong> {BASE_TIME + timedelta(**end_time.__dict__):%A, %B %d, %Y, %I:%M:%S %p %Z}</li>\n",
114
+ " </ul>\n",
115
+ " \"\"\")"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "markdown",
120
+ "metadata": {},
121
+ "source": [
122
+ "### Chunking\n",
123
+ "\n",
124
+ "For chunking to produce pieces with useful context, we must use not only the caption (frame) itself but also its surrounding frames (the captions immediately before and after it), bundled together into one chunk."
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "execution_count": 6,
130
+ "metadata": {},
131
+ "outputs": [],
132
+ "source": [
133
+ "from more_itertools import windowed"
134
+ ]
135
+ },
136
+ {
137
+ "cell_type": "code",
138
+ "execution_count": 7,
139
+ "metadata": {},
140
+ "outputs": [],
141
+ "source": [
142
+ "CHUNK_FRAMES_OVERLAP = 1\n",
143
+ "CHUNK_FRAMES_WINDOW = 5"
144
+ ]
145
+ },
146
+ {
147
+ "cell_type": "code",
148
+ "execution_count": 8,
149
+ "metadata": {},
150
+ "outputs": [
151
+ {
152
+ "data": {
153
+ "text/html": [
154
+ "<table><tr><td>A</td></tr><tr><td>B</td></tr><tr><td>C</td></tr><tr><td>D</td></tr><tr><td>E</td></tr><tr><td>F</td></tr><tr><td>G</td></tr><tr><td>H</td></tr><tr><td>I</td></tr><tr><td>J</td></tr><tr><td>K</td></tr><tr><td>L</td></tr><tr><td>M</td></tr><tr><td>N</td></tr><tr><td>O</td></tr><tr><td>P</td></tr><tr><td>Q</td></tr><tr><td>R</td></tr><tr><td>S</td></tr><tr><td>T</td></tr><tr><td>U</td></tr><tr><td>V</td></tr><tr><td>W</td></tr><tr><td>X</td></tr><tr><td>Y</td></tr><tr><td>Z</td></tr></table>"
155
+ ]
156
+ },
157
+ "metadata": {},
158
+ "output_type": "display_data"
159
+ }
160
+ ],
161
+ "source": [
162
+ "items = tuple(chr(code_point) for code_point in range(ord('A'), ord('[')))\n",
163
+ "display_html(f\"<table>{\"\".join(map(\"<tr><td>{}</td></tr>\".format, items))}</table>\")"
164
+ ]
165
+ },
166
+ {
167
+ "cell_type": "code",
168
+ "execution_count": 9,
169
+ "metadata": {},
170
+ "outputs": [
171
+ {
172
+ "data": {
173
+ "text/html": [
174
+ "<table><tr><td>A</td><td>B</td><td>C</td><td>D</td><td>E</td></tr><tr><td>E</td><td>F</td><td>G</td><td>H</td><td>I</td></tr><tr><td>I</td><td>J</td><td>K</td><td>L</td><td>M</td></tr><tr><td>M</td><td>N</td><td>O</td><td>P</td><td>Q</td></tr><tr><td>Q</td><td>R</td><td>S</td><td>T</td><td>U</td></tr><tr><td>U</td><td>V</td><td>W</td><td>X</td><td>Y</td></tr><tr><td>Y</td><td>Z</td><td></td><td></td><td></td></tr></table>"
175
+ ]
176
+ },
177
+ "metadata": {},
178
+ "output_type": "display_data"
179
+ }
180
+ ],
181
+ "source": [
182
+ "chunks = tuple(windowed(items, CHUNK_FRAMES_WINDOW, step=(CHUNK_FRAMES_WINDOW - CHUNK_FRAMES_OVERLAP)))\n",
183
+ "display_html(f\"<table>{\"\".join(f\"<tr>{\"\".join(f\"<td>{item if item else \"\"}</td>\" for item in chunk)}</tr>\" for chunk in chunks)}</table>\")"
184
+ ]
185
+ },
186
+ {
187
+ "cell_type": "markdown",
188
+ "metadata": {},
189
+ "source": [
190
+ "## Using the `WebVTTContent` Class"
191
+ ]
192
+ },
193
+ {
194
+ "cell_type": "code",
195
+ "execution_count": 10,
196
+ "metadata": {},
197
+ "outputs": [],
198
+ "source": [
199
+ "from datetime import datetime\n",
200
+ "from hashlib import sha256\n",
201
+ "from zoneinfo import ZoneInfo\n",
202
+ "\n",
203
+ "from ctp_slack_bot.models import WebVTTContent"
204
+ ]
205
+ },
206
+ {
207
+ "cell_type": "code",
208
+ "execution_count": null,
209
+ "metadata": {},
210
+ "outputs": [],
211
+ "source": [
212
+ "FILE_PATH = \"GMT20250411-223535_Recording.transcript.vtt\"\n",
213
+ "TIME_ZONE = ZoneInfo(\"America/New_York\")\n",
214
+ "MODIFICATION_TIME = datetime(2025, 4, 11, hour=22, minute=35, second=35, tzinfo=ZoneInfo(\"GMT\")).astimezone(TIME_ZONE)"
215
+ ]
216
+ },
217
+ {
218
+ "cell_type": "code",
219
+ "execution_count": 12,
220
+ "metadata": {},
221
+ "outputs": [],
222
+ "source": [
223
+ "with open(FILE_PATH, \"rb\") as file:\n",
224
+ " bytes = file.read()\n",
225
+ " web_vtt_content = WebVTTContent.from_bytes(sha256(bytes).hexdigest(), {\"modification_time\": MODIFICATION_TIME}, bytes)"
226
+ ]
227
+ },
228
+ {
229
+ "cell_type": "code",
230
+ "execution_count": 13,
231
+ "metadata": {},
232
+ "outputs": [
233
+ {
234
+ "data": {
235
+ "text/plain": [
236
+ "(Chunk(text=\"iyeshia: For the workshop. We want to set you up.\\n\\niyeshia: Thank you, Kevin, for a question. We want to set you up for success in year one. And so this workshop is to help you kind of like\\n\\niyeshia: figure out, or how to adjust, as you're coming into your careers what to expect like your 30 days of work, 60 days of work, 90 days of work when you are starting your full time roles. So with that, said, let us get started.\\n\\niyeshia: So the topic, of course, is going to be discussing things of like the onboarding process of what it looks like when you start your jobs. How to maneuver or move around in your workplace environments. We'll discuss negotiating raises, because last time we didn't negotiating offers. So now we pass that you already got the offer. So now we'd be at the\\n\\niyeshia: the race card after that year. Don't try to come into your job already. 5 days in somebody to raise. Wait, and then from there we'll do activity on asking for feedback when you have, like your supervisor or manager, and you want to discuss things like that.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='1-5', metadata={'start': datetime.timedelta(0), 'end': datetime.timedelta(seconds=60, microseconds=379000), 'speakers': frozenset({'iyeshia'})}),\n",
237
+ " Chunk(text=\"iyeshia: the race card after that year. Don't try to come into your job already. 5 days in somebody to raise. Wait, and then from there we'll do activity on asking for feedback when you have, like your supervisor or manager, and you want to discuss things like that.\\n\\niyeshia: So let's kick it off with the onboarding process.\\n\\niyeshia: So with this, what you can expect ideally when you start your your job. There could be some type of welcome package. They might have a folder. They might have an email electronically or things like that. But it's gonna describe the details of like the company's environment. What your 1st day, or your 1st week or 1st month, a couple of months, might look like. As you're starting your onboarding process and the paperwork they might even show with you on the 1st day\\n\\niyeshia: work. You might be paired up with a Buddy or other people who might be hired at the same day, or maybe someone who was hired a year before, and they might be shadowing you to help you join and to get comfortable with your work environment.\\n\\niyeshia: and then also, your manager will. Hopefully, our supervisor would let you know what to expect. As you're starting your new\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='5-9', metadata={'start': datetime.timedelta(seconds=45, microseconds=930000), 'end': datetime.timedelta(seconds=108, microseconds=640000), 'speakers': frozenset({'iyeshia'})}),\n",
238
+ " Chunk(text=\"iyeshia: and then also, your manager will. Hopefully, our supervisor would let you know what to expect. As you're starting your new\\n\\niyeshia: job or career, and then from there, if you're unsure about your onboarding process as you're starting off, please ask questions to your manager or supervisor. The best part is to ask as many questions as you can. You're new, you're learning. They understand that. So they want to hear from you and your input\\n\\niyeshia: from there, I would say, I'm just looking at the\\n\\niyeshia: the chat. Yes, prepare for a lot of paperwork. Yes, I mean W. 2 W. Fours. They might have you fill out all those things. And that was 2. Okay, all right, Kevin.\\n\\niyeshia: So from there we'll kick it off. So an idea of what that could look like for you from 30 days to 60 days to 90 days to infinity and beyond like buzz light year, but from there you would hopefully to have intros with your your team, your manager, different departments. When you're starting\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='9-13', metadata={'start': datetime.timedelta(seconds=102, microseconds=82000), 'end': datetime.timedelta(seconds=166, microseconds=199000), 'speakers': frozenset({'iyeshia'})}),\n",
239
+ " Chunk(text=\"iyeshia: So from there we'll kick it off. So an idea of what that could look like for you from 30 days to 60 days to 90 days to infinity and beyond like buzz light year, but from there you would hopefully to have intros with your your team, your manager, different departments. When you're starting\\n\\niyeshia: they'll go over etiquette with you of like what you can expect. At the job that can include your attire, your desk hygiene communication, checking in with managers or teams.\\n\\niyeshia: Once you, after the 30 days we get to maybe days, 60 days, and then you're able to develop like your needs. Gain a better understanding of the company, develop plans and deliverables and outcomes. And then you go into your 90 days of being on the job where you're kind of learning your role. You're kind of getting adjust, you're being more effective and being becoming more independent.\\n\\niyeshia: And then from there you be able to understand, like, after the 90 days that you're kind of like settled in maybe months 4 to 6, or maybe the whole year. You should be settled into your role, understanding what's going on understanding how different departments move and things like that. So this is just the overview of what that looks like. It's not necessarily concrete, because every job is different.\\n\\niyeshia: But this is just to give an idea of what you can expect of that. And please just be mindful like with every workshop. I'm definitely going to send you the Powerpoint at the end. So if you want to look over that on your own time, you definitely can.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='13-17', metadata={'start': datetime.timedelta(seconds=147, microseconds=8000), 'end': datetime.timedelta(seconds=233, microseconds=730000), 'speakers': frozenset({'iyeshia'})}),\n",
240
+ " Chunk(text=\"iyeshia: But this is just to give an idea of what you can expect of that. And please just be mindful like with every workshop. I'm definitely going to send you the Powerpoint at the end. So if you want to look over that on your own time, you definitely can.\\n\\niyeshia: And so now that we've got through the onboarding process, this is probably the quickest we've done onboarding process because Kevin did it in 2 weeks. So from there we are going to move to navigating the workplace environment.\\n\\niyeshia: And so with that said, some things that are really important in your workplace environment is building relationships. Whether that's with your peers, your colleagues. Your manager. Trying to have a mentor mentee connection. All relationships are important.\\n\\niyeshia: With that I would say that when it comes to identifying your relationship needs, you want to know what you're expecting like, what? How do you need to show up in your role. What do you need from others? Understanding those type of things can help build better, I would say. Connections with your teammates and things of that nature when it's time to like cover problems or solve projects and things like that.\\n\\niyeshia: Another thing, too, you want to focus on is your Eiq. Emotional intelligence and communication that is basically pretty much helpful on the ability of recognizing your own emotions. Are you adequate enough, or know where your emotions are where you can get things done, what you need, what you don't need? Can you articulate that to your employer when you know those you can be able to identify and handle your emotions.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='17-21', metadata={'start': datetime.timedelta(seconds=220, microseconds=406000), 'end': datetime.timedelta(seconds=315, microseconds=170000), 'speakers': frozenset({'iyeshia'})}),\n",
241
+ " Chunk(text=\"iyeshia: Another thing, too, you want to focus on is your Eiq. Emotional intelligence and communication that is basically pretty much helpful on the ability of recognizing your own emotions. Are you adequate enough, or know where your emotions are where you can get things done, what you need, what you don't need? Can you articulate that to your employer when you know those you can be able to identify and handle your emotions.\\n\\niyeshia: And you can add basically help also to learn how to understand and help others. As well.\\n\\niyeshia: Another thing, as far as building relationships goes, is practicing, mindful listening. So the best way to truly listen is to talk less, and of course to understand more. And so when you learn from your teammates, listen as much as you can gain as much knowledge as you can from others, and that's gonna help you kinda conduct, or, you know, be a better team player. In your work environment.\\n\\niyeshia: And then a few things that you can do is\\n\\niyeshia: another way to help build a relationship is manager boundaries, you know, saying what is for you, scheduling time? With colleagues trying not to go over certain tasks or assignments. So that time management is gonna definitely help when you want to focus on your boundaries and you want to set schedules to maybe build connections with your team, and these are ways that you can go about it. Introduce yourself to people, whether your peers, whether it's\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='21-25', metadata={'start': datetime.timedelta(seconds=288, microseconds=600000), 'end': datetime.timedelta(seconds=376, microseconds=110000), 'speakers': frozenset({'iyeshia'})}),\n",
242
+ " Chunk(text=\"iyeshia: another way to help build a relationship is manager boundaries, you know, saying what is for you, scheduling time? With colleagues trying not to go over certain tasks or assignments. So that time management is gonna definitely help when you want to focus on your boundaries and you want to set schedules to maybe build connections with your team, and these are ways that you can go about it. Introduce yourself to people, whether your peers, whether it's\\n\\niyeshia: I don't care if it's a janitor security. The Cfo treat everybody equal and the same. And get to know. Get to know people because you just never know when you're going to need someone or work with someone. During that time.\\n\\niyeshia: And so those are the ways you can go about it. Greet people. You can invite people to coffee breaks, do quick message, check-in, and things of that nature, and then from there the 6 or 7 1, i think, are really important in the workplace environment. Some of the things you want to do is show gratitude, embrace others, give.\\n\\niyeshia: you know, credit where credit is due. Don't try to take anybody's ideas. If it comes to projects and things like that, that is a serious no-no show gratitude, and by any means necessary, try to avoid any gossip, any issues with office politics stay out of it. This is your first.st\\n\\niyeshia: This might be your 1st real like role, as far as like full time. In your career. So you just want to make sure you just keep in the peace and be respectful from there. Gossiping is kind of a big deal and a big no-no as well. So just be mindful of that.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='25-29', metadata={'start': datetime.timedelta(seconds=351, microseconds=10000), 'end': datetime.timedelta(seconds=438, microseconds=590000), 'speakers': frozenset({'iyeshia'})}),\n",
243
+ " Chunk(text=\"iyeshia: This might be your 1st real like role, as far as like full time. In your career. So you just want to make sure you just keep in the peace and be respectful from there. Gossiping is kind of a big deal and a big no-no as well. So just be mindful of that.\\n\\niyeshia: So the next thing, as far as we're talking about building relationship goals, you definitely want to also build those relationships, as I stated, with your peers. And things like that. Your coworkers? But you want to make sure you build a relationship with your manager. And just remember that it's important to have a relationship with your manager. But that's not the only relationship that's like you should focus on, you know. Like, I said before, you want to be a team play. You want to treat everybody equally because you just never know who you connect with.\\n\\niyeshia: But when it comes to that manager time, or asking for I would say, supervisions or meetings with them. You can ask questions. Those are always encouraged. You can ask them about their you know, supervisor style. Are they transformative? Are they hands on?\\n\\niyeshia: Do they like feedback directly towards them? Is everything written email? How are they? What's their work? Style? You can even ask them for the expectations of what is this like in a role like, what are your expectations, as far as how you show up in your role to them? And what are they looking for like with the measurements of success. Of course we always tell fellows to document everything that you do, as far as like when it comes to any goals that you bring any success.\\n\\niyeshia: rate, that you have many tasks that you might have brought to the table any of your accomplishments I know some people carry, or they write down like a accomplishment form of all the things that they've done, which, while they were at work to help with the ideas of what they bring to the table when it's time to come up for that, raise negotiation process. 
So just make sure you also update your resume as we go along, too.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='29-33', metadata={'start': datetime.timedelta(seconds=424, microseconds=830000), 'end': datetime.timedelta(seconds=536, microseconds=219000), 'speakers': frozenset({'iyeshia'})}),\n",
244
+ " Chunk(text=\"iyeshia: rate, that you have many tasks that you might have brought to the table any of your accomplishments I know some people carry, or they write down like a accomplishment form of all the things that they've done, which, while they were at work to help with the ideas of what they bring to the table when it's time to come up for that, raise negotiation process. So just make sure you also update your resume as we go along, too.\\n\\niyeshia: and then to talk with your manager about not only your successes and what you accomplish, but maybe areas of where you can grow and what you've been struggling to focus on so they can help support you with that as well.\\n\\niyeshia: Be observant in meetings when you're meeting with your team and other people. So that way you could learn about what else is going on, or whatever what everybody else is doing. So you can see how things work together. If you want to connect and socialize, you can ask people to lunch or coffee chats and things like that, and then always just remain proactive. You know it's always a good gesture to ask for teammate. It's like, Hey, is there anything you need before you know the end of the day? Or before I'm about to leave. You know things like that. It's always\\n\\niyeshia: helpful, too, because you never know when it's like your time, and someone is asking or offering help to you. And you're like, Oh, yeah, definitely need help with this. So it's always great to return their favor.\\n\\niyeshia: And so\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='33-37', metadata={'start': datetime.timedelta(seconds=511, microseconds=850000), 'end': datetime.timedelta(seconds=589, microseconds=330000), 'speakers': frozenset({'iyeshia'})}),\n",
245
+ " Chunk(text=\"iyeshia: And so\\n\\niyeshia: from there I would say, overall in regards of meeting with your supervisor, depending on how they do it. It could be quarterly it could be every other month. It could be 3 times throughout the year. They have a performance review. And so some companies like to start with, maybe January, you start, or maybe June, you started\\n\\niyeshia: working with them, and you track goals and what you could accomplish. With your manager until, like the next meeting, you have to go over just to make sure that you're on track with your goals throughout the throughout the year, as you've been working with your with your company.\\n\\niyeshia: That you got hired by, and so sometimes they'll do like a mid year review report to see your progress. If there's any touch points they could assist you with or support you with. You can meet with them with one on one meetings. If you feel like that's too long, and you want to make suggestions to meet with them sooner. Maybe you want to do every 3 months\\n\\niyeshia: just to see what's going on and how you can stay on track, and so I would say. Performance reviews, I guess, could be nerve wracking if it's like your 1st time, because you don't know what to expect.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='37-41', metadata={'start': datetime.timedelta(seconds=587, microseconds=800000), 'end': datetime.timedelta(seconds=654, microseconds=640000), 'speakers': frozenset({'iyeshia'})}),\n",
246
+ " Chunk(text=\"iyeshia: just to see what's going on and how you can stay on track, and so I would say. Performance reviews, I guess, could be nerve wracking if it's like your 1st time, because you don't know what to expect.\\n\\niyeshia: but of course you'll get used to it. As it progresses. But then, of course, you're still maintaining those connections with your supervisor, so you can definitely ask them questions of what you can expect from a performance review and things like that.\\n\\niyeshia: I'll pause here. If anybody has any questions about anything that I've mentioned. Anything like that?\\n\\niyeshia: Any questions? Are we all good.\\n\\nCUNY Tech Prep (CTP): Now's your chance before you forget what you wanted to ask.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='41-45', metadata={'start': datetime.timedelta(seconds=645, microseconds=172000), 'end': datetime.timedelta(seconds=682, microseconds=250000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
247
+ " Chunk(text=\"CUNY Tech Prep (CTP): Now's your chance before you forget what you wanted to ask.\\n\\nCUNY Tech Prep (CTP): No takers.\\n\\nCUNY Tech Prep (CTP): I have a few comments.\\n\\niyeshia: You want to go ahead, Kevin.\\n\\nCUNY Tech Prep (CTP): Well, self, I see self document as also having a secondary goal, particularly if you find yourself in\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='45-49', metadata={'start': datetime.timedelta(seconds=678, microseconds=110000), 'end': datetime.timedelta(seconds=700, microseconds=910000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
248
+ " Chunk(text=\"CUNY Tech Prep (CTP): Well, self, I see self document as also having a secondary goal, particularly if you find yourself in\\n\\nCUNY Tech Prep (CTP): not such a nice work environment.\\n\\nCUNY Tech Prep (CTP): It helps prevent people from gaslighting. You, for example.\\n\\nCUNY Tech Prep (CTP): And like it keeps you out of trouble. Let's say cause if you self document, then\\n\\nCUNY Tech Prep (CTP): you know exactly what was decided on.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='49-53', metadata={'start': datetime.timedelta(seconds=693, microseconds=509000), 'end': datetime.timedelta(seconds=720, microseconds=809000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
249
+ " Chunk(text=\"CUNY Tech Prep (CTP): you know exactly what was decided on.\\n\\nCUNY Tech Prep (CTP): And you're just following exactly what was said.\\n\\niyeshia: That is correct.\\n\\nCUNY Tech Prep (CTP): And then the setting boundaries right.\\n\\nCUNY Tech Prep (CTP): and there are some. There are some\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='53-57', metadata={'start': datetime.timedelta(seconds=717, microseconds=970000), 'end': datetime.timedelta(seconds=732, microseconds=590000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
250
+ " Chunk(text=\"CUNY Tech Prep (CTP): and there are some. There are some\\n\\nCUNY Tech Prep (CTP): bosses who will push your boundaries. Try to get you to like\\n\\nCUNY Tech Prep (CTP): do overtime. Stay longer than like\\n\\nCUNY Tech Prep (CTP): your stay longer than what's on like the contract, or whatever.\\n\\nCUNY Tech Prep (CTP): If you give an inch sometimes they'll take a mile, so\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='57-61', metadata={'start': datetime.timedelta(seconds=729, microseconds=400000), 'end': datetime.timedelta(seconds=749, microseconds=960000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
251
+ " Chunk(text=\"CUNY Tech Prep (CTP): If you give an inch sometimes they'll take a mile, so\\n\\nCUNY Tech Prep (CTP): you should be very clear on\\n\\nCUNY Tech Prep (CTP): your time. Your time limits, like.\\n\\nCUNY Tech Prep (CTP): you know, have always have an out, for\\n\\nCUNY Tech Prep (CTP): when too much is being requested of you.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='61-65', metadata={'start': datetime.timedelta(seconds=745, microseconds=275000), 'end': datetime.timedelta(seconds=767, microseconds=120000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
252
+ " Chunk(text=\"CUNY Tech Prep (CTP): when too much is being requested of you.\\n\\nCUNY Tech Prep (CTP): My usual go to is like, Oh, I I have like I have a meeting for Ctp, or like I have class.\\n\\niyeshia: Very good. That's good to good to know. And I know. David. Put in the chat like for an example of documentation. On March 16, th at 4, 35, you said, and I quote that is, that is exactly.\\n\\nCUNY Tech Prep (CTP): Under my lap.\\n\\niyeshia: But if you're in that situation, you definitely, it's so fresh, and it's so like truthful, like someone's like, no, I'm not going to doubt that someone made that.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='65-69', metadata={'start': datetime.timedelta(seconds=764, microseconds=400000), 'end': datetime.timedelta(seconds=803, microseconds=550000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
253
+ " Chunk(text=\"iyeshia: But if you're in that situation, you definitely, it's so fresh, and it's so like truthful, like someone's like, no, I'm not going to doubt that someone made that.\\n\\nCUNY Tech Prep (CTP): Yeah.\\n\\niyeshia: We wrote that and gave them the time so absolutely documentation goals for the good and for the bad. So definitely. Thank you for sharing that Kevin and David?\\n\\niyeshia: And so with that said, We'll go on to the the next slide. Which is a question of is my manager the same as having a mentor. Does anybody want to come off the come off mute and say yes or no?\\n\\niyeshia: I can just call on Kyle.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='69-73', metadata={'start': datetime.timedelta(seconds=795, microseconds=400000), 'end': datetime.timedelta(seconds=831, microseconds=790000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
254
+ " Chunk(text=\"iyeshia: I can just call on Kyle.\\n\\nCUNY Tech Prep (CTP): Kyle, you there.\\n\\nKyle Schoenhardt: No, it's not.\\n\\niyeshia: Okay, let's see.\\n\\niyeshia: Yay, good job, PAL. The answer is, no.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='73-77', metadata={'start': datetime.timedelta(seconds=828, microseconds=820000), 'end': datetime.timedelta(seconds=844, microseconds=930000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'Kyle Schoenhardt', 'iyeshia'})}),\n",
255
+ " Chunk(text=\"iyeshia: Yay, good job, PAL. The answer is, no.\\n\\niyeshia: Did you want to give more input?\\n\\nKyle Schoenhardt: Yeah. Sure.\\n\\niyeshia: Yeah.\\n\\nKyle Schoenhardt: Well, I mean, sometimes you can just have really bad managers who are there to cover their own self, make themselves look good sometimes at your expense, or they micromanage, or you just don't click well with that person. For whatever reason a mentor is akin to a leader, I think they are there to lift you up and show you\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='77-81', metadata={'start': datetime.timedelta(seconds=841, microseconds=340000), 'end': datetime.timedelta(seconds=869, microseconds=440000), 'speakers': frozenset({'Kyle Schoenhardt', 'iyeshia'})}),\n",
256
+ " Chunk(text=\"Kyle Schoenhardt: Well, I mean, sometimes you can just have really bad managers who are there to cover their own self, make themselves look good sometimes at your expense, or they micromanage, or you just don't click well with that person. For whatever reason a mentor is akin to a leader, I think they are there to lift you up and show you\\n\\nKyle Schoenhardt: how you can improve on yourself like a coach.\\n\\nKyle Schoenhardt: Constantly giving you feedback, whether positive or negative.\\n\\nKyle Schoenhardt: I would say someone you would\\n\\nKyle Schoenhardt: go to immediately like. If the 1st person you think of that you need help with something is not your manager, then that's\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='81-85', metadata={'start': datetime.timedelta(seconds=850, microseconds=340000), 'end': datetime.timedelta(seconds=885, microseconds=510000), 'speakers': frozenset({'Kyle Schoenhardt'})}),\n",
257
+ " Chunk(text=\"Kyle Schoenhardt: go to immediately like. If the 1st person you think of that you need help with something is not your manager, then that's\\n\\nKyle Schoenhardt: a good indicator, that that person is not a mentor, or, if you need help with something, your your 1st go to person to that you think of is\\n\\nKyle Schoenhardt: someone else that is probably who your mentor is most likely to be, could be a coworker. It could be a manager, but it's not always.\\n\\niyeshia: Got it. Thank you, Kevin. I mean. Thank you, Kyle, said Kevin. Thank you. Kyle. Appreciate that. With that, said, I don't feel like I need to add any more. I feel like Kyle took that. So I'm gonna move on to the day.\\n\\niyeshia: So the next question is, should my manager, be my mentor.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='85-89', metadata={'start': datetime.timedelta(seconds=879, microseconds=360000), 'end': datetime.timedelta(seconds=919, microseconds=30000), 'speakers': frozenset({'Kyle Schoenhardt', 'iyeshia'})}),\n",
258
+ " Chunk(text=\"iyeshia: So the next question is, should my manager, be my mentor.\\n\\niyeshia: Alison.\\n\\nAllison Lee: Well, you you can't force a mentor mentee relationship if that's not how it's going to work.\\n\\nAllison Lee: But it is possible for your manager to be some kind of mentor figure.\\n\\niyeshia: Thank you.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='89-93', metadata={'start': datetime.timedelta(seconds=914, microseconds=565000), 'end': datetime.timedelta(seconds=945, microseconds=810000), 'speakers': frozenset({'iyeshia', 'Allison Lee'})}),\n",
259
+ " Chunk(text=\"iyeshia: Thank you.\\n\\niyeshia: So with that, said.\\n\\niyeshia: that depends. So I appreciate Allison. Your response. It definitely depends. Can't force them. But of course, if you do get along with your supervisor, and you want to ask them that\\n\\niyeshia: by all means. But good, answers everyone.\\n\\niyeshia: So now we go more in depth of what can good mentorship look like? And so from there I would say, mentors, as\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='93-97', metadata={'start': datetime.timedelta(seconds=944, microseconds=920000), 'end': datetime.timedelta(seconds=975, microseconds=362000), 'speakers': frozenset({'iyeshia'})}),\n",
260
+ " Chunk(text=\"iyeshia: So now we go more in depth of what can good mentorship look like? And so from there I would say, mentors, as\\n\\niyeshia: Kyle touched on was that they provide support, wisdom to help you succeed in certain examples are, this is pretty much sharing any ideas you might have with them from paying program with you on a code base providing feedback, maybe on a slide deck to helping you remind that it's impossible to know everything. So they're kind of reassuring you in your in your role as you're starting your career.\\n\\niyeshia: and then you want to make sure your mentor is a is a safe space for you at the time. Sometimes your mentor. You can talk to your mentor about your manager sometimes if they are difficult or not, and so from there it's a form of trust\\n\\niyeshia: with your with your mentor. So if you have, if you are blessed to have a supervisor who can be both roles, a manager and a mentor. Go for it, if you're like. I'm still learning. I'm only 3, 30 days in 60 days, 90 days. Take your time, then. So that is definitely something to to know from that.\\n\\niyeshia: And then questions of Where can I find? A mentor? And so, before I even answer this question, who can tell me what erg stands for\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='97-101', metadata={'start': datetime.timedelta(seconds=964, microseconds=630000), 'end': datetime.timedelta(seconds=1046, microseconds=430000), 'speakers': frozenset({'iyeshia'})}),\n",
261
+ " Chunk(text=\"iyeshia: And then questions of Where can I find? A mentor? And so, before I even answer this question, who can tell me what erg stands for\\n\\niyeshia: anyone?\\n\\niyeshia: Go ahead, Devon, please.\\n\\nDevin Xie (no cam): Employee resource groups.\\n\\niyeshia: Thank you so much, Devin. I appreciate you and blouse right there. Next to erg. So the examples of that can be any groups that they have at your job related to Lgbtq. It could be groups related to race and identity. It could be anything from parenthood. I wish they had groups related for auntiehood and things of that nature. But it's all about finding your community and resources for things to help support you while you're working\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='101-105', metadata={'start': datetime.timedelta(seconds=1035, microseconds=839000), 'end': datetime.timedelta(seconds=1085, microseconds=780000), 'speakers': frozenset({'iyeshia', 'Devin Xie (no cam)'})}),\n",
262
+ " Chunk(text=\"iyeshia: Thank you so much, Devin. I appreciate you and blouse right there. Next to erg. So the examples of that can be any groups that they have at your job related to Lgbtq. It could be groups related to race and identity. It could be anything from parenthood. I wish they had groups related for auntiehood and things of that nature. But it's all about finding your community and resources for things to help support you while you're working\\n\\niyeshia: in some of your environments. And then, when you have your community, you can always reflect on interests related to tech.\\n\\niyeshia: or maybe research on your company like, who's in your area. And you could always reach out to some people for informational interviews. If you're really trying to seek this mentor Mentee relationship from people who are at your company. So just to keep that in mind.\\n\\niyeshia: I think I saw something.\\n\\niyeshia: Auntie Hood. Yes, and then I think, Mingle, said Manager supervisors are not your friend. Their one and only job is to find a person that can get the job done. Okay, come on, now, very good. And so\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='105-109', metadata={'start': datetime.timedelta(seconds=1057, microseconds=780000), 'end': datetime.timedelta(seconds=1131, microseconds=240000), 'speakers': frozenset({'iyeshia'})}),\n",
263
+ " Chunk(text=\"iyeshia: Auntie Hood. Yes, and then I think, Mingle, said Manager supervisors are not your friend. Their one and only job is to find a person that can get the job done. Okay, come on, now, very good. And so\\n\\niyeshia: with that, said, I think y'all know the roles between manager and mentor, and I appreciate that.\\n\\niyeshia: So now the next part is negotiating raises. So the last workshop we did was negotiating offers, as I stated before. So this one's gonna be a little different. You got the job. So now, after that whole success in your 1st year you want to start discussing maybe time for a raise. So let's get into that.\\n\\niyeshia: So you did a great job.\\n\\niyeshia: 1st year you knocked it out. You got outcomes, you got successes. You're amazing. On the 1st year what happens now?\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='109-113', metadata={'start': datetime.timedelta(seconds=1114, microseconds=170000), 'end': datetime.timedelta(seconds=1167, microseconds=119000), 'speakers': frozenset({'iyeshia'})}),\n",
264
+ " Chunk(text=\"iyeshia: 1st year you knocked it out. You got outcomes, you got successes. You're amazing. On the 1st year what happens now?\\n\\niyeshia: Your success is going to be measured by achievements, contributions into your organization, and that could be rewarded with\\n\\niyeshia: money or something else you value that could be related to time. Things of that nature. You want to go up based off your benefits. As we stated before, in the last workshop, you might wanna negotiate that. But if you want to talk about money first.st That's okay, too.\\n\\niyeshia: And these are gonna help you, too, as well with your I would say. Manager or supervisor. Meetings\\n\\niyeshia: from there.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='113-117', metadata={'start': datetime.timedelta(seconds=1160, microseconds=790000), 'end': datetime.timedelta(seconds=1199, microseconds=450000), 'speakers': frozenset({'iyeshia'})}),\n",
265
+ " Chunk(text=\"iyeshia: from there.\\n\\niyeshia: So just remember that it's okay when you when you flex those negotiating offers or flex those muscles during conversations around raises. It's not bragging. If you're talking about your achievements and things like that. It's okay to to talk about your successes, you know, especially during a raise time, because you're trying to show your manager or prove what you brought to the to the table. So keep that in mind.\\n\\niyeshia: So how does it look.\\n\\nCUNY Tech Prep (CTP): Comments, sorry.\\n\\niyeshia: Yeah, that is.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='117-121', metadata={'start': datetime.timedelta(seconds=1198, microseconds=703000), 'end': datetime.timedelta(seconds=1228, microseconds=390000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
266
+ " Chunk(text='iyeshia: Yeah, that is.\\n\\nCUNY Tech Prep (CTP): Something you would also document. If your manager praises you, you document that.\\n\\niyeshia: That.\\n\\nCUNY Tech Prep (CTP): Is evidence you can use in your negotiations.\\n\\niyeshia: That is such a fact.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='121-125', metadata={'start': datetime.timedelta(seconds=1227, microseconds=350000), 'end': datetime.timedelta(seconds=1240, microseconds=380000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
267
+ " Chunk(text=\"iyeshia: That is such a fact.\\n\\niyeshia: I literally just copy to paste everything, my manager said. Yep, one of my negotiation days. Yep, so thank you, Kevin, for saying that? So with that said, if you have those those meetings with them, document not only what you say, but what they said, as Kevin mentioned.\\n\\niyeshia: That was great in the negotiating offer. So how else do we prepare for this?\\n\\niyeshia: You're going to research? Yes, you're going to gather all your feedback, whether it's from your colleagues and meetings, whether it's from the success that you hear from your manager or tips from people that you work with, you're going to make sure you learn about your role. What's going on in the market. Just research is going to be your best.\\n\\niyeshia: Put input on this as well. When you're talking about your salary. The next thing you want to do is list the accomplishments. Keep those documents. Don't wait to the last minute you get to the end of the year. You're like, what did I do? It's been 12 months, like.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='125-129', metadata={'start': datetime.timedelta(seconds=1238, microseconds=990000), 'end': datetime.timedelta(seconds=1296, microseconds=189000), 'speakers': frozenset({'iyeshia'})}),\n",
268
+ " Chunk(text=\"iyeshia: Put input on this as well. When you're talking about your salary. The next thing you want to do is list the accomplishments. Keep those documents. Don't wait to the last minute you get to the end of the year. You're like, what did I do? It's been 12 months, like.\\n\\niyeshia: yeah, document everything, because you might forget some stuff. So that's definitely gonna help, too.\\n\\niyeshia: With that, said, you want to make sure you remind everyone. Maybe you save a bunch of money for the company. Oh, maybe you help them with other accomplishments, or maybe you spend off a project that's done really well. For your department. Share it. So please feel free to do that.\\n\\niyeshia: and then that will also help you keep your resume updated as well. So you don't have to worry about trying to\\n\\niyeshia: scatter or get all your thoughts together at the last minute.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='129-133', metadata={'start': datetime.timedelta(seconds=1281, microseconds=940000), 'end': datetime.timedelta(seconds=1331, microseconds=399000), 'speakers': frozenset({'iyeshia'})}),\n",
269
+ " Chunk(text=\"iyeshia: scatter or get all your thoughts together at the last minute.\\n\\niyeshia: And then with that status also, your manager needs to have the facts, too, to convince their boss to approve you for a raise. So if your manager is giving you the praises already, they're like, yeah, I did say that like\\n\\niyeshia: as well. Even if they make a joke like saying to you like, Hey, you deserve a raise document that you could go right back to like, you know. April 11th at 5, at 6 58 pm. You said, I deserve a raise this time like it. Just everything will just work for you in your favor for that, so please feel free to do that.\\n\\niyeshia: And so now you did the you did the raise. You had the meeting with your your manager. They're proposing it to the Supervisor, or things of that nature. I know different companies work in different ways, so they might have you go directly to your boss's boss to talk about the raise, or whoever is in charge of that\\n\\niyeshia: common, to negotiate that with them. But every company is different. But if they say yes, that's great job all done. Now, what if you get to a conversation where they say, No, what do you do, then? Well, there are alternatives for that. You can ask to work on, maybe towards a promotion. You know what I'm saying as far as if they say based off your level. We can't go any higher than that\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='133-137', metadata={'start': datetime.timedelta(seconds=1328, microseconds=370000), 'end': datetime.timedelta(seconds=1410, microseconds=720000), 'speakers': frozenset({'iyeshia'})}),\n",
270
+ " Chunk(text=\"iyeshia: common, to negotiate that with them. But every company is different. But if they say yes, that's great job all done. Now, what if you get to a conversation where they say, No, what do you do, then? Well, there are alternatives for that. You can ask to work on, maybe towards a promotion. You know what I'm saying as far as if they say based off your level. We can't go any higher than that\\n\\niyeshia: negotiate for promotion which would include maybe getting a title change, or better money that comes with it. This is why we say research, because you can definitely research what's going on in the market saying, Hey, that's my job. But the title is different.\\n\\niyeshia: Look that up and like definitely propose that if you want to. You can even ask for a faster review cycle. If they say something like, Hey, we can't give that to you. Just yet today. But let's revisit this topic on the 6 months, maybe, like, hey? Can we meet sooner, maybe in 3 months, to discuss more about how I can go about this\\n\\niyeshia: and then you could simply, if they say no. Ask why? Because you don't want to hear anything as far as like knowing that period. No, they should give you an explanation for it. So always ask questions with that to help like what's driving? That? Was it bad timing? Is there a gap? Is there their cap? Is there certain budgets. Did I miss anything that could help? So they can definitely\\n\\niyeshia: share with you and tell you that information of why they might have done. It could be a whole timing thing. It could be a budget thing. But just keep in mind to keep so just to keep in mind you could ask for like. Go around it 3 these ways, let's say 3 different ways. You can go about the answer and no from there. 
With that, said, does anyone have any questions so far?\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='137-141', metadata={'start': datetime.timedelta(seconds=1386, microseconds=520000), 'end': datetime.timedelta(seconds=1487, microseconds=429000), 'speakers': frozenset({'iyeshia'})}),\n",
271
+ " Chunk(text=\"iyeshia: share with you and tell you that information of why they might have done. It could be a whole timing thing. It could be a budget thing. But just keep in mind to keep so just to keep in mind you could ask for like. Go around it 3 these ways, let's say 3 different ways. You can go about the answer and no from there. With that, said, does anyone have any questions so far?\\n\\niyeshia: Nobody. Okay. Devin.\\n\\nCUNY Tech Prep (CTP): Devin does Devon.\\n\\nDevin Xie (no cam): Just curious. So like, say, we\\n\\nDevin Xie (no cam): find some opportunity after we graduate from Cuny Tech fair.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='141-145', metadata={'start': datetime.timedelta(seconds=1467, microseconds=260000), 'end': datetime.timedelta(seconds=1503, microseconds=140000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia', 'Devin Xie (no cam)'})}),\n",
272
+ " Chunk(text=\"Devin Xie (no cam): find some opportunity after we graduate from Cuny Tech fair.\\n\\nDevin Xie (no cam): And then we have questions about this stuff like.\\n\\nDevin Xie (no cam): let's say we work there for like a year. And we\\n\\nDevin Xie (no cam): we stop. We we want to ask for some advice. Can we still hit you guys up.\\n\\niyeshia: Yeah, but you become alumni. You're not just gonna drop you all off in May and be like, bye. No, you can definitely you'll be invited. May like, after the graduation, I want to say in the summertime you'll get an invite to the alumni slack channel and you can join\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='145-149', metadata={'start': datetime.timedelta(seconds=1499, microseconds=630000), 'end': datetime.timedelta(seconds=1531, microseconds=469000), 'speakers': frozenset({'iyeshia', 'Devin Xie (no cam)'})}),\n",
273
+ " Chunk(text=\"iyeshia: Yeah, but you become alumni. You're not just gonna drop you all off in May and be like, bye. No, you can definitely you'll be invited. May like, after the graduation, I want to say in the summertime you'll get an invite to the alumni slack channel and you can join\\n\\niyeshia: that, and I will be gladly to assist you. There. We have a career coach there, but usually all the the staff is on the Ctv team is on the alumni channel. So yeah, definitely. But we also like, I said before, Devin, save the Powerpoint, too.\\n\\niyeshia: Just putting that out there? So yeah, good question.\\n\\niyeshia: Okay?\\n\\niyeshia: And so the next part is after the conversation for the the raise. You want to make sure. The conversation goes well, timing is going to be a part of that. So clarifying the process, asking them like, you know, when should I expect the raise? You know that's not being thirsty. That's that's your money. You can ask questions about it. And what's the next step for that?\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='149-153', metadata={'start': datetime.timedelta(seconds=1513, microseconds=30000), 'end': datetime.timedelta(seconds=1577, microseconds=890000), 'speakers': frozenset({'iyeshia'})}),\n",
274
+ " Chunk(text=\"iyeshia: And so the next part is after the conversation for the the raise. You want to make sure. The conversation goes well, timing is going to be a part of that. So clarifying the process, asking them like, you know, when should I expect the raise? You know that's not being thirsty. That's that's your money. You can ask questions about it. And what's the next step for that?\\n\\niyeshia: You can always confirm with your manager? Like. If the reason they said no, was it because there's certain maybe I would say physical years of like, how they what deadline they have for the New Year or the new budget. Time or deadline, was it? Did I miss it when I asked for a salary? Or when's the next time I should ask for a salary. Increase, and things like that. Cause your your department, or you would hope the team that you're on will show you throughout the year of like what's coming up and what you can expect.\\n\\niyeshia: So you definitely want to plan ahead next time. If they say no, and then review the work and the feedback asking for feedback. Was it my, the way that I would propose the raise? Is there anything I could do to get? You know better on that? That would help with the mentor, of course.\\n\\niyeshia: Cause the person you're proposing it to might not give the input. But definitely, a mentor is gonna help you with that as well to see what's going on. You could definitely check in with your manager. If they had any feedback they might tell your manager to like, let them know like this is why they might have said No or this? Why, they might have said, Not yet, or they'll say yes later. So keep that in mind.\\n\\niyeshia: and then let's see right\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='153-157', metadata={'start': datetime.timedelta(seconds=1553, microseconds=290000), 'end': datetime.timedelta(seconds=1648, microseconds=679000), 'speakers': frozenset({'iyeshia'})}),\n",
275
+ " Chunk(text=\"iyeshia: and then let's see right\\n\\niyeshia: from there we'll go to the activity.\\n\\niyeshia: And so from there, this is an activity of asking for feedback.\\n\\niyeshia: And we're gonna do a scenario of you want to ask for feedback from your manager.\\n\\niyeshia: and you previously had passed up for raise and want to learn more about how you can ensure success earning one in the next review cycle.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='157-161', metadata={'start': datetime.timedelta(seconds=1641, microseconds=970000), 'end': datetime.timedelta(seconds=1673, microseconds=539000), 'speakers': frozenset({'iyeshia'})}),\n",
276
+ " Chunk(text=\"iyeshia: and you previously had passed up for raise and want to learn more about how you can ensure success earning one in the next review cycle.\\n\\niyeshia: So this part is, how would you start that conversation in your weekly check in?\\n\\niyeshia: So since we're virtual, we're gonna have, I'm gonna give you about 30 seconds to come up with your own answer, and then type it in the chat.\\n\\niyeshia: So review the scenario now and then we'll start in 30 seconds.\\n\\niyeshia: So\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='161-165', metadata={'start': datetime.timedelta(seconds=1665, microseconds=550000), 'end': datetime.timedelta(seconds=1692, microseconds=620000), 'speakers': frozenset({'iyeshia'})}),\n",
277
+ " Chunk(text='iyeshia: So\\n\\niyeshia: we set the timer for 30.\\n\\niyeshia: Okay?\\n\\niyeshia: Goes now\\n\\niyeshia: 10 seconds.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='165-169', metadata={'start': datetime.timedelta(seconds=1691, microseconds=890000), 'end': datetime.timedelta(seconds=1727, microseconds=70000), 'speakers': frozenset({'iyeshia'})}),\n",
278
+ " Chunk(text='iyeshia: 10 seconds.\\n\\niyeshia: Okay, time is up.\\n\\niyeshia: Okay, nice.\\n\\niyeshia: And look for a raise on to guarantee a raise in this performance. Review. Awesome. Thank you. Ty\\n\\niyeshia: and Mckenzie. Thank you.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='169-173', metadata={'start': datetime.timedelta(seconds=1725, microseconds=970000), 'end': datetime.timedelta(seconds=1767, microseconds=160000), 'speakers': frozenset({'iyeshia'})}),\n",
279
+ " Chunk(text='iyeshia: and Mckenzie. Thank you.\\n\\niyeshia: 13.\\n\\niyeshia: Some feedback to see what I can build. Awesome.\\n\\niyeshia: Hey, boys!\\n\\niyeshia: Oh, my God this time to reach out a bit. Okay, okay for me.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='173-177', metadata={'start': datetime.timedelta(seconds=1765, microseconds=20000), 'end': datetime.timedelta(seconds=1785, microseconds=509000), 'speakers': frozenset({'iyeshia'})}),\n",
280
+ " Chunk(text='iyeshia: Oh, my God this time to reach out a bit. Okay, okay for me.\\n\\niyeshia: No.\\n\\niyeshia: Okay.\\n\\niyeshia: Any improvement that you see that I cannot. Okay, thank you.\\n\\niyeshia: Let me check in with you.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='177-181', metadata={'start': datetime.timedelta(seconds=1780, microseconds=400000), 'end': datetime.timedelta(seconds=1810, microseconds=859000), 'speakers': frozenset({'iyeshia'})}),\n",
281
+ " Chunk(text=\"iyeshia: Let me check in with you.\\n\\niyeshia: There we go.\\n\\niyeshia: Okay, perfect.\\n\\niyeshia: So what I can make for the next recycle. Awesome. Thank you all for sharing so far, I'm gonna move on to the the next part. I think I kind of skipped\\n\\niyeshia: ahead.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='181-185', metadata={'start': datetime.timedelta(seconds=1807, microseconds=139000), 'end': datetime.timedelta(seconds=1830, microseconds=670000), 'speakers': frozenset({'iyeshia'})}),\n",
282
+ " Chunk(text=\"iyeshia: ahead.\\n\\niyeshia: Okay.\\n\\niyeshia: so right now, we have a role play example between a manager and you. Let's say you would.\\n\\niyeshia: it could be data science. Related. Right? So from here, I'm going to\\n\\niyeshia: probably volunteer, because I'm not sure if people will volunteer to be the manager and someone be you\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='185-189', metadata={'start': datetime.timedelta(seconds=1829, microseconds=480000), 'end': datetime.timedelta(seconds=1857, microseconds=657000), 'speakers': frozenset({'iyeshia'})}),\n",
283
+ " Chunk(text=\"iyeshia: probably volunteer, because I'm not sure if people will volunteer to be the manager and someone be you\\n\\niyeshia: So let me see who I can get.\\n\\niyeshia: Okay, I'll go with David for manager, and I'll go for\\n\\niyeshia: Let's try, Kevin for you.\\n\\niyeshia: If you have to read this role, play example.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='189-193', metadata={'start': datetime.timedelta(seconds=1850, microseconds=520000), 'end': datetime.timedelta(seconds=1877, microseconds=689000), 'speakers': frozenset({'iyeshia'})}),\n",
284
+ " Chunk(text='iyeshia: If you have to read this role, play example.\\n\\nDavid Rodriguez: Should I start now?\\n\\nCUNY Tech Prep (CTP): Kevin, you there?\\n\\nCUNY Tech Prep (CTP): Kevin? Chen.\\n\\nKevin Zheng: Right, right.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='193-197', metadata={'start': datetime.timedelta(seconds=1874, microseconds=660000), 'end': datetime.timedelta(seconds=1892, microseconds=270000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'David Rodriguez', 'iyeshia', 'Kevin Zheng'})}),\n",
285
+ " Chunk(text=\"Kevin Zheng: Right, right.\\n\\nCUNY Tech Prep (CTP): Alright!\\n\\nDavid Rodriguez: Great I'll start.\\n\\nDavid Rodriguez: Is there anything else you'd like to talk about?\\n\\nKevin Zheng: Yes, as you know, I've been taking on additional responsibilities since we used the team, and I'd like to speak to you about my conversation package.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='197-201', metadata={'start': datetime.timedelta(seconds=1891, microseconds=450000), 'end': datetime.timedelta(seconds=1910, microseconds=499000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'David Rodriguez', 'Kevin Zheng'})}),\n",
286
+ " Chunk(text=\"Kevin Zheng: Yes, as you know, I've been taking on additional responsibilities since we used the team, and I'd like to speak to you about my conversation package.\\n\\nDavid Rodriguez: We really appreciate your hard work.\\n\\nDavid Rodriguez: but it's still a tough economy, and we're not really in a position to give you anything more than a 2% raise. We can talk about a raise at your next review in about 6 months.\\n\\nKevin Zheng: I do understand that the economy has made things difficult. Can we set a time to discuss my compensation again before my next schedule Review.\\n\\nKevin Zheng: I appreciate an opportunity to talk in more detail on the additional work I've taken on, and its impact.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='201-205', metadata={'start': datetime.timedelta(seconds=1901, microseconds=690000), 'end': datetime.timedelta(seconds=1938, microseconds=959000), 'speakers': frozenset({'David Rodriguez', 'Kevin Zheng'})}),\n",
287
+ " Chunk(text=\"Kevin Zheng: I appreciate an opportunity to talk in more detail on the additional work I've taken on, and its impact.\\n\\nDavid Rodriguez: Sure that makes sense.\\n\\nDavid Rodriguez: I want to make sure you heard how about a month.\\n\\nKevin Zheng: Great. Thank you. I'll find some time on your calendar for us to meet.\\n\\niyeshia: Thank you. So with that, said, I. Just want to open up the the floor. To everyone. What did you notice?\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='205-209', metadata={'start': datetime.timedelta(seconds=1933, microseconds=720000), 'end': datetime.timedelta(seconds=1967, microseconds=303000), 'speakers': frozenset({'David Rodriguez', 'iyeshia', 'Kevin Zheng'})}),\n",
288
+ " Chunk(text=\"iyeshia: Thank you. So with that, said, I. Just want to open up the the floor. To everyone. What did you notice?\\n\\niyeshia: that during the the role play. That the let's say the data scientists who was played by Kevin,\\n\\niyeshia: did as far as like, maybe something different from your responses that you put in the chat. Did y'all notice anything differently?\\n\\niyeshia: Hey, Devin?\\n\\nDevin Xie (no cam): I don't know if I'm correct. But I think\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='209-213', metadata={'start': datetime.timedelta(seconds=1957, microseconds=300000), 'end': datetime.timedelta(seconds=2005, microseconds=496000), 'speakers': frozenset({'iyeshia', 'Devin Xie (no cam)'})}),\n",
289
+ " Chunk(text=\"Devin Xie (no cam): I don't know if I'm correct. But I think\\n\\nDevin Xie (no cam): the data scientists or us in this situation, we try to like Scheduler, a review like\\n\\nDevin Xie (no cam): in a later time.\\n\\niyeshia: absolutely. Thank you. He took initiative and be like, you know, hey, let me, let me get on your calendar for next time, instead of just like waiting around, you know, people be like, Oh, I'll get back to you and things like that. He's like, no, we can. We can discuss later, like, what's your schedule like? So that\\n\\niyeshia: that forwardness of just, you know, following up and seeing it through is definitely helpful.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='213-217', metadata={'start': datetime.timedelta(seconds=2002, microseconds=950000), 'end': datetime.timedelta(seconds=2041, microseconds=590000), 'speakers': frozenset({'iyeshia', 'Devin Xie (no cam)'})}),\n",
290
+ " Chunk(text=\"iyeshia: that forwardness of just, you know, following up and seeing it through is definitely helpful.\\n\\niyeshia: So and so, for now I would say this would take about maybe\\n\\niyeshia: so final reflection. We could talk about this for like maybe 3\\xa0min, or anybody could just like popcorn it out unless I just call on them. But for today's learning from the workshop what are some things you can generally expect when you 1st join a company? What is a manager's role in your success? And how do you find out your measures of success? Does anyone want to\\n\\niyeshia: volunteer and answer any of the any of the 3 questions that are of their choice\\n\\niyeshia: before I call on someone.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='217-221', metadata={'start': datetime.timedelta(seconds=2035, microseconds=850000), 'end': datetime.timedelta(seconds=2087, microseconds=550000), 'speakers': frozenset({'iyeshia'})}),\n",
291
+ " Chunk(text=\"iyeshia: before I call on someone.\\n\\niyeshia: Okay, anybody but Devin.\\n\\niyeshia: See, I'm gonna go with anthony.\\n\\nAnthony Jerez: Yes, I'm here.\\n\\niyeshia: Which question would you like to answer? You had to reflect.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='221-225', metadata={'start': datetime.timedelta(seconds=2086, microseconds=20000), 'end': datetime.timedelta(seconds=2122, microseconds=210000), 'speakers': frozenset({'Anthony Jerez', 'iyeshia'})}),\n",
292
+ " Chunk(text=\"iyeshia: Which question would you like to answer? You had to reflect.\\n\\nAnthony Jerez: On, I would say the 1st one.\\n\\niyeshia: Okay, go for it.\\n\\nAnthony Jerez: So some major things that I would expect would be we're going through like sessions like orientation, and like onboarding\\n\\nAnthony Jerez: also knowledge about like some some resources resources that we would have access to at any point.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='225-229', metadata={'start': datetime.timedelta(seconds=2119, microseconds=390000), 'end': datetime.timedelta(seconds=2147, microseconds=390000), 'speakers': frozenset({'Anthony Jerez', 'iyeshia'})}),\n",
293
+ " Chunk(text=\"Anthony Jerez: also knowledge about like some some resources resources that we would have access to at any point.\\n\\nAnthony Jerez: And yeah, stuff like that. I would say.\\n\\niyeshia: Thank you, Anthony, for sharing.\\n\\niyeshia: and then let me see, trying to see who's not making eye contact. Oh, oh, not everybody looks okay. So let's go with\\n\\niyeshia: Ibrahim.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='229-233', metadata={'start': datetime.timedelta(seconds=2139, microseconds=43000), 'end': datetime.timedelta(seconds=2167, microseconds=810000), 'speakers': frozenset({'Anthony Jerez', 'iyeshia'})}),\n",
294
+ " Chunk(text=\"iyeshia: Ibrahim.\\n\\nIbrahim Faruquee: Yeah, I'll answer question, too.\\n\\nIbrahim Faruquee: So your manager's role is mainly like for the company to manage like people and make sure that the right persons for the right job, but they can be like a mentor figure for you. So like, if there can be like good mentors who like help you throughout the process and help you with a raise, or they could also like, be difficult and make that like harder for you. But they're kind of. It's not like there's nothing to be, I guess, expected from a manager. It's just like\\n\\nIbrahim Faruquee: what they like. What do you, I guess. What do you end up with.\\n\\nIbrahim Faruquee: or what do you make the most of.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='233-237', metadata={'start': datetime.timedelta(seconds=2166, microseconds=780000), 'end': datetime.timedelta(seconds=2208, microseconds=880000), 'speakers': frozenset({'iyeshia', 'Ibrahim Faruquee'})}),\n",
295
+ " Chunk(text=\"Ibrahim Faruquee: or what do you make the most of.\\n\\niyeshia: Awesome. Thank you.\\n\\niyeshia: And then for the 3rd question.\\n\\niyeshia: and we're gonna go for Isabel.\\n\\nIsabel Loçi: Hello!\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='237-241', metadata={'start': datetime.timedelta(seconds=2207, microseconds=390000), 'end': datetime.timedelta(seconds=2223, microseconds=750000), 'speakers': frozenset({'Isabel Loçi', 'iyeshia', 'Ibrahim Faruquee'})}),\n",
296
+ " Chunk(text=\"Isabel Loçi: Hello!\\n\\niyeshia: Hello!\\n\\nIsabel Loçi: Sorry. My Internet's horrible, and might I might disconnect?\\n\\nIsabel Loçi: I'll see if I can answer the 3rd one. How do you find your measures of success.\\n\\nIsabel Loçi: I would say, ask for feedback from other people elsewhere, from other colleagues, from your manager.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='241-245', metadata={'start': datetime.timedelta(seconds=2222, microseconds=900000), 'end': datetime.timedelta(seconds=2245, microseconds=189000), 'speakers': frozenset({'Isabel Loçi', 'iyeshia'})}),\n",
297
+ " Chunk(text=\"Isabel Loçi: I would say, ask for feedback from other people elsewhere, from other colleagues, from your manager.\\n\\nIsabel Loçi: That way you get a better understanding of where you are right now. And also I would say to also look back on the goals that you've set for yourself, and see if you've reached those goals as well, and that would be a good measure of success.\\n\\niyeshia: Okay, very good. All right.\\n\\niyeshia: So yeah, definitely helped make my life easier with this presentation. So thank you. I'm glad things are sticking and so with that said, We will go and launch Kahoo. But before I do that I definitely want to say just be mindful of these things.\\n\\niyeshia: When you are starting in your 1st year, in your career. As it was stated in one of the slides, you don't have to have it all figured out is the perfect time to ask questions. You're gonna make mistakes, or you're not. But if you do, it's okay. Because it's all gonna be a learning process. For your 1st year, and your managers expect that.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='245-249', metadata={'start': datetime.timedelta(seconds=2238, microseconds=660000), 'end': datetime.timedelta(seconds=2306, microseconds=319000), 'speakers': frozenset({'Isabel Loçi', 'iyeshia'})}),\n",
298
+ " Chunk(text=\"iyeshia: When you are starting in your 1st year, in your career. As it was stated in one of the slides, you don't have to have it all figured out is the perfect time to ask questions. You're gonna make mistakes, or you're not. But if you do, it's okay. Because it's all gonna be a learning process. For your 1st year, and your managers expect that.\\n\\niyeshia: So just keep that in mind.\\n\\niyeshia: And then, if you are going to seek, you know, support, I think. It was great that it's a bell, stated asking for feedback from your manager, but you could also ask for feedback from your teammates, too. Cause they, if you work with them closely. If you have a team to see, like what your areas of strengths are your areas of growth.\\n\\niyeshia: and things that you're learning. That could be helpful. Towards that process if you're going up for a raise. But sometimes people could see our strengths stronger or clearer, or even faster than we can, and we don't even realize it.\\n\\niyeshia: And then even asking your mentors, too, as well, can be helpful. And then.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='249-253', metadata={'start': datetime.timedelta(seconds=2282, microseconds=771000), 'end': datetime.timedelta(seconds=2345, microseconds=799000), 'speakers': frozenset({'iyeshia'})}),\n",
299
+ " Chunk(text=\"iyeshia: And then even asking your mentors, too, as well, can be helpful. And then.\\n\\niyeshia: if you are going to negotiate, remember to keep for raise, to keep that documented focus on your skills. Make sure you do your research on the market and definitely, just try to figure out if you can negotiate other things.\\n\\niyeshia: And when it comes to relationships, at work, you wanna make sure to treat everybody equally so I hope that that helps. If you didn't get anything else. I hope that's what helps you with them\\n\\niyeshia: with your 1st year? As you enter into your careers. And so with that said, we'll go into Kahoot.\\n\\niyeshia: and so I'm going to launch it now.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='253-257', metadata={'start': datetime.timedelta(seconds=2341, microseconds=80000), 'end': datetime.timedelta(seconds=2390, microseconds=330000), 'speakers': frozenset({'iyeshia'})}),\n",
300
+ " Chunk(text=\"iyeshia: and so I'm going to launch it now.\\n\\niyeshia: Let's get it started.\\n\\niyeshia: I don't think my headphones died so\\n\\niyeshia: got 33 people on here, and only 16.\\n\\niyeshia: Okay.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='257-261', metadata={'start': datetime.timedelta(seconds=2387, microseconds=420000), 'end': datetime.timedelta(seconds=2445, microseconds=90000), 'speakers': frozenset({'iyeshia'})}),\n",
301
+ " Chunk(text=\"iyeshia: Okay.\\n\\niyeshia: sound. Good.\\n\\niyeshia: 33.\\n\\niyeshia: Well, I didn't cut myself. That's Kevin. You're playing too.\\n\\niyeshia: Figure out how to be successful on my own.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='261-265', metadata={'start': datetime.timedelta(seconds=2444, microseconds=230000), 'end': datetime.timedelta(seconds=2550, microseconds=965000), 'speakers': frozenset({'iyeshia'})}),\n",
302
+ " Chunk(text=\"iyeshia: Figure out how to be successful on my own.\\n\\niyeshia: Oh, you do not have to figure that out.\\n\\niyeshia: That's why we tell you, have mentors, extra peers and things of that nature.\\n\\niyeshia: Well, yeah, shout out to the 22. It's okay. One. I'll take the 22 others, you know. Wow!\\n\\niyeshia: Your boss. My goodness, okay, is in the lead.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='265-269', metadata={'start': datetime.timedelta(seconds=2547, microseconds=780000), 'end': datetime.timedelta(seconds=2583, microseconds=779000), 'speakers': frozenset({'iyeshia'})}),\n",
303
+ " Chunk(text=\"iyeshia: Your boss. My goodness, okay, is in the lead.\\n\\niyeshia: So let's go ahead\\n\\niyeshia: who should not go to\\n\\niyeshia: thank you definitely. The worst thing you could do is talk to no one. If you need support with something.\\n\\niyeshia: So I hope.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='269-273', metadata={'start': datetime.timedelta(seconds=2578, microseconds=507000), 'end': datetime.timedelta(seconds=2624, microseconds=130000), 'speakers': frozenset({'iyeshia'})}),\n",
304
+ " Chunk(text=\"iyeshia: So I hope.\\n\\nCUNY Tech Prep (CTP): I am shocked.\\n\\niyeshia: That one should you not go to? So yeah.\\n\\niyeshia: let's see. Okay, Jamie is in the name.\\n\\niyeshia: Okay, let's go.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='273-277', metadata={'start': datetime.timedelta(seconds=2622, microseconds=675000), 'end': datetime.timedelta(seconds=2641, microseconds=959000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
305
+ " Chunk(text=\"iyeshia: Okay, let's go.\\n\\niyeshia: 3rd question, what are not considerations to mention when providing reasons for a salary increase.\\n\\niyeshia: There aren't enough.\\n\\niyeshia: Okay? 18. Yes, the cost of living. That is correct. You should not consider that\\n\\niyeshia: They don't, they don't. They don't care so definitely the other ones. You could do that on your own when you're doing your negotiating your your budget. But don't come out and say, like, Hey, the cost of living in this city? They're like\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='277-281', metadata={'start': datetime.timedelta(seconds=2640, microseconds=140000), 'end': datetime.timedelta(seconds=2695, microseconds=309000), 'speakers': frozenset({'iyeshia'})}),\n",
306
+ " Chunk(text=\"iyeshia: They don't, they don't. They don't care so definitely the other ones. You could do that on your own when you're doing your negotiating your your budget. But don't come out and say, like, Hey, the cost of living in this city? They're like\\n\\niyeshia: or virtual.\\n\\niyeshia: our office in California, we have no idea. So yeah, just just keep that in mind. So good job to the the cost of living folks.\\n\\niyeshia: Okay, David Rv is in the lead.\\n\\niyeshia: Okay, let's go to the next question.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='281-285', metadata={'start': datetime.timedelta(seconds=2680, microseconds=250000), 'end': datetime.timedelta(seconds=2715, microseconds=419000), 'speakers': frozenset({'iyeshia'})}),\n",
307
+ " Chunk(text=\"iyeshia: Okay, let's go to the next question.\\n\\niyeshia: what is a thoughtful way to actually negotiate?\\n\\niyeshia: So we can negotiate? Very good. It's a thoughtful way to act\\n\\niyeshia: and I think most of y'all got that in the chat. I saw some other answers. I'm gonna leave that questionable. But for the ones who did shout out to y'all.\\n\\niyeshia: So I think this is the last question.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='285-289', metadata={'start': datetime.timedelta(seconds=2712, microseconds=460000), 'end': datetime.timedelta(seconds=2758, microseconds=389000), 'speakers': frozenset({'iyeshia'})}),\n",
308
+ " Chunk(text=\"iyeshia: So I think this is the last question.\\n\\niyeshia: But Kyle is in the lead now, and so shouts to Kyle. So here goes the last question.\\n\\niyeshia: The most important relationship at work is with my manager.\\n\\niyeshia: Shout out to the people who said, False I said, it is important, but not the most important. Yeah, there's team this\\n\\niyeshia: Ceos, what about yourself? You know, things like that? So I just want to keep that in mind. So\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='289-293', metadata={'start': datetime.timedelta(seconds=2755, microseconds=680000), 'end': datetime.timedelta(seconds=2795, microseconds=579000), 'speakers': frozenset({'iyeshia'})}),\n",
309
+ " Chunk(text=\"iyeshia: Ceos, what about yourself? You know, things like that? So I just want to keep that in mind. So\\n\\niyeshia: yeah, let's always about that. So let's go to the windows.\\n\\niyeshia: Okay, let's okay.\\n\\niyeshia: Number one.\\n\\niyeshia: Okay, at the bottom.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='293-297', metadata={'start': datetime.timedelta(seconds=2788, microseconds=670000), 'end': datetime.timedelta(seconds=2827, microseconds=966000), 'speakers': frozenset({'iyeshia'})}),\n",
310
+ " Chunk(text=\"iyeshia: Okay, at the bottom.\\n\\niyeshia: Okay, with that, said\\n\\niyeshia: the last thing I will do. These are some follow up questions that you can ask your career coach. If I'm your career coach, you could definitely ask me that.\\n\\niyeshia: But how much of a raise. Can you ask for? When do you? Should you start a retirement fund? I would say, Asap, how long should you take to figure out if your company is a good fit, and how do you approach a conflict with a manager or coworker? So if you have any questions about those, please feel free to reach out to me or your career coach, if you would like to discuss further details, and I do want to be mindful of time.\\n\\niyeshia: And so I want to thank you for your time, and just want to let you know. This is the feedback form that really helps me with this presentation\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='297-301', metadata={'start': datetime.timedelta(seconds=2822, microseconds=600000), 'end': datetime.timedelta(seconds=2879, microseconds=310000), 'speakers': frozenset({'iyeshia'})}),\n",
311
+ " Chunk(text=\"iyeshia: And so I want to thank you for your time, and just want to let you know. This is the feedback form that really helps me with this presentation\\n\\niyeshia: and help me to deliver it better or worse. So if I did a good job, that's great. But I'm going to put this in the chat.\\n\\niyeshia: So you could fill that out now and then. Also want to invite you all to Rsvp. For Ctp's graduation.\\n\\niyeshia: So I would say, you can do that right now as well\\n\\niyeshia: and please register as a student. For those who can attend. You're more than welcome for the I believe the May 20th ones. If you cannot attend because you have a final, you have an internship. It is okay. There's no pressure. We're not going to be like, Hey, you can't you got to make it? No, we totally get it, I mean, we understand. So blessings on your finals\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='301-305', metadata={'start': datetime.timedelta(seconds=2870, microseconds=460000), 'end': datetime.timedelta(seconds=2919, microseconds=640000), 'speakers': frozenset({'iyeshia'})}),\n",
312
+ " Chunk(text=\"iyeshia: and please register as a student. For those who can attend. You're more than welcome for the I believe the May 20th ones. If you cannot attend because you have a final, you have an internship. It is okay. There's no pressure. We're not going to be like, Hey, you can't you got to make it? No, we totally get it, I mean, we understand. So blessings on your finals\\n\\niyeshia: and your projects. But for those who can't attend come through. It's going to be great to see your projects to see each other one last time, like Demo Night. And it's gonna be it's going to be a great time as we close out the the cohort in in May. So, and also to Devin's question, just one more time. We won't leave you hanging you will get an invite to be alumni\\n\\niyeshia: for Ctp, and that way you'll be with everybody who did the cohorts before your cohorts, one through 9 and so it'll be one through 10 now. And so that'll be like over a thousand people in that slack channel. So you can definitely network with your peers and the people who came before you. So yeah, just keep that in mind.\\n\\niyeshia: So thank you all. And I will stop sharing.\\n\\niyeshia: And yeah, please. Rsvp for the graduation. And please fill out that feedback form. It is greatly appreciative. I want to thank you for your time lessons on your projects. And yeah, if any of my fellows have any questions about the presentation, you can highlight me on slack. I am there to support you, and other than that. I want to thank you. And, Kevin, I think it's all yours now.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='305-309', metadata={'start': datetime.timedelta(seconds=2901, microseconds=130000), 'end': datetime.timedelta(seconds=2988, microseconds=469000), 'speakers': frozenset({'iyeshia'})}),\n",
313
+ " Chunk(text=\"iyeshia: And yeah, please. Rsvp for the graduation. And please fill out that feedback form. It is greatly appreciative. I want to thank you for your time lessons on your projects. And yeah, if any of my fellows have any questions about the presentation, you can highlight me on slack. I am there to support you, and other than that. I want to thank you. And, Kevin, I think it's all yours now.\\n\\nCUNY Tech Prep (CTP): Definitely. Thank you, Aisha, for the valuable tips. I think. A lot of students, a lot of the students I've spoken to, at least are.\\n\\nCUNY Tech Prep (CTP): have got recently gotten jobs or are very close to getting them, and\\n\\nCUNY Tech Prep (CTP): they will find this material very useful. I'm actually kind of glad I remember to click record at the beginning, because some of them are like in traffic right now.\\n\\niyeshia: Got it. Okay.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='309-313', metadata={'start': datetime.timedelta(seconds=2964, microseconds=60000), 'end': datetime.timedelta(seconds=3011, microseconds=947000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
314
+ " Chunk(text=\"iyeshia: Got it. Okay.\\n\\niyeshia: I'm glad.\\n\\nCUNY Tech Prep (CTP): Okay, thank you. So I'm gonna give you all 10\\xa0min to fill this out. Since you got 2 things to fill out. One is the inviting yourself to the graduation, and then 2 is the survey.\\n\\nCUNY Tech Prep (CTP): Alright, so we will come back at 7, 35.\\n\\nCUNY Tech Prep (CTP): Oh, yes, there's good news for those of you who missed it.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='313-317', metadata={'start': datetime.timedelta(seconds=3010, microseconds=980000), 'end': datetime.timedelta(seconds=3063, microseconds=720000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n",
315
+ " Chunk(text=\"CUNY Tech Prep (CTP): Oh, yes, there's good news for those of you who missed it.\\n\\nCUNY Tech Prep (CTP): There's no homework for the next 2 weeks, and there's spring break. So which means.\\n\\nCUNY Tech Prep (CTP): after this class, I'll be seeing you the second Friday from now.\\n\\nCUNY Tech Prep (CTP): Not next Friday.\\n\\nCUNY Tech Prep (CTP): No, a break is not exactly a break, so you have projects.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='317-321', metadata={'start': datetime.timedelta(seconds=3060, microseconds=740000), 'end': datetime.timedelta(seconds=3115, microseconds=180000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
316
+ " Chunk(text='CUNY Tech Prep (CTP): No, a break is not exactly a break, so you have projects.\\n\\nCUNY Tech Prep (CTP): This is time to do your projects.\\n\\nCUNY Tech Prep (CTP): Alright, so just as a gift to all the people who are in class.\\n\\nCUNY Tech Prep (CTP): If you check the homework sheet.\\n\\nCUNY Tech Prep (CTP): there is actually a column where you can grade yourselves. You can give yourself any emoji you want.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='321-325', metadata={'start': datetime.timedelta(seconds=3110, microseconds=350000), 'end': datetime.timedelta(seconds=3275, microseconds=10000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
317
+ " Chunk(text=\"CUNY Tech Prep (CTP): there is actually a column where you can grade yourselves. You can give yourself any emoji you want.\\n\\nCUNY Tech Prep (CTP): I'll let you figure out which one that is\\n\\nCUNY Tech Prep (CTP): alright. We're back.\\n\\nCUNY Tech Prep (CTP): So go for the rest of this day. So we're gonna I'm gonna put you in breakout rooms\\n\\nCUNY Tech Prep (CTP): for your projects.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='325-329', metadata={'start': datetime.timedelta(seconds=3269, microseconds=390000), 'end': datetime.timedelta(seconds=3591, microseconds=359000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
318
+ " Chunk(text='CUNY Tech Prep (CTP): for your projects.\\n\\nCUNY Tech Prep (CTP): And what I want you to do is I need to think about the state of the project. You, the the state the project is in.\\n\\nCUNY Tech Prep (CTP): I will be coming around to check in\\n\\nCUNY Tech Prep (CTP): because you have 2 weeks and no homework.\\n\\nCUNY Tech Prep (CTP): I want you to put your all into the project. So', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='329-333', metadata={'start': datetime.timedelta(seconds=3589, microseconds=600000), 'end': datetime.timedelta(seconds=3613, microseconds=269000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
319
+ " Chunk(text='CUNY Tech Prep (CTP): I want you to put your all into the project. So\\n\\nCUNY Tech Prep (CTP): let me make the breakout rooms first.st\\n\\nCUNY Tech Prep (CTP): Basically, what I want you to do is plan out the next 2 weeks. Okay, what do you want? What? What is missing from\\n\\nCUNY Tech Prep (CTP): your project that you need to complete it?\\n\\nCUNY Tech Prep (CTP): And how are you going to get there in the next 2 weeks?', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='333-337', metadata={'start': datetime.timedelta(seconds=3609, microseconds=440000), 'end': datetime.timedelta(seconds=3646, microseconds=619000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
320
+ " Chunk(text=\"CUNY Tech Prep (CTP): And how are you going to get there in the next 2 weeks?\\n\\nCUNY Tech Prep (CTP): Because after the next 2 weeks you literally have only 2 weeks left.\\n\\nCUNY Tech Prep (CTP): There's class. There's week 11, and then there's week 12\\n\\nCUNY Tech Prep (CTP): week. 13 is like May May 10th or May 9, th\\n\\nCUNY Tech Prep (CTP): and then the week after that, I believe, is\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='337-341', metadata={'start': datetime.timedelta(seconds=3643, microseconds=720000), 'end': datetime.timedelta(seconds=3672, microseconds=696000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n",
321
+ " Chunk(text=\"CUNY Tech Prep (CTP): and then the week after that, I believe, is\\n\\nCUNY Tech Prep (CTP): when you're going to do Demos.\\n\\nCUNY Tech Prep (CTP): I could be wrong.\\n\\nCUNY Tech Prep (CTP): Alright. You can pick the rooms. Now go into your rooms.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='341-344', metadata={'start': datetime.timedelta(seconds=3670, microseconds=320000), 'end': datetime.timedelta(seconds=3682, microseconds=370000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}))"
322
+ ]
323
+ },
324
+ "execution_count": 13,
325
+ "metadata": {},
326
+ "output_type": "execute_result"
327
+ }
328
+ ],
329
+ "source": [
330
+ "web_vtt_content.get_chunks()"
331
+ ]
332
+ }
333
+ ],
334
+ "metadata": {
335
+ "kernelspec": {
336
+ "display_name": ".venv",
337
+ "language": "python",
338
+ "name": "python3"
339
+ },
340
+ "language_info": {
341
+ "codemirror_mode": {
342
+ "name": "ipython",
343
+ "version": 3
344
+ },
345
+ "file_extension": ".py",
346
+ "mimetype": "text/x-python",
347
+ "name": "python",
348
+ "nbconvert_exporter": "python",
349
+ "pygments_lexer": "ipython3",
350
+ "version": "3.12.3"
351
+ }
352
+ },
353
+ "nbformat": 4,
354
+ "nbformat_minor": 2
355
+ }
pyproject.toml CHANGED
@@ -21,10 +21,10 @@ classifiers = [
21
  dependencies = [
22
  "pydantic>=2.11.2",
23
  "pydantic-settings>=2.8.1",
 
24
  "more-itertools>=10.6.0",
25
  "python-dotenv>=1.1.0",
26
  "loguru>=0.7.3",
27
- "fastapi>=0.115.12",
28
  "dependency-injector>=4.46.0",
29
  "pytz>=2025.2",
30
  "apscheduler>=3.11.0",
@@ -36,7 +36,10 @@ dependencies = [
36
  "slack_bolt>=1.23.0",
37
  "pymongo>=4.11.3 ",
38
  "motor>=3.7.0",
39
- "openai>=1.70.0"
 
 
 
40
  ]
41
 
42
  [project.optional-dependencies]
@@ -47,7 +50,7 @@ dev = [
47
  "types-pytz>=2025.2",
48
  "black>=25.1.0",
49
  "isort>=6.0.1",
50
- "ruff>=0.11.4",
51
  ]
52
 
53
  [project.urls]
 
21
  dependencies = [
22
  "pydantic>=2.11.2",
23
  "pydantic-settings>=2.8.1",
24
+ "cachetools>=5.5.2",
25
  "more-itertools>=10.6.0",
26
  "python-dotenv>=1.1.0",
27
  "loguru>=0.7.3",
 
28
  "dependency-injector>=4.46.0",
29
  "pytz>=2025.2",
30
  "apscheduler>=3.11.0",
 
36
  "slack_bolt>=1.23.0",
37
  "pymongo>=4.11.3 ",
38
  "motor>=3.7.0",
39
+ "openai>=1.70.0",
40
+ "google-api-python-client>=2.167.0",
41
+ "google-auth>=2.39.0",
42
+ "google-auth-oauthlib>=1.2.1"
43
  ]
44
 
45
  [project.optional-dependencies]
 
50
  "types-pytz>=2025.2",
51
  "black>=25.1.0",
52
  "isort>=6.0.1",
53
+ "ruff>=0.11.4"
54
  ]
55
 
56
  [project.urls]
src/ctp_slack_bot/app.py CHANGED
@@ -1,9 +1,24 @@
1
- from asyncio import run
2
  from loguru import logger
 
 
3
 
4
  from ctp_slack_bot.containers import Container
5
  from ctp_slack_bot.core.logging import setup_logging
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  async def main() -> None:
8
  # Setup logging.
9
  setup_logging()
@@ -16,19 +31,23 @@ async def main() -> None:
16
  # Kick off services which should be active from the start.
17
  container.content_ingestion_service()
18
  container.question_dispatch_service()
 
19
 
20
- # Start the scheduler.
21
- schedule_service = container.schedule_service()
22
- schedule_service.start()
23
-
24
- # Start the Slack socket mode handler in a background thread.
25
  socket_mode_handler = container.socket_mode_handler()
26
- logger.info("Starting Slack Socket Mode handler…")
27
- await socket_mode_handler.start_async()
28
-
29
- # Shutdown. (This will never execute, because the socket mode handler never returns.)
30
- logger.info("Shutting down application…")
31
- schedule_service.stop()
 
 
 
 
 
 
 
32
 
33
  if __name__ == "__main__":
34
  run(main())
 
1
+ from asyncio import all_tasks, CancelledError, create_task, current_task, get_running_loop, run
2
  from loguru import logger
3
+ from signal import SIGINT, SIGTERM
4
+ from typing import Any, Callable
5
 
6
  from ctp_slack_bot.containers import Container
7
  from ctp_slack_bot.core.logging import setup_logging
8
 
9
async def handle_shutdown_signal() -> None:
    """
    Request cancellation of every other unfinished asyncio task.

    The task executing this coroutine is skipped so that it can finish logging; tasks that are already done are
    left alone. Cancellation is only requested here; nothing is awaited.
    """
    logger.info("Received shutdown signal.")
    # Lazily filter the snapshot returned by all_tasks(): drop ourselves and anything already finished.
    cancellable = (
        candidate
        for candidate in all_tasks()
        if not (candidate is current_task() or candidate.done())
    )
    for candidate in cancellable:
        candidate.cancel()
        logger.trace("Cancelled task {}.", candidate.get_name())
    logger.info("Cancelled all tasks.")
16
+
17
def create_shutdown_signal_handler() -> Callable[[], None]:
    """
    Build the synchronous callback to register for shutdown signals.

    Returns:
        A zero-argument callable. Each call schedules handle_shutdown_signal() as a task, so it must be invoked
        while an event loop is running (create_task() requires one).
    """
    # The event loop only keeps weak references to tasks. Hold a strong reference until the task completes so it
    # cannot be garbage-collected before it has run.
    in_flight = set()

    def shutdown_signal_handler() -> None:
        task = create_task(handle_shutdown_signal())
        in_flight.add(task)
        # Drop the reference once the task is finished so the set does not grow on repeated signals.
        task.add_done_callback(in_flight.discard)

    return shutdown_signal_handler
21
+
22
  async def main() -> None:
23
  # Setup logging.
24
  setup_logging()
 
31
  # Kick off services which should be active from the start.
32
  container.content_ingestion_service()
33
  container.question_dispatch_service()
34
+ container.schedule_service()
35
 
36
+ # Start the Slack socket mode handler in the background.
 
 
 
 
37
  socket_mode_handler = container.socket_mode_handler()
38
+ slack_bolt_task = create_task(socket_mode_handler.start_async())
39
+ shutdown_signal_handler = create_shutdown_signal_handler()
40
+ loop = get_running_loop()
41
+ loop.add_signal_handler(SIGINT, shutdown_signal_handler)
42
+ loop.add_signal_handler(SIGTERM, shutdown_signal_handler)
43
+ try:
44
+ logger.info("Starting Slack Socket Mode handler…")
45
+ await slack_bolt_task
46
+ except CancelledError:
47
+ logger.info("Shutting down application…")
48
+ finally:
49
+ await socket_mode_handler.close_async()
50
+ await container.shutdown_resources()
51
 
52
  if __name__ == "__main__":
53
  run(main())
src/ctp_slack_bot/containers.py CHANGED
@@ -11,9 +11,10 @@ from ctp_slack_bot.services.content_ingestion_service import ContentIngestionSer
11
  from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
12
  from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
13
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
 
14
  from ctp_slack_bot.services.language_model_service import LanguageModelService
15
  from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
16
- from ctp_slack_bot.services.schedule_service import ScheduleService
17
  from ctp_slack_bot.services.slack_service import SlackServiceResource
18
  from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
19
  from ctp_slack_bot.services.vectorization_service import VectorizationService
@@ -22,7 +23,7 @@ from ctp_slack_bot.services.vectorization_service import VectorizationService
22
  class Container(DeclarativeContainer):
23
  settings = Singleton(Settings)
24
  event_brokerage_service = Singleton(EventBrokerageService)
25
- schedule_service = Singleton(ScheduleService, settings=settings)
26
  mongo_db = Resource(MongoDBResource, settings=settings) # TODO: generalize to any database.
27
  vectorized_chunk_repository = Singleton(MongoVectorizedChunkRepository, mongo_db=mongo_db)
28
  vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db)
@@ -36,3 +37,4 @@ class Container(DeclarativeContainer):
36
  slack_bolt_app = Singleton(AsyncApp, token=settings.provided.SLACK_BOT_TOKEN().get_secret_value())
37
  slack_service = Resource(SlackServiceResource, event_brokerage_service=event_brokerage_service, slack_bolt_app=slack_bolt_app)
38
  socket_mode_handler = Singleton(lambda _, app, app_token: AsyncSocketModeHandler(app, app_token), slack_service, slack_bolt_app, settings.provided.SLACK_APP_TOKEN().get_secret_value())
 
 
11
  from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
12
  from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
13
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
14
+ from ctp_slack_bot.services.google_drive_service import GoogleDriveService
15
  from ctp_slack_bot.services.language_model_service import LanguageModelService
16
  from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
17
+ from ctp_slack_bot.services.schedule_service import ScheduleServiceResource
18
  from ctp_slack_bot.services.slack_service import SlackServiceResource
19
  from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
20
  from ctp_slack_bot.services.vectorization_service import VectorizationService
 
23
  class Container(DeclarativeContainer):
24
  settings = Singleton(Settings)
25
  event_brokerage_service = Singleton(EventBrokerageService)
26
+ schedule_service = Resource(ScheduleServiceResource, settings=settings)
27
  mongo_db = Resource(MongoDBResource, settings=settings) # TODO: generalize to any database.
28
  vectorized_chunk_repository = Singleton(MongoVectorizedChunkRepository, mongo_db=mongo_db)
29
  vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db)
 
37
  slack_bolt_app = Singleton(AsyncApp, token=settings.provided.SLACK_BOT_TOKEN().get_secret_value())
38
  slack_service = Resource(SlackServiceResource, event_brokerage_service=event_brokerage_service, slack_bolt_app=slack_bolt_app)
39
  socket_mode_handler = Singleton(lambda _, app, app_token: AsyncSocketModeHandler(app, app_token), slack_service, slack_bolt_app, settings.provided.SLACK_APP_TOKEN().get_secret_value())
40
+ google_drive_service = Singleton(GoogleDriveService, settings=settings)
src/ctp_slack_bot/core/config.py CHANGED
@@ -39,7 +39,7 @@ class Settings(BaseSettings):
39
  SCORE_THRESHOLD: NonNegativeFloat
40
 
41
  # Hugging Face Configuration
42
- HF_API_TOKEN: Optional[SecretStr] = None
43
 
44
  # OpenAI Configuration
45
  OPENAI_API_KEY: SecretStr
@@ -48,6 +48,22 @@ class Settings(BaseSettings):
48
  TEMPERATURE: NonNegativeFloat
49
  SYSTEM_PROMPT: str
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  model_config = SettingsConfigDict(
52
  env_file=".env",
53
  env_file_encoding="utf-8",
 
39
  SCORE_THRESHOLD: NonNegativeFloat
40
 
41
  # Hugging Face Configuration
42
+ HF_API_TOKEN: Optional[SecretStr] = None # TODO: Currently, this is unused.
43
 
44
  # OpenAI Configuration
45
  OPENAI_API_KEY: SecretStr
 
48
  TEMPERATURE: NonNegativeFloat
49
  SYSTEM_PROMPT: str
50
 
51
+ # Google Drive Configuration
52
+ GOOGLE_DRIVE_ROOT_ID: str
53
+ GOOGLE_PROJECT_ID: str
54
+ GOOGLE_PRIVATE_KEY_ID: SecretStr
55
+ GOOGLE_PRIVATE_KEY: SecretStr
56
+ GOOGLE_CLIENT_ID: str
57
+ GOOGLE_CLIENT_EMAIL: str
58
+ GOOGLE_AUTH_URI: str = "https://accounts.google.com/o/oauth2/auth"
59
+ GOOGLE_TOKEN_URI: str = "https://oauth2.googleapis.com/token"
60
+ GOOGLE_AUTH_PROVIDER_CERT_URL: str = "https://www.googleapis.com/oauth2/v1/certs"
61
+ GOOGLE_CLIENT_CERT_URL: str = "https://www.googleapis.com/robot/v1/metadata/x509/ctp-slack-bot-714%40voltaic-reducer-294821.iam.gserviceaccount.com"
62
+ GOOGLE_UNIVERSE_DOMAIN: str = "googleapis.com"
63
+
64
+ # File Monitoring Configuration
65
+ FILE_MONITOR_ROOT_PATH: Optional[str] = None
66
+
67
  model_config = SettingsConfigDict(
68
  env_file=".env",
69
  env_file_encoding="utf-8",
src/ctp_slack_bot/core/logging.py CHANGED
@@ -1,4 +1,4 @@
1
- from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord
2
  from loguru import logger
3
  from os import getenv
4
  from sys import stderr
@@ -90,7 +90,9 @@ def setup_logging() -> None:
90
  basicConfig(handlers=[InterceptHandler()], level=0, force=True)
91
 
92
  # Update logging levels for some noisy libraries.
93
- for logger_name in ("uvicorn", "uvicorn.error", "fastapi", "httpx", "apscheduler", "pymongo"):
94
  getLogger(logger_name).setLevel(INFO)
 
 
95
 
96
  logger.info(f"Logging configured with level {log_level}")
 
1
+ from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord, WARNING
2
  from loguru import logger
3
  from os import getenv
4
  from sys import stderr
 
90
  basicConfig(handlers=[InterceptHandler()], level=0, force=True)
91
 
92
  # Update logging levels for some noisy libraries.
93
for logger_name in ("uvicorn", "uvicorn.error", "fastapi", "httpx", "pymongo"):
    getLogger(logger_name).setLevel(INFO)
# apscheduler is held to WARNING. The trailing comma is required: ("apscheduler") is a plain string, and
# iterating it would configure one logger per character instead of the "apscheduler" logger.
for logger_name in ("apscheduler",):
    getLogger(logger_name).setLevel(WARNING)
97
 
98
  logger.info(f"Logging configured with level {log_level}")
src/ctp_slack_bot/core/response_rendering.py DELETED
@@ -1,13 +0,0 @@
1
- from json import dumps
2
- from starlette.responses import JSONResponse
3
- from typing import Any, Self
4
-
5
- class PrettyJSONResponse(JSONResponse):
6
- def render(self: Self, content: Any) -> bytes:
7
- return dumps(
8
- content,
9
- ensure_ascii=False,
10
- allow_nan=False,
11
- indent=4,
12
- separators=(", ", ": "),
13
- ).encode()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/ctp_slack_bot/db/mongo_db.py CHANGED
@@ -1,13 +1,14 @@
1
- from dependency_injector.resources import Resource
 
2
  from motor.motor_asyncio import AsyncIOMotorClient
3
  from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
4
  from pymongo.operations import SearchIndexModel
5
  from loguru import logger
6
  from pydantic import BaseModel, PrivateAttr
7
  from typing import Any, Dict, Optional, Self
8
- import asyncio
9
 
10
  from ctp_slack_bot.core.config import Settings
 
11
 
12
  class MongoDB(BaseModel):
13
  """
@@ -16,23 +17,20 @@ class MongoDB(BaseModel):
16
  settings: Settings
17
  _client: PrivateAttr = PrivateAttr()
18
  _db: PrivateAttr = PrivateAttr()
19
-
20
  class Config:
21
  arbitrary_types_allowed = True
22
-
23
  def __init__(self: Self, **data: Dict[str, Any]) -> None:
24
  super().__init__(**data)
25
  logger.debug("Created {}", self.__class__.__name__)
26
-
27
  def connect(self: Self) -> None:
28
  """Initialize MongoDB client with settings."""
29
  try:
30
  connection_string = self.settings.MONGODB_URI.get_secret_value()
31
- logger.debug("Connecting to MongoDB using URI: {}", connection_string.replace(
32
- connection_string.split('@')[-1].split('/')[0] if '@' in connection_string else '',
33
- '[REDACTED]'
34
- ))
35
-
36
  # Create client with appropriate settings
37
  self._client = AsyncIOMotorClient(
38
  connection_string,
@@ -43,48 +41,48 @@ class MongoDB(BaseModel):
43
  retryWrites=True,
44
  w="majority"
45
  )
46
-
47
  # Set database
48
  db_name = self.settings.MONGODB_NAME
49
-
50
  self._db = self._client[db_name]
51
  logger.debug("MongoDB client initialized for database: {}", db_name)
52
-
53
  except Exception as e:
54
  logger.error("Failed to initialize MongoDB client: {}", e)
55
  self._client = None
56
  self._db = None
57
  raise
58
-
59
  @property
60
  def client(self: Self) -> AsyncIOMotorClient:
61
  """Get the MongoDB client instance."""
62
  if not hasattr(self, '_client') or self._client is None:
63
- logger.warning("MongoDB client not initialized. Attempting to initialize.")
64
  self.connect()
65
  if not hasattr(self, '_client') or self._client is None:
66
- raise ConnectionError("Failed to initialize MongoDB client")
67
  return self._client
68
-
69
  @property
70
  def db(self: Self) -> Any:
71
  """Get the MongoDB database instance."""
72
  if not hasattr(self, '_db') or self._db is None:
73
- logger.warning("MongoDB database not initialized. Attempting to initialize client.")
74
  self.connect()
75
  if not hasattr(self, '_db') or self._db is None:
76
- raise ConnectionError("Failed to initialize MongoDB database")
77
  return self._db
78
-
79
  async def ping(self: Self) -> bool:
80
  """Check if MongoDB connection is alive."""
81
  try:
82
  # Get client to ensure we're connected
83
  client = self.client
84
-
85
  # Try a simple ping command
86
  await client.admin.command('ping')
87
- logger.debug("MongoDB connection is active")
88
  return True
89
  except (ConnectionFailure, ServerSelectionTimeoutError) as e:
90
  logger.error("MongoDB connection failed: {}", e)
@@ -92,7 +90,7 @@ class MongoDB(BaseModel):
92
  except Exception as e:
93
  logger.error("Unexpected error during MongoDB ping: {}", e)
94
  return False
95
-
96
  async def get_collection(self: Self, name: str) -> Any:
97
  """
98
  Get a collection by name with validation.
@@ -100,29 +98,29 @@ class MongoDB(BaseModel):
100
  """
101
  # First ensure we can connect at all
102
  if not await self.ping():
103
- logger.error("Cannot get collection '{}' - MongoDB connection is not available", name)
104
- raise ConnectionError("MongoDB connection is not available")
105
-
106
  try:
107
  # Get all collection names to check if this one exists
108
- logger.debug("Checking if collection '{}' exists", name)
109
  collection_names = await self.db.list_collection_names()
110
 
111
  if name not in collection_names:
112
- logger.info("Collection '{}' does not exist. Creating it.", name)
113
  # Create the collection
114
  await self.db.create_collection(name)
115
- logger.debug("Successfully created collection '{}'", name)
116
  else:
117
- logger.debug("Collection '{}' already exists", name)
118
-
119
  # Get and return the collection
120
  collection = self.db[name]
121
  return collection
122
  except Exception as e:
123
  logger.error("Error accessing collection '{}': {}", name, e)
124
  raise
125
-
126
  async def create_indexes(self: Self, collection_name: str) -> None:
127
  """
128
  Create a vector search index on a collection.
@@ -131,7 +129,7 @@ class MongoDB(BaseModel):
131
  collection_name: Name of the collection
132
  """
133
  collection = await self.get_collection(collection_name)
134
-
135
  try:
136
  # Create search index model using MongoDB's recommended approach
137
  search_index_model = SearchIndexModel(
@@ -149,41 +147,38 @@ class MongoDB(BaseModel):
149
  name=f"{collection_name}_vector_index",
150
  type="vectorSearch"
151
  )
152
-
153
  # Create the search index using the motor collection
154
  result = await collection.create_search_index(search_index_model)
155
- logger.info("Vector search index '{}' created for collection {}", result, collection_name)
156
-
157
  except Exception as e:
158
  if "command not found" in str(e).lower():
159
  logger.warning("Vector search not supported by this MongoDB instance. Some functionality may be limited.")
160
  # Create a fallback standard index on embedding field
161
  await collection.create_index("embedding")
162
- logger.info("Created standard index on 'embedding' field as fallback")
163
  else:
164
  logger.error("Failed to create vector index: {}", e)
165
  raise
166
-
167
  async def close(self: Self) -> None:
168
  """Close MongoDB connection."""
169
  if self._client:
170
  self._client.close()
171
- logger.info("MongoDB connection closed")
172
  self._client = None
173
  self._db = None
174
 
175
- class MongoDBResource(Resource):
176
- def init(self: Self, settings: Settings) -> MongoDB:
177
  logger.info("Initializing MongoDB connection for database: {}", settings.MONGODB_NAME)
178
  mongo_db = MongoDB(settings=settings)
179
  mongo_db.connect()
180
-
181
- # Test the connection asynchronously - this will run after init returns
182
- asyncio.create_task(self._test_connection(mongo_db))
183
-
184
  return mongo_db
185
-
186
- async def _test_connection(self, mongo_db: MongoDB) -> None:
187
  """Test MongoDB connection and log the result."""
188
  try:
189
  is_connected = await mongo_db.ping()
@@ -193,11 +188,11 @@ class MongoDBResource(Resource):
193
  logger.error("MongoDB connection test failed!")
194
  except Exception as e:
195
  logger.error("Error testing MongoDB connection: {}", e)
196
-
 
197
  async def shutdown(self: Self, mongo_db: MongoDB) -> None:
198
  """Close MongoDB connection on shutdown."""
199
  try:
200
- logger.info("Closing MongoDB connection...")
201
  await mongo_db.close()
202
  except Exception as e:
203
  logger.error("Error closing MongoDB connection: {}", e)
 
1
+ from asyncio import create_task
2
+ from dependency_injector.resources import AsyncResource
3
  from motor.motor_asyncio import AsyncIOMotorClient
4
  from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
5
  from pymongo.operations import SearchIndexModel
6
  from loguru import logger
7
  from pydantic import BaseModel, PrivateAttr
8
  from typing import Any, Dict, Optional, Self
 
9
 
10
  from ctp_slack_bot.core.config import Settings
11
+ from ctp_slack_bot.utils import sanitize_mongo_db_uri
12
 
13
  class MongoDB(BaseModel):
14
  """
 
17
  settings: Settings
18
  _client: PrivateAttr = PrivateAttr()
19
  _db: PrivateAttr = PrivateAttr()
20
+
21
  class Config:
22
  arbitrary_types_allowed = True
23
+
24
  def __init__(self: Self, **data: Dict[str, Any]) -> None:
25
  super().__init__(**data)
26
  logger.debug("Created {}", self.__class__.__name__)
27
+
28
  def connect(self: Self) -> None:
29
  """Initialize MongoDB client with settings."""
30
  try:
31
  connection_string = self.settings.MONGODB_URI.get_secret_value()
32
+ logger.debug("Connecting to MongoDB using URI: {}", sanitize_mongo_db_uri(connection_string))
33
+
 
 
 
34
  # Create client with appropriate settings
35
  self._client = AsyncIOMotorClient(
36
  connection_string,
 
41
  retryWrites=True,
42
  w="majority"
43
  )
44
+
45
  # Set database
46
  db_name = self.settings.MONGODB_NAME
47
+
48
  self._db = self._client[db_name]
49
  logger.debug("MongoDB client initialized for database: {}", db_name)
50
+
51
  except Exception as e:
52
  logger.error("Failed to initialize MongoDB client: {}", e)
53
  self._client = None
54
  self._db = None
55
  raise
56
+
57
  @property
58
  def client(self: Self) -> AsyncIOMotorClient:
59
  """Get the MongoDB client instance."""
60
  if not hasattr(self, '_client') or self._client is None:
61
+ logger.warning("MongoDB client not initialized. Attempting to initialize")
62
  self.connect()
63
  if not hasattr(self, '_client') or self._client is None:
64
+ raise ConnectionError("Failed to initialize MongoDB client.")
65
  return self._client
66
+
67
  @property
68
  def db(self: Self) -> Any:
69
  """Get the MongoDB database instance."""
70
  if not hasattr(self, '_db') or self._db is None:
71
+ logger.warning("MongoDB database not initialized. Attempting to initialize client")
72
  self.connect()
73
  if not hasattr(self, '_db') or self._db is None:
74
+ raise ConnectionError("Failed to initialize MongoDB database.")
75
  return self._db
76
+
77
  async def ping(self: Self) -> bool:
78
  """Check if MongoDB connection is alive."""
79
  try:
80
  # Get client to ensure we're connected
81
  client = self.client
82
+
83
  # Try a simple ping command
84
  await client.admin.command('ping')
85
+ logger.debug("MongoDB connection is active!")
86
  return True
87
  except (ConnectionFailure, ServerSelectionTimeoutError) as e:
88
  logger.error("MongoDB connection failed: {}", e)
 
90
  except Exception as e:
91
  logger.error("Unexpected error during MongoDB ping: {}", e)
92
  return False
93
+
94
  async def get_collection(self: Self, name: str) -> Any:
95
  """
96
  Get a collection by name with validation.
 
98
  """
99
  # First ensure we can connect at all
100
  if not await self.ping():
101
+ logger.error("Cannot get collection '{}' because a MongoDB connection is not available.", name)
102
+ raise ConnectionError("MongoDB connection is not available.")
103
+
104
  try:
105
  # Get all collection names to check if this one exists
106
+ logger.debug("Checking if collection '{}' exists", name)
107
  collection_names = await self.db.list_collection_names()
108
 
109
  if name not in collection_names:
110
+ logger.info("Collection '{}' does not exist. Creating it", name)
111
  # Create the collection
112
  await self.db.create_collection(name)
113
+ logger.debug("Successfully created collection: {}", name)
114
  else:
115
+ logger.debug("Collection '{}' already exists!", name)
116
+
117
  # Get and return the collection
118
  collection = self.db[name]
119
  return collection
120
  except Exception as e:
121
  logger.error("Error accessing collection '{}': {}", name, e)
122
  raise
123
+
124
  async def create_indexes(self: Self, collection_name: str) -> None:
125
  """
126
  Create a vector search index on a collection.
 
129
  collection_name: Name of the collection
130
  """
131
  collection = await self.get_collection(collection_name)
132
+
133
  try:
134
  # Create search index model using MongoDB's recommended approach
135
  search_index_model = SearchIndexModel(
 
147
  name=f"{collection_name}_vector_index",
148
  type="vectorSearch"
149
  )
150
+
151
  # Create the search index using the motor collection
152
  result = await collection.create_search_index(search_index_model)
153
+ logger.info("Vector search index '{}' created for collection {}.", result, collection_name)
154
+
155
  except Exception as e:
156
  if "command not found" in str(e).lower():
157
  logger.warning("Vector search not supported by this MongoDB instance. Some functionality may be limited.")
158
  # Create a fallback standard index on embedding field
159
  await collection.create_index("embedding")
160
+ logger.info("Created standard index on 'embedding' field as fallback.")
161
  else:
162
  logger.error("Failed to create vector index: {}", e)
163
  raise
164
+
165
  async def close(self: Self) -> None:
166
  """Close MongoDB connection."""
167
  if self._client:
168
  self._client.close()
169
+ logger.info("Closed MongoDB connection.")
170
  self._client = None
171
  self._db = None
172
 
173
+ class MongoDBResource(AsyncResource):
174
+ async def init(self: Self, settings: Settings) -> MongoDB:
175
  logger.info("Initializing MongoDB connection for database: {}", settings.MONGODB_NAME)
176
  mongo_db = MongoDB(settings=settings)
177
  mongo_db.connect()
178
+ await self._test_connection(mongo_db)
 
 
 
179
  return mongo_db
180
+
181
+ async def _test_connection(self: Self, mongo_db: MongoDB) -> None:
182
  """Test MongoDB connection and log the result."""
183
  try:
184
  is_connected = await mongo_db.ping()
 
188
  logger.error("MongoDB connection test failed!")
189
  except Exception as e:
190
  logger.error("Error testing MongoDB connection: {}", e)
191
+ raise
192
+
193
  async def shutdown(self: Self, mongo_db: MongoDB) -> None:
194
  """Close MongoDB connection on shutdown."""
195
  try:
 
196
  await mongo_db.close()
197
  except Exception as e:
198
  logger.error("Error closing MongoDB connection: {}", e)
src/ctp_slack_bot/models/__init__.py CHANGED
@@ -1,2 +1,4 @@
1
  from ctp_slack_bot.models.base import Chunk, Content, VectorizedChunk, VectorQuery
 
2
  from ctp_slack_bot.models.slack import SlackEventPayload, SlackMessage, SlackReaction, SlackResponse, SlackUserTimestampPair
 
 
1
  from ctp_slack_bot.models.base import Chunk, Content, VectorizedChunk, VectorQuery
2
+ from ctp_slack_bot.models.google_drive import GoogleDriveMetadata
3
  from ctp_slack_bot.models.slack import SlackEventPayload, SlackMessage, SlackReaction, SlackResponse, SlackUserTimestampPair
4
+ from ctp_slack_bot.models.webvtt import WebVTTContent, WebVTTFrame
src/ctp_slack_bot/models/base.py CHANGED
@@ -1,7 +1,6 @@
1
  from abc import ABC, abstractmethod
2
  from pydantic import BaseModel, ConfigDict, Field
3
- from types import MappingProxyType
4
- from typing import Any, Dict, final, Mapping, Self, Sequence, Optional
5
 
6
 
7
  class Chunk(BaseModel):
@@ -14,6 +13,7 @@ class Chunk(BaseModel):
14
 
15
  model_config = ConfigDict(frozen=True)
16
 
 
17
  @final
18
  class VectorQuery(BaseModel):
19
  """Model for vector database similarity search queries.
@@ -30,12 +30,14 @@ class VectorQuery(BaseModel):
30
  score_threshold: float = Field(default=0.7)
31
  filter_metadata: Optional[Mapping[str, Any]] = None
32
 
 
 
33
 
34
  @final
35
  class VectorizedChunk(Chunk):
36
  """A class representing a vectorized chunk of content."""
37
 
38
- embedding: Sequence[float] # The vector representation
39
 
40
 
41
  class Content(ABC, BaseModel):
@@ -44,22 +46,13 @@ class Content(ABC, BaseModel):
44
  model_config = ConfigDict(frozen=True)
45
 
46
  @abstractmethod
47
- def get_chunks(self: Self) -> Sequence[Chunk]:
48
  pass
49
 
50
  @abstractmethod
51
- def get_metadata(self: Self) -> Mapping[str, Any]:
52
- pass
53
-
54
- @abstractmethod
55
- def get_text(self: Self) -> str:
56
- pass
57
-
58
- @abstractmethod
59
- def get_bytes(self: Self) -> bytes:
60
  pass
61
 
62
- @property
63
  @abstractmethod
64
- def id(self: Self) -> str:
65
  pass
 
1
  from abc import ABC, abstractmethod
2
  from pydantic import BaseModel, ConfigDict, Field
3
+ from typing import Any, final, Mapping, Self, Sequence, Optional
 
4
 
5
 
6
  class Chunk(BaseModel):
 
13
 
14
  model_config = ConfigDict(frozen=True)
15
 
16
+
17
  @final
18
  class VectorQuery(BaseModel):
19
  """Model for vector database similarity search queries.
 
30
  score_threshold: float = Field(default=0.7)
31
  filter_metadata: Optional[Mapping[str, Any]] = None
32
 
33
+ model_config = ConfigDict(frozen=True)
34
+
35
 
36
  @final
37
  class VectorizedChunk(Chunk):
38
  """A class representing a vectorized chunk of content."""
39
 
40
+ embedding: Sequence[float] # The vector representation
41
 
42
 
43
  class Content(ABC, BaseModel):
 
46
  model_config = ConfigDict(frozen=True)
47
 
48
  @abstractmethod
49
+ def get_id(self: Self) -> str:
50
  pass
51
 
52
  @abstractmethod
53
+ def get_chunks(self: Self) -> Sequence[Chunk]:
 
 
 
 
 
 
 
 
54
  pass
55
 
 
56
  @abstractmethod
57
+ def get_metadata(self: Self) -> Mapping[str, Any]:
58
  pass
src/ctp_slack_bot/models/google_drive.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from pydantic import BaseModel, ConfigDict
3
+ from typing import Self
4
+
5
+ from ctp_slack_bot.models import FileContent
6
+
7
+
8
class GoogleDriveMetadata(BaseModel):
    """Represents Google Drive file or folder metadata.

    Instances are immutable (``frozen=True``).
    """

    id: str
    name: str
    modified_time: datetime
    mime_type: str
    folder_path: str

    model_config = ConfigDict(frozen=True)

    @classmethod
    def from_folder_path_and_dict(cls: type["GoogleDriveMetadata"], folder_path: str, dict: dict) -> Self:
        """Build metadata from a folder path and a Drive API file resource.

        Args:
            folder_path: Path of the folder that contains the item.
            dict: File resource mapping; the keys ``id``, ``name``,
                ``modifiedTime`` (ISO 8601 text) and ``mimeType`` are read.

        Returns:
            A new instance of ``cls``.

        Raises:
            KeyError: If one of the required keys is missing.
            ValueError: If ``modifiedTime`` is not parseable by
                ``datetime.fromisoformat``.
        """
        # Construct through ``cls`` (not the hard-coded class name) so that
        # subclasses calling this factory get instances of their own type.
        return cls(
            id=dict["id"],
            name=dict["name"],
            modified_time=datetime.fromisoformat(dict["modifiedTime"]),
            mime_type=dict["mimeType"],
            folder_path=folder_path,
        )
src/ctp_slack_bot/models/slack.py CHANGED
@@ -63,31 +63,19 @@ class SlackMessage(Content):
63
  is_starred: Optional[bool] = None
64
  pinned_to: Optional[Sequence[str]] = None
65
  reactions: Optional[Sequence[SlackReaction]] = None
66
- _canonical_json: PrivateAttr
67
 
68
- def __init__(self: Self, **data: Dict[str, Any]) -> None:
69
- super().__init__(**data)
70
- self._canonical_json = PrivateAttr(default_factory=lambda: dumps(data, sort_keys=True).encode())
71
 
72
  def get_chunks(self: Self) -> Sequence[Chunk]:
73
- return (Chunk(text=self.text, parent_id=self.id, chunk_id="", metadata=self.get_metadata()), )
74
 
75
  def get_metadata(self: Self) -> Mapping[str, Any]:
76
  return MappingProxyType({
77
  "modificationTime": datetime.fromtimestamp(float(self.ts))
78
  })
79
 
80
- def get_text(self: Self) -> str:
81
- return self.text
82
-
83
- def get_bytes(self: Self) -> bytes:
84
- return self._canonical_json
85
-
86
- @property
87
- def id(self: Self) -> str:
88
- """Unique identifier for this message."""
89
- return f"slack-message:{self.channel}:{self.ts}"
90
-
91
  class SlackResponse(BaseModel): # TODO: This should also be based on Content as it is a SlackMessage―just not one for which we know the identity yet.
92
  """Represents a response message to be sent to Slack."""
93
 
 
63
  is_starred: Optional[bool] = None
64
  pinned_to: Optional[Sequence[str]] = None
65
  reactions: Optional[Sequence[SlackReaction]] = None
 
66
 
67
+ def get_id(self: Self) -> str:
68
+ """Unique identifier for this message."""
69
+ return f"slack-message:{self.channel}:{self.ts}"
70
 
71
  def get_chunks(self: Self) -> Sequence[Chunk]:
72
+ return (Chunk(text=self.text, parent_id=self.get_id(), chunk_id="", metadata=self.get_metadata()), )
73
 
74
  def get_metadata(self: Self) -> Mapping[str, Any]:
75
  return MappingProxyType({
76
  "modificationTime": datetime.fromtimestamp(float(self.ts))
77
  })
78
 
 
 
 
 
 
 
 
 
 
 
 
79
  class SlackResponse(BaseModel): # TODO: This should also be based on Content as it is a SlackMessage―just not one for which we know the identity yet.
80
  """Represents a response message to be sent to Slack."""
81
 
src/ctp_slack_bot/models/webvtt.py CHANGED
@@ -1,15 +1,18 @@
1
  from datetime import datetime, timedelta
2
  from io import BytesIO
 
3
  from json import dumps
4
- from pydantic import BaseModel, ConfigDict, PositiveInt, PrivateAttr
5
- import re
6
  from types import MappingProxyType
7
  from typing import Any, Dict, Literal, Mapping, Optional, Self, Sequence
8
  from webvtt import Caption, WebVTT
9
 
10
  from ctp_slack_bot.models.base import Chunk, Content
11
 
12
- SPEAKER_SPEECH_CAPTION_TEXT_PATTERN = re.compile('(?:([^:]+): )?(.*)')
 
 
13
 
14
  class WebVTTFrame(BaseModel):
15
  """Represents a WebVTT frame"""
@@ -23,54 +26,48 @@ class WebVTTFrame(BaseModel):
23
  model_config = ConfigDict(frozen=True)
24
 
25
  @classmethod
26
- def from_webvtt_caption(cls: type["WebVTTFrame"], caption: Caption) -> Self:
27
- identifier = caption.identifier
28
  start = timedelta(**caption.start_time.__dict__)
29
  end = timedelta(**caption.end_time.__dict__)
30
- speech = caption.text
31
- match SPEAKER_SPEECH_CAPTION_TEXT_PATTERN.search(speech).groups():
32
- case (speaker, speech):
33
  return cls(identifier=identifier, start=start, end=end, speaker=speaker, speech=speech)
34
- case _:
35
  return cls(identifier=identifier, start=start, end=end, speech=speech)
36
 
37
 
38
- class WebVTTFile(Content): # TODO: insert a FileContent class in the object inheritance hierarchy.
39
- """Represents a message from Slack after adaptation."""
40
 
41
- filename: str
42
- modification_time: datetime
43
- bytes: bytes
44
 
 
 
45
 
46
  def get_chunks(self: Self) -> Sequence[Chunk]:
47
- return tuple(Chunk(text=frame.speech,
48
- parent_id=self.id,
49
- chunk_id=frame.identifier,
 
 
 
 
 
50
  metadata={
51
- "filename": self.filename,
52
- "start": self.modification_time + frame.start,
53
- "end": self.modification_time + frame.end,
54
- "user": frame.speaker
55
  })
56
- for frame
57
- in self.get_frames())
58
 
59
  def get_metadata(self: Self) -> Mapping[str, Any]:
60
- return MappingProxyType({
61
- "filename": self.filename,
62
- "modificationTime": self.modification_time
63
- })
64
 
65
- def get_text(self: Self) -> str: # TODO
66
- raise NotImplemented()
67
-
68
- def get_bytes(self: Self) -> bytes:
69
- return self.bytes
70
-
71
- def get_frames(self: Self) -> Sequence[WebVTTFrame]:
72
- return tuple(map(WebVTTFrame.from_webvtt_caption, WebVTT.from_buffer(BytesIO(buffer)).captions))
73
-
74
- @property
75
- def id(self: Self) -> str:
76
- return f"file:{self.filename}"
 
1
  from datetime import datetime, timedelta
2
  from io import BytesIO
3
+ from itertools import starmap
4
  from json import dumps
5
+ from more_itertools import windowed
6
+ from pydantic import BaseModel, ConfigDict, Field, PositiveInt, PrivateAttr
7
  from types import MappingProxyType
8
  from typing import Any, Dict, Literal, Mapping, Optional, Self, Sequence
9
  from webvtt import Caption, WebVTT
10
 
11
  from ctp_slack_bot.models.base import Chunk, Content
12
 
13
+ CHUNK_FRAMES_OVERLAP = 1
14
+ CHUNK_FRAMES_WINDOW = 5
15
+ SPEAKER_SPEECH_TEXT_SEPARATOR = ": "
16
 
17
  class WebVTTFrame(BaseModel):
18
  """Represents a WebVTT frame"""
 
26
  model_config = ConfigDict(frozen=True)
27
 
28
  @classmethod
29
+ def from_webvtt_caption(cls: type["WebVTTFrame"], index: int, caption: Caption) -> Self:
30
+ identifier = caption.identifier if caption.identifier else str(index)
31
  start = timedelta(**caption.start_time.__dict__)
32
  end = timedelta(**caption.end_time.__dict__)
33
+ match caption.text.split(SPEAKER_SPEECH_TEXT_SEPARATOR, 1):
34
+ case [speaker, speech]:
 
35
  return cls(identifier=identifier, start=start, end=end, speaker=speaker, speech=speech)
36
+ case [speech]:
37
  return cls(identifier=identifier, start=start, end=end, speech=speech)
38
 
39
 
40
class WebVTTContent(Content):
    """Represents parsed WebVTT content."""

    id: str
    metadata: Mapping[str, Any] = Field(default_factory=dict)
    frames: Sequence[WebVTTFrame]

    def get_id(self: Self) -> str:
        """Return the identifier this content was created with."""
        return self.id

    def get_chunks(self: Self) -> Sequence[Chunk]:
        """Group frames into overlapping windows and emit one chunk per window.

        Each window holds up to ``CHUNK_FRAMES_WINDOW`` frames and consecutive
        windows share ``CHUNK_FRAMES_OVERLAP`` frames. A chunk's text is the
        frames' "speaker: speech" lines separated by blank lines.
        """
        step = CHUNK_FRAMES_WINDOW - CHUNK_FRAMES_OVERLAP
        # A short trailing window is padded with ``None`` fill values by
        # ``windowed``; strip the padding so only real frames remain.
        windows = (tuple(frame for frame in window if frame is not None)
                   for window
                   in windowed(self.frames, CHUNK_FRAMES_WINDOW, step=step))
        return tuple(Chunk(text="\n\n".join(SPEAKER_SPEECH_TEXT_SEPARATOR.join(filter(None, (frame.speaker, frame.speech)))
                                             for frame
                                             in frames),
                           parent_id=self.get_id(),
                           chunk_id=f"{frames[0].identifier}-{frames[-1].identifier}",
                           metadata={
                               "start": str(frames[0].start), # TODO: This is a harder problem: to get the offsets to become real datetimes so that they can be queryable using MongoDB.
                               "end": str(frames[-1].end),
                               "speakers": [frame.speaker for frame in frames if frame.speaker]
                           })
                     for frames
                     in windows)

    def get_metadata(self: Self) -> Mapping[str, Any]:
        """Return a read-only view of the content metadata."""
        return MappingProxyType(self.metadata)

    @classmethod
    def from_bytes(cls: type["WebVTTContent"], id: str, metadata: Mapping[str, Any], buffer: bytes) -> Self:
        """Parse raw WebVTT bytes into content.

        Args:
            id: Identifier to assign to the content.
            metadata: Metadata mapping to attach (wrapped read-only).
            buffer: Raw WebVTT file contents.

        Returns:
            A new instance of ``cls`` with one frame per caption; captions are
            numbered from 1 for frames that need a fallback identifier.
        """
        captions = WebVTT.from_buffer(BytesIO(buffer)).captions
        frames = tuple(starmap(WebVTTFrame.from_webvtt_caption, enumerate(captions, 1)))
        # Use ``cls`` rather than the hard-coded class so subclasses are honoured.
        return cls(id=id, metadata=MappingProxyType(metadata), frames=frames)
 
 
 
 
 
 
 
 
src/ctp_slack_bot/services/__init__.py CHANGED
@@ -3,6 +3,7 @@ from ctp_slack_bot.services.content_ingestion_service import ContentIngestionSer
3
  from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
4
  from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
5
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
 
6
  from ctp_slack_bot.services.language_model_service import LanguageModelService
7
  from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
8
  from ctp_slack_bot.services.slack_service import SlackService
 
3
  from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
4
  from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
5
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
6
+ from ctp_slack_bot.services.google_drive_service import GoogleDriveService
7
  from ctp_slack_bot.services.language_model_service import LanguageModelService
8
  from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
9
  from ctp_slack_bot.services.slack_service import SlackService
src/ctp_slack_bot/services/application_database_service.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from loguru import logger
3
+ from pydantic import BaseModel, PrivateAttr
4
+ from typing import Iterable, Mapping, Self
5
+
6
+ from ctp_slack_bot.core import Settings
7
+ from ctp_slack_bot.db import MongoDB
8
+
9
+
10
class ApplicationDatabaseService(BaseModel):
    """Reads and writes application state.

    Both persistence methods are placeholders that currently raise
    ``NotImplementedError``.
    """

    settings: Settings
    mongo_db: MongoDB # TODO: This should be replaced following the repository pattern―one repository class per collection.

    class Config:
        frozen=True

    def __init__(self: Self, **data) -> None:
        """Validate the model fields, then log that the service was created."""
        super().__init__(**data)
        class_name = self.__class__.__name__
        logger.debug("Created {}", class_name)

    async def get_last_modification_times_by_file_paths(self: Self, file_paths: Iterable[str]) -> Mapping[str, datetime]:
        """Look up the last modification time recorded for each given file path."""
        # TODO: not implemented yet.
        raise NotImplementedError()

    async def set_last_modification_time_by_file_path(self: Self, file_path: str, modification_time: datetime) -> None:
        """Record the last modification time for a single file path."""
        # TODO: not implemented yet.
        raise NotImplementedError()
src/ctp_slack_bot/services/content_ingestion_service.py CHANGED
@@ -30,8 +30,8 @@ class ContentIngestionService(BaseModel):
30
 
31
  async def process_incoming_content(self: Self, content: Content) -> None:
32
  logger.debug("Content ingestion service received content with metadata: {}", content.get_metadata())
33
- # if self.vector_database_service.has_content(content.id) # TODO
34
- # logger.debug("Ignored content with ID {} because it already exists in the database.", content.id)
35
  # return
36
  chunks = content.get_chunks()
37
  await self.__vectorize_and_store_chunks_in_database(chunks)
 
30
 
31
  async def process_incoming_content(self: Self, content: Content) -> None:
32
  logger.debug("Content ingestion service received content with metadata: {}", content.get_metadata())
33
+ # if self.vector_database_service.has_content(content.get_id()) # TODO
34
+ # logger.debug("Ignored content with ID {} because it already exists in the database.", content.get_id())
35
  # return
36
  chunks = content.get_chunks()
37
  await self.__vectorize_and_store_chunks_in_database(chunks)
src/ctp_slack_bot/services/google_drive_service.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from cachetools import TTLCache
3
+ from google.oauth2 import service_account
4
+ from googleapiclient.discovery import build
5
+ from googleapiclient.http import MediaIoBaseDownload
6
+ from googleapiclient.errors import HttpError
7
+ from io import BytesIO
8
+ from loguru import logger
9
+ from pydantic import BaseModel, PrivateAttr
10
+ from typing import Collection, Dict, List, Optional, Self
11
+
12
+ from ctp_slack_bot.core import Settings
13
+ from ctp_slack_bot.models import GoogleDriveMetadata
14
+
15
+
16
+ FOLDER_MIME_TYPE: str = "application/vnd.google-apps.folder"
17
+ PATH_SEPARATOR: str = "/"
18
+
19
+
20
+ class GoogleDriveService(BaseModel):
21
+ """Service for interacting with Google Drive."""
22
+
23
+ settings: Settings
24
+ _google_drive_client: PrivateAttr = PrivateAttr()
25
+ _folder_cache: PrivateAttr = PrivateAttr(default_factory=lambda: TTLCache(maxsize=256, ttl=60))
26
+
27
+ class Config:
28
+ frozen=True
29
+
30
+ def __init__(self: Self, **data) -> None:
31
+ super().__init__(**data)
32
+ credentials = service_account.Credentials.from_service_account_info({
33
+ "type": "service_account",
34
+ "project_id": self.settings.GOOGLE_PROJECT_ID,
35
+ "private_key_id": self.settings.GOOGLE_PRIVATE_KEY_ID.get_secret_value(),
36
+ "private_key": self.settings.GOOGLE_PRIVATE_KEY.get_secret_value(),
37
+ "client_email": self.settings.GOOGLE_CLIENT_EMAIL,
38
+ "client_id": self.settings.GOOGLE_CLIENT_ID,
39
+ "token_uri": self.settings.GOOGLE_TOKEN_URI,
40
+ }, scopes=["https://www.googleapis.com/auth/drive"])
41
+ self._google_drive_client = build('drive', 'v3', credentials=credentials)
42
+ logger.debug("Created {}", self.__class__.__name__)
43
+
44
+ def _resolve_folder_id(self: Self, folder_path: str) -> Optional[str]:
45
+ """Resolve a folder path to a Google Drive ID."""
46
+
47
+ if not folder_path:
48
+ return self.settings.GOOGLE_DRIVE_ROOT_ID
49
+
50
+ if folder_path in self._folder_cache:
51
+ return self._folder_cache[folder_path]
52
+
53
+ current_id = self.settings.GOOGLE_DRIVE_ROOT_ID
54
+ try:
55
+ for part in folder_path.split(PATH_SEPARATOR):
56
+ results = self._google_drive_client.files().list(
57
+ q=f"name='{part.replace("\\", "\\\\").replace("'", "\\'")}' and mimeType='{FOLDER_MIME_TYPE}' and '{current_id}' in parents",
58
+ fields="files(id,name)",
59
+ supportsAllDrives=True,
60
+ includeItemsFromAllDrives=True
61
+ ).execute()
62
+ match results:
63
+ case {"files": [ {"id": id} ]}:
64
+ current_id = id
65
+ case _:
66
+ logger.debug("Folder not found by path: {}", folder_path)
67
+ return None
68
+ except HttpError as e:
69
+ logger.error("Error resolving folder path: {}", folder_path)
70
+ return None
71
+
72
+ self._folder_cache[folder_path] = current_id
73
+ return current_id
74
+
75
+ def list_directory(self: Self, folder_path: str) -> Collection[GoogleDriveMetadata]:
76
+ """List contents of a directory with basic metadata."""
77
+
78
+ folder_id = self._resolve_folder_id(folder_path)
79
+ if not folder_id:
80
+ logger.debug("Folder not found by path: {}", folder_path)
81
+ return ()
82
+
83
+ try:
84
+ results = self._google_drive_client.files().list(
85
+ q=f"'{folder_id}' in parents",
86
+ fields="files(id,name,mimeType,modifiedTime)",
87
+ supportsAllDrives=True,
88
+ includeItemsFromAllDrives=True,
89
+ pageSize=1000
90
+ ).execute()
91
+ return tuple(GoogleDriveMetadata.from_folder_path_and_dict(folder_path, result)
92
+ for result
93
+ in results.get('files', ()))
94
+ except HttpError as e:
95
+ logger.error("Error listing folder by path, {}: {}", folder_path, e)
96
+ return ()
97
+
98
+ def get_metadata(self: Self, item_path: str) -> Optional[GoogleDriveMetadata]:
99
+ """Get metadata for a specific file/folder by path."""
100
+
101
+ match item_path.rsplit(PATH_SEPARATOR, 1):
102
+ case [item_name]:
103
+ folder_path = ""
104
+ folder_id = self.settings.GOOGLE_DRIVE_ROOT_ID
105
+ case [folder_path, item_name]:
106
+ folder_id = self._resolve_folder_id(folder_path)
107
+
108
+ if not folder_id:
109
+ logger.debug("Folder not found by path: {}", folder_path)
110
+ return None
111
+
112
+ try:
113
+ results = self._google_drive_client.files().list(
114
+ q=f"name='{item_name}' and '{folder_id}' in parents",
115
+ fields="files(id,name,mimeType,modifiedTime)",
116
+ supportsAllDrives=True,
117
+ includeItemsFromAllDrives=True,
118
+ pageSize=1
119
+ ).execute()
120
+ match results:
121
+ case {"files": [result]}:
122
+ return GoogleDriveMetadata.from_folder_path_and_dict(folder_path, result)
123
+ except HttpError as e:
124
+ logger.error("Error getting metadata for item by path, {}: {}", item_path, e)
125
+
126
+ logger.debug("Item not found by path: {}", item_path)
127
+ return None
128
+
129
+ def read_file_by_id(self: Self, file_id: str) -> Optional[bytes]:
130
+ """Read contents of a file by its unique identifier."""
131
+
132
+ try:
133
+ request = self._google_drive_client.files().get_media(fileId=file_id)
134
+ buffer = BytesIO()
135
+ downloader = MediaIoBaseDownload(buffer, request)
136
+ done = False
137
+ while not done:
138
+ _, done = downloader.next_chunk()
139
+ return buffer.getvalue()
140
+ except HttpError as e:
141
+ logger.error("Error reading file by ID, {}: {}", file_id, e)
142
+ return None
src/ctp_slack_bot/services/schedule_service.py CHANGED
@@ -2,6 +2,7 @@ from apscheduler.schedulers.asyncio import AsyncIOScheduler
2
  from apscheduler.triggers.cron import CronTrigger
3
  from asyncio import create_task, iscoroutinefunction, to_thread
4
  from datetime import datetime
 
5
  from loguru import logger
6
  from pydantic import BaseModel, PrivateAttr
7
  from pytz import timezone
@@ -47,11 +48,21 @@ class ScheduleService(BaseModel):
47
 
48
  def start(self: Self) -> None:
49
  self._scheduler.start()
50
- logger.info("Started scheduler.")
51
 
52
  def stop(self: Self) -> None:
53
  if self._scheduler.running:
54
- self._scheduler.shutdown(wait=False)
55
- logger.info("Shut down scheduler.")
56
  else:
57
  logger.debug("The scheduler is not running. There is no scheduler to shut down.")
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from apscheduler.triggers.cron import CronTrigger
3
  from asyncio import create_task, iscoroutinefunction, to_thread
4
  from datetime import datetime
5
+ from dependency_injector.resources import Resource
6
  from loguru import logger
7
  from pydantic import BaseModel, PrivateAttr
8
  from pytz import timezone
 
48
 
49
  def start(self: Self) -> None:
50
  self._scheduler.start()
 
51
 
52
  def stop(self: Self) -> None:
53
  if self._scheduler.running:
54
+ self._scheduler.shutdown()
 
55
  else:
56
  logger.debug("The scheduler is not running. There is no scheduler to shut down.")
57
+
58
class ScheduleServiceResource(Resource):
    """Resource that starts a ``ScheduleService`` on init and stops it on shutdown."""

    def init(self: Self, settings: Settings) -> ScheduleService:
        """Create the schedule service, start it, and hand it back."""
        logger.info("Starting scheduler…")
        service = ScheduleService(settings=settings)
        service.start()
        return service

    def shutdown(self: Self, schedule_service: ScheduleService) -> None:
        """Stop scheduler on shutdown."""
        schedule_service.stop()
        logger.info("Stopped scheduler.")
src/ctp_slack_bot/services/vector_database_service.py CHANGED
@@ -172,4 +172,4 @@ class VectorDatabaseService(BaseModel): # TODO: this should not rely specificall
172
  bool(self.settings.MONGODB_URI), self.settings.MONGODB_NAME)
173
  logger.debug("Query details: k={}, dimension={}",
174
  query.k, len(query.query_embeddings) if query.query_embeddings else "None")
175
- raise
 
172
  bool(self.settings.MONGODB_URI), self.settings.MONGODB_NAME)
173
  logger.debug("Query details: k={}, dimension={}",
174
  query.k, len(query.query_embeddings) if query.query_embeddings else "None")
175
+ raise
src/ctp_slack_bot/utils/__init__.py CHANGED
@@ -0,0 +1 @@
 
 
1
+ from ctp_slack_bot.utils.secret_stripper import sanitize_mongo_db_uri
src/ctp_slack_bot/utils/secret_stripper.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from urllib.parse import urlparse, urlunparse
2
+
3
def sanitize_mongo_db_uri(uri: str) -> str:
    """Return *uri* with any ``user:password@`` credentials removed.

    Only the userinfo part of the network location is dropped; the host list
    (including ports, multiple comma-separated hosts and bracketed IPv6
    literals), path, query and fragment are preserved so the result is safe
    to log.

    Args:
        uri: A MongoDB connection string, e.g. ``mongodb://u:p@host:27017/db``.

    Returns:
        The same URI without credentials.
    """
    parts = urlparse(uri)
    # Everything after the last "@" is host information. Working on the raw
    # netloc avoids ``parts.port`` (an int, which cannot be joined as a string
    # and which raises on multi-host URIs).
    sanitized_netloc = parts.netloc.rpartition("@")[2]
    return urlunparse((parts.scheme, sanitized_netloc, parts.path, parts.params, parts.query, parts.fragment))