LiKenun commited on
Commit
d9e81c2
·
1 Parent(s): cc6f2db

MIME type handler proof-of-concept

Browse files
notebooks/google_drive_web_vtt_vectorizer_and_storer.ipynb CHANGED
@@ -11,31 +11,7 @@
11
  "cell_type": "code",
12
  "execution_count": null,
13
  "metadata": {},
14
- "outputs": [
15
- {
16
- "name": "stderr",
17
- "output_type": "stream",
18
- "text": [
19
- "\u001b[32m2025-04-20 00:08:14.649\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.core.config\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m14\u001b[0m - \u001b[34m\u001b[1mCreated Settings\u001b[0m\n",
20
- "\u001b[32m2025-04-20 00:08:14.652\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.google_drive_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m44\u001b[0m - \u001b[34m\u001b[1mCreated GoogleDriveService\u001b[0m\n",
21
- "\u001b[32m2025-04-20 00:08:14.664\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m22\u001b[0m - \u001b[34m\u001b[1mCreated EmbeddingsModelService\u001b[0m\n",
22
- "\u001b[32m2025-04-20 00:08:14.664\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vectorization_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m22\u001b[0m - \u001b[34m\u001b[1mCreated VectorizationService\u001b[0m\n"
23
- ]
24
- },
25
- {
26
- "name": "stderr",
27
- "output_type": "stream",
28
- "text": [
29
- "\u001b[32m2025-04-20 00:08:14.666\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36minit\u001b[0m:\u001b[36m175\u001b[0m - \u001b[1mInitializing MongoDB connection for database: ctp_slack_bot_dev\u001b[0m\n",
30
- "\u001b[32m2025-04-20 00:08:14.666\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m26\u001b[0m - \u001b[34m\u001b[1mCreated MongoDB\u001b[0m\n",
31
- "\u001b[32m2025-04-20 00:08:14.667\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mconnect\u001b[0m:\u001b[36m32\u001b[0m - \u001b[34m\u001b[1mConnecting to MongoDB using URI: mongodb+srv://ctp-slack-bot.xkipuvm.mongodb.net/?retryWrites=true&w=majority&appName=ctp-slack-bot\u001b[0m\n",
32
- "\u001b[32m2025-04-20 00:08:14.667\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mconnect\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mMongoDB client initialized for database: ctp_slack_bot_dev\u001b[0m\n",
33
- "\u001b[32m2025-04-20 00:08:15.043\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
34
- "\u001b[32m2025-04-20 00:08:15.044\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36m_test_connection\u001b[0m:\u001b[36m186\u001b[0m - \u001b[1mMongoDB connection test successful!\u001b[0m\n",
35
- "\u001b[32m2025-04-20 00:08:15.044\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m21\u001b[0m - \u001b[34m\u001b[1mCreated VectorDatabaseService\u001b[0m\n"
36
- ]
37
- }
38
- ],
39
  "source": [
40
  "from datetime import datetime\n",
41
  "from functools import partial\n",
@@ -51,9 +27,10 @@
51
  "display_html = partial(display_html, raw=True)\n",
52
  "\n",
53
  "container = Container()\n",
 
54
  "google_drive_service = container.google_drive_service()\n",
55
  "vectorization_service = container.vectorization_service()\n",
56
- "vector_database_service = container.vector_database_service()"
57
  ]
58
  },
59
  {
@@ -67,7 +44,7 @@
67
  },
68
  {
69
  "cell_type": "code",
70
- "execution_count": 6,
71
  "metadata": {},
72
  "outputs": [],
73
  "source": [
@@ -85,64 +62,9 @@
85
  },
86
  {
87
  "cell_type": "code",
88
- "execution_count": 7,
89
  "metadata": {},
90
- "outputs": [
91
- {
92
- "data": {
93
- "text/html": [
94
- "<p>Found 11 files/folders.</p>"
95
- ]
96
- },
97
- "metadata": {},
98
- "output_type": "display_data"
99
- },
100
- {
101
- "data": {
102
- "text/html": [
103
- "<ul><li>/Friday Building AI Applications Session</li><li>/Friday Building AI Applications Session/GMT20250411-223535_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250404-231749_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250328-223256_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250321-223330_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250314-223145_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250307-233135_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250228-233632_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250221-233332_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250214-234809_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250207-233258_Recording.transcript.vtt</li></ul>"
104
- ]
105
- },
106
- "metadata": {},
107
- "output_type": "display_data"
108
- },
109
- {
110
- "data": {
111
- "text/html": [
112
- "<p>11 files/folders pass the modification time (<em>2024-08-30 00:00:00+00:00</em>) cut-off.</p>"
113
- ]
114
- },
115
- "metadata": {},
116
- "output_type": "display_data"
117
- },
118
- {
119
- "data": {
120
- "text/html": [
121
- "<ul><li>/Friday Building AI Applications Session</li><li>/Friday Building AI Applications Session/GMT20250411-223535_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250404-231749_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250328-223256_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250321-223330_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250314-223145_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250307-233135_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250228-233632_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250221-233332_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250214-234809_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250207-233258_Recording.transcript.vtt</li></ul>"
122
- ]
123
- },
124
- "metadata": {},
125
- "output_type": "display_data"
126
- },
127
- {
128
- "data": {
129
- "text/html": [
130
- "<p>11 files/folders pass the modification time (<em>2024-08-30 00:00:00+00:00</em>) cut-off and MIME type (<em>text/vtt</em>) criterion.</p>"
131
- ]
132
- },
133
- "metadata": {},
134
- "output_type": "display_data"
135
- },
136
- {
137
- "data": {
138
- "text/html": [
139
- "<ul><li>/Friday Building AI Applications Session/GMT20250411-223535_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250404-231749_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250328-223256_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250321-223330_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250314-223145_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250307-233135_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250228-233632_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250221-233332_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250214-234809_Recording.transcript.vtt</li><li>/Friday Building AI Applications Session/GMT20250207-233258_Recording.transcript.vtt</li></ul>"
140
- ]
141
- },
142
- "metadata": {},
143
- "output_type": "display_data"
144
- }
145
- ],
146
  "source": [
147
  "item_metadata = google_drive_service.list_directory(\"\", True)\n",
148
  "display_html(f\"<p>Found {len(item_metadata)} files/folders.</p>\")\n",
@@ -159,27 +81,27 @@
159
  },
160
  {
161
  "cell_type": "code",
162
- "execution_count": 8,
 
 
 
 
 
 
 
 
 
 
163
  "metadata": {},
164
- "outputs": [
165
- {
166
- "data": {
167
- "text/html": [
168
- "Processed 10 files."
169
- ]
170
- },
171
- "metadata": {},
172
- "output_type": "display_data"
173
- }
174
- ],
175
  "source": [
176
- "web_vtts = tuple(WebVTTContent.from_bytes(f\"googledrive:{metadata.folder_path}/{metadata.name}\",\n",
177
- " {\n",
178
- " \"filename\": metadata.name,\n",
179
- " \"mimeType\": metadata.mime_type,\n",
180
- " \"modificationTime\": metadata.modified_time\n",
181
- " },\n",
182
- " google_drive_service.read_file_by_id(metadata.id))\n",
183
  " for metadata\n",
184
  " in metadata_to_process)\n",
185
  "\n",
@@ -188,528 +110,16 @@
188
  },
189
  {
190
  "cell_type": "code",
191
- "execution_count": 9,
192
  "metadata": {},
193
- "outputs": [
194
- {
195
- "data": {
196
- "text/html": [
197
- "Chunked GMT20250411-223535_Recording.transcript.vtt into 86 chunks."
198
- ]
199
- },
200
- "metadata": {},
201
- "output_type": "display_data"
202
- },
203
- {
204
- "name": "stderr",
205
- "output_type": "stream",
206
- "text": [
207
- "\u001b[32m2025-04-20 00:08:52.269\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 86 text string(s)…\u001b[0m\n"
208
- ]
209
- },
210
- {
211
- "data": {
212
- "text/html": [
213
- "Vectorized GMT20250411-223535_Recording.transcript.vtt’s 86 chunks."
214
- ]
215
- },
216
- "metadata": {},
217
- "output_type": "display_data"
218
- },
219
- {
220
- "name": "stderr",
221
- "output_type": "stream",
222
- "text": [
223
- "\u001b[32m2025-04-20 00:08:54.190\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 86 chunks\u001b[0m\n",
224
- "\u001b[32m2025-04-20 00:08:54.216\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
225
- "\u001b[32m2025-04-20 00:08:54.217\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
226
- "\u001b[32m2025-04-20 00:08:54.240\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mCollection 'vectors' does not exist. Creating it…\u001b[0m\n",
227
- "\u001b[32m2025-04-20 00:08:54.301\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m113\u001b[0m - \u001b[34m\u001b[1mSuccessfully created collection: vectors\u001b[0m\n",
228
- "\u001b[32m2025-04-20 00:08:54.302\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
229
- "\u001b[32m2025-04-20 00:08:54.320\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
230
- "\u001b[32m2025-04-20 00:08:54.321\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
231
- "\u001b[32m2025-04-20 00:08:54.341\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
232
- "\u001b[32m2025-04-20 00:08:54.491\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
233
- "\u001b[32m2025-04-20 00:08:54.494\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 86 documents into vectors collection\u001b[0m\n",
234
- "\u001b[32m2025-04-20 00:08:55.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 86 vector chunks in database\u001b[0m\n"
235
- ]
236
- },
237
- {
238
- "data": {
239
- "text/html": [
240
- "Stored GMT20250411-223535_Recording.transcript.vtt’s 86 vectorized chunks to the database."
241
- ]
242
- },
243
- "metadata": {},
244
- "output_type": "display_data"
245
- },
246
- {
247
- "data": {
248
- "text/html": [
249
- "Chunked GMT20250404-231749_Recording.transcript.vtt into 56 chunks."
250
- ]
251
- },
252
- "metadata": {},
253
- "output_type": "display_data"
254
- },
255
- {
256
- "name": "stderr",
257
- "output_type": "stream",
258
- "text": [
259
- "\u001b[32m2025-04-20 00:08:55.241\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 56 text string(s)…\u001b[0m\n"
260
- ]
261
- },
262
- {
263
- "data": {
264
- "text/html": [
265
- "Vectorized GMT20250404-231749_Recording.transcript.vtt’s 56 chunks."
266
- ]
267
- },
268
- "metadata": {},
269
- "output_type": "display_data"
270
- },
271
- {
272
- "name": "stderr",
273
- "output_type": "stream",
274
- "text": [
275
- "\u001b[32m2025-04-20 00:08:56.099\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 56 chunks\u001b[0m\n",
276
- "\u001b[32m2025-04-20 00:08:56.119\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
277
- "\u001b[32m2025-04-20 00:08:56.120\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
278
- "\u001b[32m2025-04-20 00:08:56.151\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
279
- "\u001b[32m2025-04-20 00:08:56.151\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
280
- "\u001b[32m2025-04-20 00:08:56.170\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
281
- "\u001b[32m2025-04-20 00:08:56.170\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
282
- "\u001b[32m2025-04-20 00:08:56.199\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
283
- "\u001b[32m2025-04-20 00:08:56.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
284
- "\u001b[32m2025-04-20 00:08:56.341\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 56 documents into vectors collection\u001b[0m\n",
285
- "\u001b[32m2025-04-20 00:08:56.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 56 vector chunks in database\u001b[0m\n"
286
- ]
287
- },
288
- {
289
- "data": {
290
- "text/html": [
291
- "Stored GMT20250404-231749_Recording.transcript.vtt’s 56 vectorized chunks to the database."
292
- ]
293
- },
294
- "metadata": {},
295
- "output_type": "display_data"
296
- },
297
- {
298
- "data": {
299
- "text/html": [
300
- "Chunked GMT20250328-223256_Recording.transcript.vtt into 359 chunks."
301
- ]
302
- },
303
- "metadata": {},
304
- "output_type": "display_data"
305
- },
306
- {
307
- "name": "stderr",
308
- "output_type": "stream",
309
- "text": [
310
- "\u001b[32m2025-04-20 00:08:56.735\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 359 text string(s)…\u001b[0m\n"
311
- ]
312
- },
313
- {
314
- "data": {
315
- "text/html": [
316
- "Vectorized GMT20250328-223256_Recording.transcript.vtt’s 359 chunks."
317
- ]
318
- },
319
- "metadata": {},
320
- "output_type": "display_data"
321
- },
322
- {
323
- "name": "stderr",
324
- "output_type": "stream",
325
- "text": [
326
- "\u001b[32m2025-04-20 00:09:00.360\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 359 chunks\u001b[0m\n",
327
- "\u001b[32m2025-04-20 00:09:00.384\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
328
- "\u001b[32m2025-04-20 00:09:00.384\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
329
- "\u001b[32m2025-04-20 00:09:00.404\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
330
- "\u001b[32m2025-04-20 00:09:00.404\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
331
- "\u001b[32m2025-04-20 00:09:00.424\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
332
- "\u001b[32m2025-04-20 00:09:00.424\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
333
- "\u001b[32m2025-04-20 00:09:00.445\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
334
- "\u001b[32m2025-04-20 00:09:00.588\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
335
- "\u001b[32m2025-04-20 00:09:00.590\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 359 documents into vectors collection\u001b[0m\n",
336
- "\u001b[32m2025-04-20 00:09:04.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 359 vector chunks in database\u001b[0m\n"
337
- ]
338
- },
339
- {
340
- "data": {
341
- "text/html": [
342
- "Stored GMT20250328-223256_Recording.transcript.vtt’s 359 vectorized chunks to the database."
343
- ]
344
- },
345
- "metadata": {},
346
- "output_type": "display_data"
347
- },
348
- {
349
- "data": {
350
- "text/html": [
351
- "Chunked GMT20250321-223330_Recording.transcript.vtt into 314 chunks."
352
- ]
353
- },
354
- "metadata": {},
355
- "output_type": "display_data"
356
- },
357
- {
358
- "name": "stderr",
359
- "output_type": "stream",
360
- "text": [
361
- "\u001b[32m2025-04-20 00:09:04.397\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 314 text string(s)…\u001b[0m\n"
362
- ]
363
- },
364
- {
365
- "data": {
366
- "text/html": [
367
- "Vectorized GMT20250321-223330_Recording.transcript.vtt’s 314 chunks."
368
- ]
369
- },
370
- "metadata": {},
371
- "output_type": "display_data"
372
- },
373
- {
374
- "name": "stderr",
375
- "output_type": "stream",
376
- "text": [
377
- "\u001b[32m2025-04-20 00:09:07.348\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 314 chunks\u001b[0m\n",
378
- "\u001b[32m2025-04-20 00:09:07.369\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
379
- "\u001b[32m2025-04-20 00:09:07.370\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
380
- "\u001b[32m2025-04-20 00:09:07.389\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
381
- "\u001b[32m2025-04-20 00:09:07.389\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
382
- "\u001b[32m2025-04-20 00:09:07.410\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
383
- "\u001b[32m2025-04-20 00:09:07.410\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
384
- "\u001b[32m2025-04-20 00:09:07.430\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
385
- "\u001b[32m2025-04-20 00:09:07.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
386
- "\u001b[32m2025-04-20 00:09:07.568\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 314 documents into vectors collection\u001b[0m\n",
387
- "\u001b[32m2025-04-20 00:09:11.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 314 vector chunks in database\u001b[0m\n"
388
- ]
389
- },
390
- {
391
- "data": {
392
- "text/html": [
393
- "Stored GMT20250321-223330_Recording.transcript.vtt’s 314 vectorized chunks to the database."
394
- ]
395
- },
396
- "metadata": {},
397
- "output_type": "display_data"
398
- },
399
- {
400
- "data": {
401
- "text/html": [
402
- "Chunked GMT20250314-223145_Recording.transcript.vtt into 331 chunks."
403
- ]
404
- },
405
- "metadata": {},
406
- "output_type": "display_data"
407
- },
408
- {
409
- "name": "stderr",
410
- "output_type": "stream",
411
- "text": [
412
- "\u001b[32m2025-04-20 00:09:11.157\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 331 text string(s)…\u001b[0m\n"
413
- ]
414
- },
415
- {
416
- "data": {
417
- "text/html": [
418
- "Vectorized GMT20250314-223145_Recording.transcript.vtt’s 331 chunks."
419
- ]
420
- },
421
- "metadata": {},
422
- "output_type": "display_data"
423
- },
424
- {
425
- "name": "stderr",
426
- "output_type": "stream",
427
- "text": [
428
- "\u001b[32m2025-04-20 00:09:14.751\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 331 chunks\u001b[0m\n",
429
- "\u001b[32m2025-04-20 00:09:14.774\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
430
- "\u001b[32m2025-04-20 00:09:14.774\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
431
- "\u001b[32m2025-04-20 00:09:14.794\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
432
- "\u001b[32m2025-04-20 00:09:14.794\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
433
- "\u001b[32m2025-04-20 00:09:14.813\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
434
- "\u001b[32m2025-04-20 00:09:14.813\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
435
- "\u001b[32m2025-04-20 00:09:14.834\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
436
- "\u001b[32m2025-04-20 00:09:14.948\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
437
- "\u001b[32m2025-04-20 00:09:14.950\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 331 documents into vectors collection\u001b[0m\n",
438
- "\u001b[32m2025-04-20 00:09:18.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 331 vector chunks in database\u001b[0m\n"
439
- ]
440
- },
441
- {
442
- "data": {
443
- "text/html": [
444
- "Stored GMT20250314-223145_Recording.transcript.vtt’s 331 vectorized chunks to the database."
445
- ]
446
- },
447
- "metadata": {},
448
- "output_type": "display_data"
449
- },
450
- {
451
- "data": {
452
- "text/html": [
453
- "Chunked GMT20250307-233135_Recording.transcript.vtt into 280 chunks."
454
- ]
455
- },
456
- "metadata": {},
457
- "output_type": "display_data"
458
- },
459
- {
460
- "name": "stderr",
461
- "output_type": "stream",
462
- "text": [
463
- "\u001b[32m2025-04-20 00:09:18.643\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 280 text string(s)…\u001b[0m\n"
464
- ]
465
- },
466
- {
467
- "data": {
468
- "text/html": [
469
- "Vectorized GMT20250307-233135_Recording.transcript.vtt’s 280 chunks."
470
- ]
471
- },
472
- "metadata": {},
473
- "output_type": "display_data"
474
- },
475
- {
476
- "name": "stderr",
477
- "output_type": "stream",
478
- "text": [
479
- "\u001b[32m2025-04-20 00:09:22.256\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 280 chunks\u001b[0m\n",
480
- "\u001b[32m2025-04-20 00:09:22.278\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
481
- "\u001b[32m2025-04-20 00:09:22.279\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
482
- "\u001b[32m2025-04-20 00:09:22.297\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
483
- "\u001b[32m2025-04-20 00:09:22.297\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
484
- "\u001b[32m2025-04-20 00:09:22.344\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
485
- "\u001b[32m2025-04-20 00:09:22.345\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
486
- "\u001b[32m2025-04-20 00:09:22.368\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
487
- "\u001b[32m2025-04-20 00:09:22.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
488
- "\u001b[32m2025-04-20 00:09:22.507\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 280 documents into vectors collection\u001b[0m\n",
489
- "\u001b[32m2025-04-20 00:09:24.988\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 280 vector chunks in database\u001b[0m\n"
490
- ]
491
- },
492
- {
493
- "data": {
494
- "text/html": [
495
- "Stored GMT20250307-233135_Recording.transcript.vtt’s 280 vectorized chunks to the database."
496
- ]
497
- },
498
- "metadata": {},
499
- "output_type": "display_data"
500
- },
501
- {
502
- "data": {
503
- "text/html": [
504
- "Chunked GMT20250228-233632_Recording.transcript.vtt into 233 chunks."
505
- ]
506
- },
507
- "metadata": {},
508
- "output_type": "display_data"
509
- },
510
- {
511
- "name": "stderr",
512
- "output_type": "stream",
513
- "text": [
514
- "\u001b[32m2025-04-20 00:09:24.991\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 233 text string(s)…\u001b[0m\n"
515
- ]
516
- },
517
- {
518
- "data": {
519
- "text/html": [
520
- "Vectorized GMT20250228-233632_Recording.transcript.vtt’s 233 chunks."
521
- ]
522
- },
523
- "metadata": {},
524
- "output_type": "display_data"
525
- },
526
- {
527
- "name": "stderr",
528
- "output_type": "stream",
529
- "text": [
530
- "\u001b[32m2025-04-20 00:09:28.628\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 233 chunks\u001b[0m\n",
531
- "\u001b[32m2025-04-20 00:09:28.648\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
532
- "\u001b[32m2025-04-20 00:09:28.649\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
533
- "\u001b[32m2025-04-20 00:09:28.669\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
534
- "\u001b[32m2025-04-20 00:09:28.669\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
535
- "\u001b[32m2025-04-20 00:09:28.688\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
536
- "\u001b[32m2025-04-20 00:09:28.688\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
537
- "\u001b[32m2025-04-20 00:09:28.709\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
538
- "\u001b[32m2025-04-20 00:09:28.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
539
- "\u001b[32m2025-04-20 00:09:28.838\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 233 documents into vectors collection\u001b[0m\n",
540
- "\u001b[32m2025-04-20 00:09:31.039\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 233 vector chunks in database\u001b[0m\n"
541
- ]
542
- },
543
- {
544
- "data": {
545
- "text/html": [
546
- "Stored GMT20250228-233632_Recording.transcript.vtt’s 233 vectorized chunks to the database."
547
- ]
548
- },
549
- "metadata": {},
550
- "output_type": "display_data"
551
- },
552
- {
553
- "data": {
554
- "text/html": [
555
- "Chunked GMT20250221-233332_Recording.transcript.vtt into 278 chunks."
556
- ]
557
- },
558
- "metadata": {},
559
- "output_type": "display_data"
560
- },
561
- {
562
- "name": "stderr",
563
- "output_type": "stream",
564
- "text": [
565
- "\u001b[32m2025-04-20 00:09:31.042\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 278 text string(s)…\u001b[0m\n"
566
- ]
567
- },
568
- {
569
- "data": {
570
- "text/html": [
571
- "Vectorized GMT20250221-233332_Recording.transcript.vtt’s 278 chunks."
572
- ]
573
- },
574
- "metadata": {},
575
- "output_type": "display_data"
576
- },
577
- {
578
- "name": "stderr",
579
- "output_type": "stream",
580
- "text": [
581
- "\u001b[32m2025-04-20 00:09:36.119\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 278 chunks\u001b[0m\n",
582
- "\u001b[32m2025-04-20 00:09:36.138\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
583
- "\u001b[32m2025-04-20 00:09:36.138\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
584
- "\u001b[32m2025-04-20 00:09:36.157\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
585
- "\u001b[32m2025-04-20 00:09:36.158\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
586
- "\u001b[32m2025-04-20 00:09:36.177\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
587
- "\u001b[32m2025-04-20 00:09:36.177\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
588
- "\u001b[32m2025-04-20 00:09:36.198\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
589
- "\u001b[32m2025-04-20 00:09:36.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
590
- "\u001b[32m2025-04-20 00:09:36.316\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 278 documents into vectors collection\u001b[0m\n",
591
- "\u001b[32m2025-04-20 00:09:38.707\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 278 vector chunks in database\u001b[0m\n"
592
- ]
593
- },
594
- {
595
- "data": {
596
- "text/html": [
597
- "Stored GMT20250221-233332_Recording.transcript.vtt’s 278 vectorized chunks to the database."
598
- ]
599
- },
600
- "metadata": {},
601
- "output_type": "display_data"
602
- },
603
- {
604
- "data": {
605
- "text/html": [
606
- "Chunked GMT20250214-234809_Recording.transcript.vtt into 97 chunks."
607
- ]
608
- },
609
- "metadata": {},
610
- "output_type": "display_data"
611
- },
612
- {
613
- "name": "stderr",
614
- "output_type": "stream",
615
- "text": [
616
- "\u001b[32m2025-04-20 00:09:38.710\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 97 text string(s)…\u001b[0m\n"
617
- ]
618
- },
619
- {
620
- "data": {
621
- "text/html": [
622
- "Vectorized GMT20250214-234809_Recording.transcript.vtt’s 97 chunks."
623
- ]
624
- },
625
- "metadata": {},
626
- "output_type": "display_data"
627
- },
628
- {
629
- "name": "stderr",
630
- "output_type": "stream",
631
- "text": [
632
- "\u001b[32m2025-04-20 00:09:40.479\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 97 chunks\u001b[0m\n",
633
- "\u001b[32m2025-04-20 00:09:40.499\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
634
- "\u001b[32m2025-04-20 00:09:40.499\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
635
- "\u001b[32m2025-04-20 00:09:40.529\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
636
- "\u001b[32m2025-04-20 00:09:40.529\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
637
- "\u001b[32m2025-04-20 00:09:40.548\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
638
- "\u001b[32m2025-04-20 00:09:40.548\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
639
- "\u001b[32m2025-04-20 00:09:40.568\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
640
- "\u001b[32m2025-04-20 00:09:40.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
641
- "\u001b[32m2025-04-20 00:09:40.679\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 97 documents into vectors collection\u001b[0m\n",
642
- "\u001b[32m2025-04-20 00:09:41.562\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 97 vector chunks in database\u001b[0m\n"
643
- ]
644
- },
645
- {
646
- "data": {
647
- "text/html": [
648
- "Stored GMT20250214-234809_Recording.transcript.vtt’s 97 vectorized chunks to the database."
649
- ]
650
- },
651
- "metadata": {},
652
- "output_type": "display_data"
653
- },
654
- {
655
- "data": {
656
- "text/html": [
657
- "Chunked GMT20250207-233258_Recording.transcript.vtt into 209 chunks."
658
- ]
659
- },
660
- "metadata": {},
661
- "output_type": "display_data"
662
- },
663
- {
664
- "name": "stderr",
665
- "output_type": "stream",
666
- "text": [
667
- "\u001b[32m2025-04-20 00:09:41.565\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 209 text string(s)…\u001b[0m\n"
668
- ]
669
- },
670
- {
671
- "data": {
672
- "text/html": [
673
- "Vectorized GMT20250207-233258_Recording.transcript.vtt’s 209 chunks."
674
- ]
675
- },
676
- "metadata": {},
677
- "output_type": "display_data"
678
- },
679
- {
680
- "name": "stderr",
681
- "output_type": "stream",
682
- "text": [
683
- "\u001b[32m2025-04-20 00:09:44.152\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 209 chunks\u001b[0m\n",
684
- "\u001b[32m2025-04-20 00:09:44.178\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
685
- "\u001b[32m2025-04-20 00:09:44.178\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
686
- "\u001b[32m2025-04-20 00:09:44.197\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
687
- "\u001b[32m2025-04-20 00:09:44.198\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n",
688
- "\u001b[32m2025-04-20 00:09:44.221\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n",
689
- "\u001b[32m2025-04-20 00:09:44.222\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n",
690
- "\u001b[32m2025-04-20 00:09:44.247\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n",
691
- "\u001b[32m2025-04-20 00:09:44.390\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n",
692
- "\u001b[32m2025-04-20 00:09:44.391\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 209 documents into vectors collection\u001b[0m\n",
693
- "\u001b[32m2025-04-20 00:09:46.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 209 vector chunks in database\u001b[0m\n"
694
- ]
695
- },
696
- {
697
- "data": {
698
- "text/html": [
699
- "Stored GMT20250207-233258_Recording.transcript.vtt’s 209 vectorized chunks to the database."
700
- ]
701
- },
702
- "metadata": {},
703
- "output_type": "display_data"
704
- }
705
- ],
706
  "source": [
707
  "for web_vtt in web_vtts:\n",
708
  " chunks = web_vtt.get_chunks()\n",
709
  " display_html(f\"Chunked {web_vtt.get_metadata().get(\"filename\")} into {len(chunks)} chunks.\")\n",
710
  " vectorized_chunks = vectorization_service.vectorize(chunks)\n",
711
  " display_html(f\"Vectorized {web_vtt.get_metadata().get(\"filename\")}’s {len(vectorized_chunks)} chunks.\")\n",
712
- " await (await vector_database_service).store(vectorized_chunks)\n",
713
  " display_html(f\"Stored {web_vtt.get_metadata().get(\"filename\")}’s {len(vectorized_chunks)} vectorized chunks to the database.\")"
714
  ]
715
  }
 
11
  "cell_type": "code",
12
  "execution_count": null,
13
  "metadata": {},
14
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  "source": [
16
  "from datetime import datetime\n",
17
  "from functools import partial\n",
 
27
  "display_html = partial(display_html, raw=True)\n",
28
  "\n",
29
  "container = Container()\n",
30
+ "mongo_db = await container.mongo_db()\n",
31
  "google_drive_service = container.google_drive_service()\n",
32
  "vectorization_service = container.vectorization_service()\n",
33
+ "vector_database_service = await container.vector_database_service()"
34
  ]
35
  },
36
  {
 
44
  },
45
  {
46
  "cell_type": "code",
47
+ "execution_count": null,
48
  "metadata": {},
49
  "outputs": [],
50
  "source": [
 
62
  },
63
  {
64
  "cell_type": "code",
65
+ "execution_count": null,
66
  "metadata": {},
67
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  "source": [
69
  "item_metadata = google_drive_service.list_directory(\"\", True)\n",
70
  "display_html(f\"<p>Found {len(item_metadata)} files/folders.</p>\")\n",
 
81
  },
82
  {
83
  "cell_type": "code",
84
+ "execution_count": null,
85
+ "metadata": {},
86
+ "outputs": [],
87
+ "source": [
88
+ "web_vtt_parser = container.mime_type_handler_factory(MIME_TYPE)\n",
89
+ "display_html(f\"<p>{escape(str(type(web_vtt_parser)))}</p>\")"
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": null,
95
  "metadata": {},
96
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
97
  "source": [
98
+ "web_vtts = tuple(web_vtt_parser.from_bytes(f\"googledrive:{metadata.folder_path}/{metadata.name}\",\n",
99
+ " {\n",
100
+ " \"filename\": metadata.name,\n",
101
+ " \"mimeType\": metadata.mime_type,\n",
102
+ " \"modificationTime\": metadata.modified_time\n",
103
+ " },\n",
104
+ " google_drive_service.read_file_by_id(metadata.id))\n",
105
  " for metadata\n",
106
  " in metadata_to_process)\n",
107
  "\n",
 
110
  },
111
  {
112
  "cell_type": "code",
113
+ "execution_count": null,
114
  "metadata": {},
115
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  "source": [
117
  "for web_vtt in web_vtts:\n",
118
  " chunks = web_vtt.get_chunks()\n",
119
  " display_html(f\"Chunked {web_vtt.get_metadata().get(\"filename\")} into {len(chunks)} chunks.\")\n",
120
  " vectorized_chunks = vectorization_service.vectorize(chunks)\n",
121
  " display_html(f\"Vectorized {web_vtt.get_metadata().get(\"filename\")}’s {len(vectorized_chunks)} chunks.\")\n",
122
+ " await vector_database_service.store(vectorized_chunks)\n",
123
  " display_html(f\"Stored {web_vtt.get_metadata().get(\"filename\")}’s {len(vectorized_chunks)} vectorized chunks to the database.\")"
124
  ]
125
  }
src/ctp_slack_bot/mime_type_handlers/text/vtt.py CHANGED
@@ -1,6 +1,7 @@
1
  from datetime import datetime
2
  from io import BytesIO
3
  from re import compile as compile_re
 
4
  from typing import Any, Mapping, Optional, Self
5
  from webvtt import WebVTT
6
 
@@ -28,7 +29,7 @@ class WebVTTMimeTypeHandler(BaseMimeTypeHandler):
28
  for result
29
  in map(ISO_DATE_TIME_PATTERN.findall, web_vtt.header_comments)
30
  if result)
31
- except ValueError:
32
  return None
33
 
34
  def from_bytes(self: Self, id: str, metadata: Mapping[str, Any], buffer: bytes) -> WebVTTContent:
@@ -36,4 +37,4 @@ class WebVTTMimeTypeHandler(BaseMimeTypeHandler):
36
  frames = tuple(WebVTTFrame.from_webvtt_caption(caption, index)
37
  for index, caption
38
  in enumerate(web_vtt.captions, 1))
39
- return WebVTTContent(id=id, metadata=metadata, start_time=WebVTTMimeTypeHandler.__get_start_time(web_vtt), frames=frames)
 
1
  from datetime import datetime
2
  from io import BytesIO
3
  from re import compile as compile_re
4
+ from types import MappingProxyType
5
  from typing import Any, Mapping, Optional, Self
6
  from webvtt import WebVTT
7
 
 
29
  for result
30
  in map(ISO_DATE_TIME_PATTERN.findall, web_vtt.header_comments)
31
  if result)
32
+ except (StopIteration, ValueError):
33
  return None
34
 
35
  def from_bytes(self: Self, id: str, metadata: Mapping[str, Any], buffer: bytes) -> WebVTTContent:
 
37
  frames = tuple(WebVTTFrame.from_webvtt_caption(caption, index)
38
  for index, caption
39
  in enumerate(web_vtt.captions, 1))
40
+ return WebVTTContent(id=id, metadata=MappingProxyType(metadata), start_time=WebVTTMimeTypeHandler.__get_start_time(web_vtt), frames=frames)
src/ctp_slack_bot/models/webvtt.py CHANGED
@@ -69,21 +69,3 @@ class WebVTTContent(Content):
69
 
70
  def get_metadata(self: Self) -> Mapping[str, Any]:
71
  return MappingProxyType(self.metadata)
72
-
73
- @classmethod
74
- def __get_start_time(cls, web_vtt: WebVTT) -> Optional[datetime]:
75
- try:
76
- return next(datetime.fromisoformat(result[0])
77
- for result
78
- in map(ISO_DATE_TIME_PATTERN.findall, web_vtt.header_comments)
79
- if result)
80
- except (StopIteration, ValueError):
81
- return None
82
-
83
- @classmethod
84
- def from_bytes(cls, id: str, metadata: Mapping[str, Any], buffer: bytes) -> Self:
85
- web_vtt = WebVTT.from_buffer(BytesIO(buffer))
86
- frames = tuple(WebVTTFrame.from_webvtt_caption(caption, index)
87
- for index, caption
88
- in enumerate(web_vtt.captions, 1))
89
- return WebVTTContent(id=id, metadata=MappingProxyType(metadata), start_time=cls.__get_start_time(web_vtt), frames=frames)
 
69
 
70
  def get_metadata(self: Self) -> Mapping[str, Any]:
71
  return MappingProxyType(self.metadata)