Spaces:
Runtime error
Runtime error
Code clean-up: removal of dead code and shortening expressions
Browse files
notebooks/google_drive_web_vtt_vectorizer_and_storer.ipynb
CHANGED
@@ -86,7 +86,16 @@
|
|
86 |
"outputs": [],
|
87 |
"source": [
|
88 |
"web_vtt_parser = container.mime_type_handler_factory(MIME_TYPE)\n",
|
89 |
-
"display_html(f\"<p>{escape(str(type(web_vtt_parser)))}</p>\")"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
]
|
91 |
},
|
92 |
{
|
|
|
86 |
"outputs": [],
|
87 |
"source": [
|
88 |
"web_vtt_parser = container.mime_type_handler_factory(MIME_TYPE)\n",
|
89 |
+
"display_html(f\"<p>{escape(str(type(web_vtt_parser)))}</p><code>{id(web_vtt_parser)}</code>\")"
|
90 |
+
]
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"cell_type": "code",
|
94 |
+
"execution_count": null,
|
95 |
+
"metadata": {},
|
96 |
+
"outputs": [],
|
97 |
+
"source": [
|
98 |
+
"display_html(f\"<code>{id(container.mime_type_handler_factory(MIME_TYPE))}</code>\")"
|
99 |
]
|
100 |
},
|
101 |
{
|
src/ctp_slack_bot/containers.py
CHANGED
@@ -8,7 +8,7 @@ from slack_bolt.async_app import AsyncApp
|
|
8 |
from ctp_slack_bot.core.config import Settings
|
9 |
from ctp_slack_bot.db.mongo_db import MongoDBResource
|
10 |
from ctp_slack_bot.db.repositories.mongo_db_vectorized_chunk_repository import MongoVectorizedChunkRepositoryResource
|
11 |
-
from ctp_slack_bot.mime_type_handlers.base import
|
12 |
from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
|
13 |
from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
|
14 |
from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
|
@@ -78,7 +78,7 @@ class Container(DeclarativeContainer): # TODO: audit for potential async-related
|
|
78 |
slack_service,
|
79 |
slack_bolt_app,
|
80 |
settings)
|
81 |
-
mime_type_handler_factory = Callable (
|
82 |
google_drive_service = Singleton(GoogleDriveService,
|
83 |
settings=settings)
|
84 |
# file_monitor_service = Singleton(FileMonitorService,
|
|
|
8 |
from ctp_slack_bot.core.config import Settings
|
9 |
from ctp_slack_bot.db.mongo_db import MongoDBResource
|
10 |
from ctp_slack_bot.db.repositories.mongo_db_vectorized_chunk_repository import MongoVectorizedChunkRepositoryResource
|
11 |
+
from ctp_slack_bot.mime_type_handlers.base import MimeTypeHandler
|
12 |
from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
|
13 |
from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
|
14 |
from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
|
|
|
78 |
slack_service,
|
79 |
slack_bolt_app,
|
80 |
settings)
|
81 |
+
mime_type_handler_factory = Callable (MimeTypeHandler.for_mime_type)
|
82 |
google_drive_service = Singleton(GoogleDriveService,
|
83 |
settings=settings)
|
84 |
# file_monitor_service = Singleton(FileMonitorService,
|
src/ctp_slack_bot/mime_type_handlers/__init__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
from ctp_slack_bot.mime_type_handlers.base import
|
2 |
from ctp_slack_bot.mime_type_handlers.text.vtt import WebVTTMimeTypeHandler
|
|
|
1 |
+
from ctp_slack_bot.mime_type_handlers.base import MimeTypeHandler, MimeTypeHandlerMeta
|
2 |
from ctp_slack_bot.mime_type_handlers.text.vtt import WebVTTMimeTypeHandler
|
src/ctp_slack_bot/mime_type_handlers/base.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
from abc import ABCMeta, abstractmethod
|
|
|
2 |
from typing import Any, ClassVar, Dict, Mapping, Optional
|
3 |
|
4 |
from ctp_slack_bot.models import Content
|
@@ -18,11 +19,12 @@ class MimeTypeHandlerABCMeta(MimeTypeHandlerMeta, ABCMeta):
|
|
18 |
pass
|
19 |
|
20 |
|
21 |
-
class
|
22 |
|
23 |
@classmethod
|
24 |
-
|
25 |
-
|
|
|
26 |
|
27 |
@abstractmethod
|
28 |
def from_bytes(cls, id: str, metadata: Mapping[str, Any], buffer: bytes) -> Content:
|
|
|
1 |
from abc import ABCMeta, abstractmethod
|
2 |
+
from functools import lru_cache
|
3 |
from typing import Any, ClassVar, Dict, Mapping, Optional
|
4 |
|
5 |
from ctp_slack_bot.models import Content
|
|
|
19 |
pass
|
20 |
|
21 |
|
22 |
+
class MimeTypeHandler(metaclass=MimeTypeHandlerABCMeta):
|
23 |
|
24 |
@classmethod
@lru_cache
def for_mime_type(cls, mime_type: str) -> Optional["MimeTypeHandler"]:
    """Return a handler instance for ``mime_type``, or ``None`` if none is registered.

    The handler class is looked up in ``cls._registry`` and instantiated with
    no arguments. Because the method is wrapped in ``lru_cache``, repeated
    calls with the same ``cls`` and ``mime_type`` return the same object
    rather than building a new handler each time.

    Args:
        mime_type: The MIME type string to look up, e.g. ``"text/vtt"``.

    Returns:
        An instance of the registered handler class, or ``None`` when
        ``mime_type`` has no entry in the registry.
    """
    # NOTE(review): _registry is presumably populated by the metaclass as
    # handler subclasses are defined -- confirm. Misses are memoized too, so
    # a lookup made before a handler is registered keeps returning None.
    handler_type = cls._registry.get(mime_type)
    if handler_type is None:
        # Previously this path called None and raised an opaque TypeError.
        return None
    return handler_type()
|
28 |
|
29 |
@abstractmethod
|
30 |
def from_bytes(cls, id: str, metadata: Mapping[str, Any], buffer: bytes) -> Content:
|
src/ctp_slack_bot/mime_type_handlers/text/vtt.py
CHANGED
@@ -5,23 +5,16 @@ from types import MappingProxyType
|
|
5 |
from typing import Any, Mapping, Optional, Self
|
6 |
from webvtt import WebVTT
|
7 |
|
8 |
-
from ctp_slack_bot.mime_type_handlers import
|
9 |
from ctp_slack_bot.models import Content, WebVTTContent, WebVTTFrame
|
10 |
|
11 |
|
12 |
ISO_DATE_TIME_PATTERN = compile_re(r"Start time: (\d{4}-\d{2}-\d{2}(?: \d{2}:\d{2}:\d{2}(?:Z|[+-]\d{2}:\d{2})?)?)")
|
13 |
|
14 |
-
class WebVTTMimeTypeHandler(
|
15 |
|
16 |
MIME_TYPE = "text/vtt"
|
17 |
|
18 |
-
def from_buffer(self: Self, id: str, metadata: Mapping[str, Any], buffer: bytes) -> WebVTTContent:
|
19 |
-
web_vtt = WebVTT.from_buffer(BytesIO(buffer))
|
20 |
-
frames = tuple(WebVTTFrame.from_webvtt_caption(caption, index)
|
21 |
-
for index, caption
|
22 |
-
in enumerate(web_vtt.captions, 1))
|
23 |
-
return WebVTTContent(id=id, metadata=MappingProxyType(metadata), start_time=cls.__get_start_time(web_vtt), frames=frames)
|
24 |
-
|
25 |
@classmethod
|
26 |
def __get_start_time(cls, web_vtt: WebVTT) -> Optional[datetime]:
|
27 |
try:
|
|
|
5 |
from typing import Any, Mapping, Optional, Self
|
6 |
from webvtt import WebVTT
|
7 |
|
8 |
+
from ctp_slack_bot.mime_type_handlers import MimeTypeHandler
|
9 |
from ctp_slack_bot.models import Content, WebVTTContent, WebVTTFrame
|
10 |
|
11 |
|
12 |
ISO_DATE_TIME_PATTERN = compile_re(r"Start time: (\d{4}-\d{2}-\d{2}(?: \d{2}:\d{2}:\d{2}(?:Z|[+-]\d{2}:\d{2})?)?)")
|
13 |
|
14 |
+
class WebVTTMimeTypeHandler(MimeTypeHandler):
|
15 |
|
16 |
MIME_TYPE = "text/vtt"
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
@classmethod
|
19 |
def __get_start_time(cls, web_vtt: WebVTT) -> Optional[datetime]:
|
20 |
try:
|