LiKenun committed on
Commit
07d8dfc
·
1 Parent(s): cf871ae

Code clean-up: removal of dead code and shortening expressions

Browse files
notebooks/google_drive_web_vtt_vectorizer_and_storer.ipynb CHANGED
@@ -86,7 +86,16 @@
86
  "outputs": [],
87
  "source": [
88
  "web_vtt_parser = container.mime_type_handler_factory(MIME_TYPE)\n",
89
- "display_html(f\"<p>{escape(str(type(web_vtt_parser)))}</p>\")"
 
 
 
 
 
 
 
 
 
90
  ]
91
  },
92
  {
 
86
  "outputs": [],
87
  "source": [
88
  "web_vtt_parser = container.mime_type_handler_factory(MIME_TYPE)\n",
89
+ "display_html(f\"<p>{escape(str(type(web_vtt_parser)))}</p><code>{id(web_vtt_parser)}</code>\")"
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": null,
95
+ "metadata": {},
96
+ "outputs": [],
97
+ "source": [
98
+ "display_html(f\"<code>{id(container.mime_type_handler_factory(MIME_TYPE))}</code>\")"
99
  ]
100
  },
101
  {
src/ctp_slack_bot/containers.py CHANGED
@@ -8,7 +8,7 @@ from slack_bolt.async_app import AsyncApp
8
  from ctp_slack_bot.core.config import Settings
9
  from ctp_slack_bot.db.mongo_db import MongoDBResource
10
  from ctp_slack_bot.db.repositories.mongo_db_vectorized_chunk_repository import MongoVectorizedChunkRepositoryResource
11
- from ctp_slack_bot.mime_type_handlers.base import MimeTypeHandlerMeta
12
  from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
13
  from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
14
  from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
@@ -78,7 +78,7 @@ class Container(DeclarativeContainer): # TODO: audit for potential async-related
78
  slack_service,
79
  slack_bolt_app,
80
  settings)
81
- mime_type_handler_factory = Callable (lambda mime_type: MimeTypeHandlerMeta._registry[mime_type]())
82
  google_drive_service = Singleton(GoogleDriveService,
83
  settings=settings)
84
  # file_monitor_service = Singleton(FileMonitorService,
 
8
  from ctp_slack_bot.core.config import Settings
9
  from ctp_slack_bot.db.mongo_db import MongoDBResource
10
  from ctp_slack_bot.db.repositories.mongo_db_vectorized_chunk_repository import MongoVectorizedChunkRepositoryResource
11
+ from ctp_slack_bot.mime_type_handlers.base import MimeTypeHandler
12
  from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
13
  from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
14
  from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
 
78
  slack_service,
79
  slack_bolt_app,
80
  settings)
81
+ mime_type_handler_factory = Callable (MimeTypeHandler.for_mime_type)
82
  google_drive_service = Singleton(GoogleDriveService,
83
  settings=settings)
84
  # file_monitor_service = Singleton(FileMonitorService,
src/ctp_slack_bot/mime_type_handlers/__init__.py CHANGED
@@ -1,2 +1,2 @@
1
- from ctp_slack_bot.mime_type_handlers.base import BaseMimeTypeHandler, MimeTypeHandlerMeta
2
  from ctp_slack_bot.mime_type_handlers.text.vtt import WebVTTMimeTypeHandler
 
1
+ from ctp_slack_bot.mime_type_handlers.base import MimeTypeHandler, MimeTypeHandlerMeta
2
  from ctp_slack_bot.mime_type_handlers.text.vtt import WebVTTMimeTypeHandler
src/ctp_slack_bot/mime_type_handlers/base.py CHANGED
@@ -1,4 +1,5 @@
1
  from abc import ABCMeta, abstractmethod
 
2
  from typing import Any, ClassVar, Dict, Mapping, Optional
3
 
4
  from ctp_slack_bot.models import Content
@@ -18,11 +19,12 @@ class MimeTypeHandlerABCMeta(MimeTypeHandlerMeta, ABCMeta):
18
  pass
19
 
20
 
21
- class BaseMimeTypeHandler(metaclass=MimeTypeHandlerABCMeta):
22
 
23
  @classmethod
24
- def for_mime_type(cls, mime_type: str) -> Optional[type["BaseMimeHandler"]]:
25
- return cls._registry.get(mime_type)
 
26
 
27
  @abstractmethod
28
  def from_bytes(cls, id: str, metadata: Mapping[str, Any], buffer: bytes) -> Content:
 
1
  from abc import ABCMeta, abstractmethod
2
+ from functools import lru_cache
3
  from typing import Any, ClassVar, Dict, Mapping, Optional
4
 
5
  from ctp_slack_bot.models import Content
 
19
  pass
20
 
21
 
22
+ class MimeTypeHandler(metaclass=MimeTypeHandlerABCMeta):
23
 
24
  @classmethod
25
+ @lru_cache
26
+ def for_mime_type(cls, mime_type: str) -> Optional[type["MimeTypeHandler"]]:
27
+ return cls._registry.get(mime_type)()
28
 
29
  @abstractmethod
30
  def from_bytes(cls, id: str, metadata: Mapping[str, Any], buffer: bytes) -> Content:
src/ctp_slack_bot/mime_type_handlers/text/vtt.py CHANGED
@@ -5,23 +5,16 @@ from types import MappingProxyType
5
  from typing import Any, Mapping, Optional, Self
6
  from webvtt import WebVTT
7
 
8
- from ctp_slack_bot.mime_type_handlers import BaseMimeTypeHandler
9
  from ctp_slack_bot.models import Content, WebVTTContent, WebVTTFrame
10
 
11
 
12
  ISO_DATE_TIME_PATTERN = compile_re(r"Start time: (\d{4}-\d{2}-\d{2}(?: \d{2}:\d{2}:\d{2}(?:Z|[+-]\d{2}:\d{2})?)?)")
13
 
14
- class WebVTTMimeTypeHandler(BaseMimeTypeHandler):
15
 
16
  MIME_TYPE = "text/vtt"
17
 
18
- def from_buffer(self: Self, id: str, metadata: Mapping[str, Any], buffer: bytes) -> WebVTTContent:
19
- web_vtt = WebVTT.from_buffer(BytesIO(buffer))
20
- frames = tuple(WebVTTFrame.from_webvtt_caption(caption, index)
21
- for index, caption
22
- in enumerate(web_vtt.captions, 1))
23
- return WebVTTContent(id=id, metadata=MappingProxyType(metadata), start_time=cls.__get_start_time(web_vtt), frames=frames)
24
-
25
  @classmethod
26
  def __get_start_time(cls, web_vtt: WebVTT) -> Optional[datetime]:
27
  try:
 
5
  from typing import Any, Mapping, Optional, Self
6
  from webvtt import WebVTT
7
 
8
+ from ctp_slack_bot.mime_type_handlers import MimeTypeHandler
9
  from ctp_slack_bot.models import Content, WebVTTContent, WebVTTFrame
10
 
11
 
12
  ISO_DATE_TIME_PATTERN = compile_re(r"Start time: (\d{4}-\d{2}-\d{2}(?: \d{2}:\d{2}:\d{2}(?:Z|[+-]\d{2}:\d{2})?)?)")
13
 
14
+ class WebVTTMimeTypeHandler(MimeTypeHandler):
15
 
16
  MIME_TYPE = "text/vtt"
17
 
 
 
 
 
 
 
 
18
  @classmethod
19
  def __get_start_time(cls, web_vtt: WebVTT) -> Optional[datetime]:
20
  try: