LiKenun committed on
Commit
d1ed688
·
1 Parent(s): bb7c9a3

Use decorator-based plugin registration

Browse files
notebooks/google_drive_web_vtt_vectorizer_and_storer.ipynb CHANGED
@@ -85,7 +85,7 @@
85
  "metadata": {},
86
  "outputs": [],
87
  "source": [
88
- "web_vtt_parser = container.mime_type_handler_factory(MIME_TYPE)\n",
89
  "display_html(f\"<p>{escape(str(type(web_vtt_parser)))}</p><code>{id(web_vtt_parser)}</code>\")"
90
  ]
91
  },
@@ -95,7 +95,7 @@
95
  "metadata": {},
96
  "outputs": [],
97
  "source": [
98
- "display_html(f\"<code>{id(container.mime_type_handler_factory(MIME_TYPE))}</code>\")"
99
  ]
100
  },
101
  {
 
85
  "metadata": {},
86
  "outputs": [],
87
  "source": [
88
+ "web_vtt_parser = container.mime_type_handlers()[MIME_TYPE]\n",
89
  "display_html(f\"<p>{escape(str(type(web_vtt_parser)))}</p><code>{id(web_vtt_parser)}</code>\")"
90
  ]
91
  },
 
95
  "metadata": {},
96
  "outputs": [],
97
  "source": [
98
+ "display_html(f\"<code>{id(container.mime_type_handlers()[MIME_TYPE])}</code>\")"
99
  ]
100
  },
101
  {
src/ctp_slack_bot/app.py CHANGED
@@ -18,7 +18,7 @@ async def main() -> None:
18
  container = Container()
19
  container.wire(packages=["ctp_slack_bot"])
20
 
21
- # Kick off services which should be active from the start.
22
  application_health_service = await container.application_health_service()
23
  container.content_ingestion_service()
24
  container.question_dispatch_service()
 
18
  container = Container()
19
  container.wire(packages=["ctp_slack_bot"])
20
 
21
+ # Instantiate services which should be active from the beginning.
22
  application_health_service = await container.application_health_service()
23
  container.content_ingestion_service()
24
  container.question_dispatch_service()
src/ctp_slack_bot/containers.py CHANGED
@@ -1,5 +1,5 @@
1
  from dependency_injector.containers import DeclarativeContainer
2
- from dependency_injector.providers import Callable, List, Resource, Singleton
3
  from importlib import import_module
4
  from pkgutil import iter_modules
5
  from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
@@ -9,7 +9,7 @@ from types import ModuleType
9
  from ctp_slack_bot.core import Settings
10
  from ctp_slack_bot.db.mongo_db import MongoDBResource
11
  from ctp_slack_bot.db.repositories.mongo_db_vectorized_chunk_repository import MongoVectorizedChunkRepositoryResource
12
- from ctp_slack_bot.mime_type_handlers import MimeTypeHandler
13
  from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
14
  from ctp_slack_bot.services.application_health_service import ApplicationHealthService
15
  from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
@@ -86,7 +86,9 @@ class Container(DeclarativeContainer): # TODO: audit for potential async-related
86
  slack_service,
87
  slack_bolt_app,
88
  settings)
89
- mime_type_handler_factory = Callable (MimeTypeHandler.for_mime_type)
 
 
90
  google_drive_service = Singleton(GoogleDriveService,
91
  settings=settings)
92
  # file_monitor_service = Singleton(FileMonitorService,
 
1
  from dependency_injector.containers import DeclarativeContainer
2
+ from dependency_injector.providers import Callable, Dict, List, Resource, Singleton
3
  from importlib import import_module
4
  from pkgutil import iter_modules
5
  from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
 
9
  from ctp_slack_bot.core import Settings
10
  from ctp_slack_bot.db.mongo_db import MongoDBResource
11
  from ctp_slack_bot.db.repositories.mongo_db_vectorized_chunk_repository import MongoVectorizedChunkRepositoryResource
12
+ from ctp_slack_bot.mime_type_handlers.base import MimeTypeHandlerRegistry
13
  from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
14
  from ctp_slack_bot.services.application_health_service import ApplicationHealthService
15
  from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
 
86
  slack_service,
87
  slack_bolt_app,
88
  settings)
89
+ mime_type_handlers = Dict ({mime_type: Singleton(handler)
90
+ for mime_type, handler
91
+ in MimeTypeHandlerRegistry.get_registry().items()})
92
  google_drive_service = Singleton(GoogleDriveService,
93
  settings=settings)
94
  # file_monitor_service = Singleton(FileMonitorService,
src/ctp_slack_bot/mime_type_handlers/__init__.py CHANGED
@@ -1,2 +1,2 @@
1
- from ctp_slack_bot.mime_type_handlers.base import MimeTypeHandler, MimeTypeHandlerMeta
2
  from ctp_slack_bot.mime_type_handlers.text.vtt import WebVTTMimeTypeHandler
 
1
+ from ctp_slack_bot.mime_type_handlers.base import MimeTypeHandler, MimeTypeHandlerRegistry
2
  from ctp_slack_bot.mime_type_handlers.text.vtt import WebVTTMimeTypeHandler
src/ctp_slack_bot/mime_type_handlers/base.py CHANGED
@@ -1,31 +1,39 @@
1
  from abc import ABC, abstractmethod
2
  from functools import lru_cache
 
 
3
  from typing import Any, ClassVar, Mapping, Optional
4
 
 
5
  from ctp_slack_bot.models import Content
6
 
7
 
8
- class MimeTypeHandlerMeta(type):
9
 
10
- _registry: ClassVar[dict[str, type["BaseMimeTypeHandler"]]] = {}
 
 
11
 
12
- def __init__(cls, name: str, bases: tuple[type, ...], dict: dict[str, Any]) -> None:
13
- super().__init__(name, bases, dict)
14
- if hasattr(cls, "MIME_TYPE"):
15
- MimeTypeHandlerMeta._registry[cls.MIME_TYPE] = cls
16
 
 
17
 
18
- class MimeTypeHandlerABCMeta(MimeTypeHandlerMeta, type(ABC)):
19
- pass
20
 
 
 
 
 
21
 
22
- class MimeTypeHandler(metaclass=MimeTypeHandlerABCMeta):
 
 
 
 
 
 
 
23
 
24
  @classmethod
25
  @lru_cache
26
- def for_mime_type(cls, mime_type: str) -> Optional[type["MimeTypeHandler"]]:
27
  return cls._registry.get(mime_type)()
28
-
29
- @abstractmethod
30
- def from_bytes(cls, id: str, metadata: Mapping[str, Any], buffer: bytes) -> Content:
31
- pass
 
1
  from abc import ABC, abstractmethod
2
  from functools import lru_cache
3
+ from importlib import import_module
4
+ from types import MappingProxyType
5
  from typing import Any, ClassVar, Mapping, Optional
6
 
7
+ from ctp_slack_bot.core import ApplicationComponentBase
8
  from ctp_slack_bot.models import Content
9
 
10
 
11
class MimeTypeHandler(ApplicationComponentBase):
    """Abstract base class for handlers that build ``Content`` from raw bytes.

    Concrete subclasses are associated with a MIME type through
    ``MimeTypeHandlerRegistry.register``.
    """

    @abstractmethod
    def from_bytes(cls, id: str, metadata: Mapping[str, Any], buffer: bytes) -> Content:
        """Build a ``Content`` object from ``buffer``.

        NOTE(review): the first parameter is named ``cls`` but no
        ``@classmethod`` decorator is applied here — confirm how concrete
        subclasses declare their override.

        Args:
            id: Identifier to attach to the resulting content.
            metadata: Mapping of metadata to attach to the resulting content.
            buffer: Raw bytes to be parsed.

        Returns:
            The parsed ``Content``.
        """
        ...
16
 
 
 
 
 
17
 
18
class MimeTypeHandlerRegistry:
    """Process-wide registry mapping MIME type strings to handler classes.

    Handler classes add themselves with the ``register`` decorator; consumers
    read the mapping through ``get_registry`` or obtain a cached instance
    through ``get_handler``.
    """

    # Maps a MIME type (e.g. "text/vtt") to the handler *class* registered for it.
    _registry: "ClassVar[dict[str, type[MimeTypeHandler]]]" = {}

    @classmethod
    def get_registry(cls) -> "Mapping[str, type[MimeTypeHandler]]":
        """Return a read-only view of the MIME type → handler class mapping.

        Returns:
            A ``MappingProxyType`` over the live registry; it cannot be
            mutated through the returned object.
        """
        # Import the enclosing package first so that handler modules it
        # imports have run their ``register`` decorators before the view
        # is handed out.
        import_module(__package__)
        return MappingProxyType(cls._registry)

    @classmethod
    def register(cls, mime_type: str):
        """Return a class decorator that registers a handler for ``mime_type``.

        Args:
            mime_type: The MIME type the decorated class handles.

        Returns:
            A decorator which records the class and returns it unchanged.

        Raises:
            ValueError: (from the decorator) if ``mime_type`` already has a
                registered handler.
        """
        def decorator(handler_cls: "type[MimeTypeHandler]") -> "type[MimeTypeHandler]":
            if mime_type in cls._registry:
                raise ValueError(f"The MIME type, {mime_type}, is already registered.")
            cls._registry[mime_type] = handler_cls
            return handler_cls
        return decorator

    @classmethod
    def get_handler(cls, mime_type: str) -> "Optional[MimeTypeHandler]":
        """Return the cached handler instance for ``mime_type``, if any.

        Args:
            mime_type: The MIME type to look up.

        Returns:
            An instance of the registered handler class (the same instance on
            repeated calls), or ``None`` when nothing is registered.
        """
        # Check membership before touching the cache: a miss must neither
        # try to call ``None`` nor be memoized, otherwise a handler
        # registered later would stay invisible.
        if mime_type not in cls._registry:
            return None
        return cls._instantiate_handler(mime_type)

    @classmethod
    @lru_cache
    def _instantiate_handler(cls, mime_type: str) -> "MimeTypeHandler":
        """Instantiate the handler registered for ``mime_type`` once and memoize it."""
        return cls._registry[mime_type]()
 
 
 
 
src/ctp_slack_bot/mime_type_handlers/text/vtt.py CHANGED
@@ -1,27 +1,28 @@
1
  from datetime import datetime
2
  from io import BytesIO
3
- from re import compile as compile_re
 
4
  from types import MappingProxyType
5
  from typing import Any, Mapping, Optional, Self
6
  from webvtt import WebVTT
7
 
8
- from ctp_slack_bot.mime_type_handlers import MimeTypeHandler
9
  from ctp_slack_bot.models import Content, WebVTTContent, WebVTTFrame
10
 
11
 
12
- ISO_DATE_TIME_PATTERN = compile_re(r"Start time: (\d{4}-\d{2}-\d{2}(?: \d{2}:\d{2}:\d{2}(?:Z|[+-]\d{2}:\d{2})?)?)")
13
-
14
-
15
  class WebVTTMimeTypeHandler(MimeTypeHandler):
16
 
17
- MIME_TYPE = "text/vtt"
 
 
18
 
19
  @classmethod
20
  def __get_start_time(cls, web_vtt: WebVTT) -> Optional[datetime]:
21
  try:
22
  return next(datetime.fromisoformat(result[0])
23
  for result
24
- in map(ISO_DATE_TIME_PATTERN.findall, web_vtt.header_comments)
25
  if result)
26
  except (StopIteration, ValueError):
27
  return None
@@ -32,3 +33,7 @@ class WebVTTMimeTypeHandler(MimeTypeHandler):
32
  for index, caption
33
  in enumerate(web_vtt.captions, 1))
34
  return WebVTTContent(id=id, metadata=MappingProxyType(metadata), start_time=WebVTTMimeTypeHandler.__get_start_time(web_vtt), frames=frames)
 
 
 
 
 
1
  from datetime import datetime
2
  from io import BytesIO
3
+ from pydantic import ConfigDict
4
+ from re import compile as compile_re, Pattern
5
  from types import MappingProxyType
6
  from typing import Any, Mapping, Optional, Self
7
  from webvtt import WebVTT
8
 
9
+ from ctp_slack_bot.mime_type_handlers import MimeTypeHandler, MimeTypeHandlerRegistry
10
  from ctp_slack_bot.models import Content, WebVTTContent, WebVTTFrame
11
 
12
 
13
+ @MimeTypeHandlerRegistry.register("text/vtt")
 
 
14
  class WebVTTMimeTypeHandler(MimeTypeHandler):
15
 
16
+ model_config = ConfigDict(frozen=True)
17
+
18
+ ISO_DATE_TIME_PATTERN: Pattern = compile_re(r"Start time: (\d{4}-\d{2}-\d{2}(?: \d{2}:\d{2}:\d{2}(?:Z|[+-]\d{2}:\d{2})?)?)")
19
 
20
  @classmethod
21
  def __get_start_time(cls, web_vtt: WebVTT) -> Optional[datetime]:
22
  try:
23
  return next(datetime.fromisoformat(result[0])
24
  for result
25
+ in map(cls.ISO_DATE_TIME_PATTERN.findall, web_vtt.header_comments)
26
  if result)
27
  except (StopIteration, ValueError):
28
  return None
 
33
  for index, caption
34
  in enumerate(web_vtt.captions, 1))
35
  return WebVTTContent(id=id, metadata=MappingProxyType(metadata), start_time=WebVTTMimeTypeHandler.__get_start_time(web_vtt), frames=frames)
36
+
37
+ @property
38
+ def name(self: Self) -> str:
39
+ return "web_vtt_mime_type_handler"