Spaces:
Runtime error
Runtime error
Refactor #5
Browse files- README.md +5 -1
- notebooks/google_drive.ipynb +0 -0
- notebooks/google_drive_web_vtt_vectorizer_and_storer.ipynb +29 -11
- notebooks/web_vtt.ipynb +0 -0
- src/ctp_slack_bot/app.py +30 -40
- src/ctp_slack_bot/containers.py +20 -16
- src/ctp_slack_bot/controllers/__init__.py +2 -0
- src/ctp_slack_bot/controllers/application_health_controller.py +24 -0
- src/ctp_slack_bot/controllers/base.py +55 -0
- src/ctp_slack_bot/core/__init__.py +2 -2
- src/ctp_slack_bot/core/logging.py +3 -2
- src/ctp_slack_bot/db/__init__.py +1 -1
- src/ctp_slack_bot/db/mongo_db.py +3 -3
- src/ctp_slack_bot/db/repositories/__init__.py +3 -3
- src/ctp_slack_bot/db/repositories/mongo_db_vector_repository_base.py +39 -0
- src/ctp_slack_bot/db/repositories/mongo_db_vectorized_chunk_repository.py +25 -5
- src/ctp_slack_bot/db/repositories/vector_repository_base.py +0 -59
- src/ctp_slack_bot/mime_type_handlers/__init__.py +2 -2
- src/ctp_slack_bot/mime_type_handlers/text/__init__.py +1 -1
- src/ctp_slack_bot/mime_type_handlers/text/vtt.py +1 -1
- src/ctp_slack_bot/models/__init__.py +4 -4
- src/ctp_slack_bot/models/slack.py +1 -1
- src/ctp_slack_bot/models/webvtt.py +1 -1
- src/ctp_slack_bot/services/__init__.py +13 -11
- src/ctp_slack_bot/services/answer_retrieval_service.py +2 -2
- src/ctp_slack_bot/services/content_ingestion_service.py +2 -2
- src/ctp_slack_bot/services/context_retrieval_service.py +1 -1
- src/ctp_slack_bot/services/http_server_service.py +42 -6
- src/ctp_slack_bot/services/question_dispatch_service.py +3 -3
- src/ctp_slack_bot/services/slack_service.py +1 -1
- src/ctp_slack_bot/services/vectorization_service.py +1 -1
- src/ctp_slack_bot/utils/__init__.py +2 -2
README.md
CHANGED
@@ -84,9 +84,13 @@ Not every file or folder is listed, but the important stuff is here.
|
|
84 |
* `src/`
|
85 |
* `ctp_slack_bot/`
|
86 |
* `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions
|
|
|
87 |
* `db/`: data connection and interface logic
|
88 |
* `repositories/`: data collection/table interface logic
|
|
|
|
|
89 |
* `models/`: data models
|
|
|
90 |
* `services/`: business logic
|
91 |
* `answer_retrieval_service.py`: obtains an answer to a question from a language model using relevant context
|
92 |
* `application_health_service.py`: collects the health status of the application components
|
@@ -94,11 +98,11 @@ Not every file or folder is listed, but the important stuff is here.
|
|
94 |
* `context_retrieval_service.py`: queries for relevant context from the database to answer a question
|
95 |
* `embeddings_model_service.py`: converts text to embeddings
|
96 |
* `event_brokerage_service.py`: brokers events between decoupled components
|
|
|
97 |
* `language_model_service.py`: answers questions using relevant context
|
98 |
* `question_dispatch_service.py`: listens for questions and retrieves relevant context to get answers
|
99 |
* `schedule_service.py`: runs background jobs
|
100 |
* `slack_service.py`: handles events from Slack and sends back responses
|
101 |
-
* `vector_database_service.py`: stores and queries chunks
|
102 |
* `vectorization_service.py`: converts chunks into chunks with embeddings
|
103 |
* `tasks/`: scheduled tasks to run in the background
|
104 |
* `utils/`: reusable utilities
|
|
|
84 |
* `src/`
|
85 |
* `ctp_slack_bot/`
|
86 |
* `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions
|
87 |
+
* `config.py`: application settings model
|
88 |
* `db/`: data connection and interface logic
|
89 |
* `repositories/`: data collection/table interface logic
|
90 |
+
* `mongo_db_vectorized_repository_base.py`: base implementation of a repository corresponding to a MongoDB collection with a search index
|
91 |
+
* `vectorized_chunk_repository.py`: repository interface for `VectorizedChunk`s
|
92 |
* `models/`: data models
|
93 |
+
* `mime_type_handlers`: parsers for converting bytes of different MIME types to `Chunk`s
|
94 |
* `services/`: business logic
|
95 |
* `answer_retrieval_service.py`: obtains an answer to a question from a language model using relevant context
|
96 |
* `application_health_service.py`: collects the health status of the application components
|
|
|
98 |
* `context_retrieval_service.py`: queries for relevant context from the database to answer a question
|
99 |
* `embeddings_model_service.py`: converts text to embeddings
|
100 |
* `event_brokerage_service.py`: brokers events between decoupled components
|
101 |
+
* `google_drive_service.py`: interfaces with Google Drive
|
102 |
* `language_model_service.py`: answers questions using relevant context
|
103 |
* `question_dispatch_service.py`: listens for questions and retrieves relevant context to get answers
|
104 |
* `schedule_service.py`: runs background jobs
|
105 |
* `slack_service.py`: handles events from Slack and sends back responses
|
|
|
106 |
* `vectorization_service.py`: converts chunks into chunks with embeddings
|
107 |
* `tasks/`: scheduled tasks to run in the background
|
108 |
* `utils/`: reusable utilities
|
notebooks/google_drive.ipynb
DELETED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/google_drive_web_vtt_vectorizer_and_storer.ipynb
CHANGED
@@ -27,9 +27,8 @@
|
|
27 |
"display_html = partial(display_html, raw=True)\n",
|
28 |
"\n",
|
29 |
"container = Container()\n",
|
30 |
-
"mime_type_handlers = container.mime_type_handlers()\n",
|
31 |
-
"mongo_db = await container.mongo_db()\n",
|
32 |
"google_drive_service = container.google_drive_service()\n",
|
|
|
33 |
"vectorization_service = container.vectorization_service()\n",
|
34 |
"vectorized_chunk_repository = await container.vectorized_chunk_repository()"
|
35 |
]
|
@@ -58,7 +57,7 @@
|
|
58 |
"cell_type": "markdown",
|
59 |
"metadata": {},
|
60 |
"source": [
|
61 |
-
"##
|
62 |
]
|
63 |
},
|
64 |
{
|
@@ -69,12 +68,26 @@
|
|
69 |
"source": [
|
70 |
"item_metadata = google_drive_service.list_directory(\"\", True)\n",
|
71 |
"display_html(f\"<p>Found {len(item_metadata)} files/folders.</p>\")\n",
|
72 |
-
"display_html(\"\".join(chain(\"<ul>\", (f\"<li>{escape(metadata.folder_path)}/{escape(metadata.name)}</li>\" for metadata in item_metadata), \"</ul>\")))
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
"recent_metadata = tuple(filter(lambda metadata: MODIFICATION_TIME_CUTOFF <= metadata.modified_time, item_metadata))\n",
|
75 |
"display_html(f\"<p>{len(item_metadata)} files/folders pass the modification time (<em>{MODIFICATION_TIME_CUTOFF}</em>) cut-off.</p>\")\n",
|
76 |
-
"display_html(\"\".join(chain(\"<ul>\", (f\"<li>{escape(metadata.folder_path)}/{escape(metadata.name)}</li>\" for metadata in recent_metadata), \"</ul>\")))
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
"metadata_to_process = tuple(filter(lambda metadata: metadata.mime_type == MIME_TYPE, recent_metadata))\n",
|
79 |
"display_html(f\"<p>{len(item_metadata)} files/folders pass the modification time (<em>{MODIFICATION_TIME_CUTOFF}</em>) cut-off and MIME type (<em>{MIME_TYPE}</em>) criterion.</p>\")\n",
|
80 |
"display_html(\"\".join(chain(\"<ul>\", (f\"<li>{escape(metadata.folder_path)}/{escape(metadata.name)}</li>\" for metadata in metadata_to_process), \"</ul>\")))"
|
@@ -87,7 +100,7 @@
|
|
87 |
"outputs": [],
|
88 |
"source": [
|
89 |
"web_vtt_parser = mime_type_handlers[MIME_TYPE]\n",
|
90 |
-
"display_html(f\"<p>{escape(
|
91 |
]
|
92 |
},
|
93 |
{
|
@@ -109,6 +122,13 @@
|
|
109 |
"display_html(f\"Processed {len(web_vtts)} files.\")"
|
110 |
]
|
111 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
{
|
113 |
"cell_type": "code",
|
114 |
"execution_count": null,
|
@@ -128,9 +148,7 @@
|
|
128 |
"cell_type": "markdown",
|
129 |
"metadata": {},
|
130 |
"source": [
|
131 |
-
"##
|
132 |
-
"\n",
|
133 |
-
"Don’t forget to clean up…"
|
134 |
]
|
135 |
},
|
136 |
{
|
|
|
27 |
"display_html = partial(display_html, raw=True)\n",
|
28 |
"\n",
|
29 |
"container = Container()\n",
|
|
|
|
|
30 |
"google_drive_service = container.google_drive_service()\n",
|
31 |
+
"mime_type_handlers = container.mime_type_handlers()\n",
|
32 |
"vectorization_service = container.vectorization_service()\n",
|
33 |
"vectorized_chunk_repository = await container.vectorized_chunk_repository()"
|
34 |
]
|
|
|
57 |
"cell_type": "markdown",
|
58 |
"metadata": {},
|
59 |
"source": [
|
60 |
+
"## Get the Files"
|
61 |
]
|
62 |
},
|
63 |
{
|
|
|
68 |
"source": [
|
69 |
"item_metadata = google_drive_service.list_directory(\"\", True)\n",
|
70 |
"display_html(f\"<p>Found {len(item_metadata)} files/folders.</p>\")\n",
|
71 |
+
"display_html(\"\".join(chain(\"<ul>\", (f\"<li>{escape(metadata.folder_path)}/{escape(metadata.name)}</li>\" for metadata in item_metadata), \"</ul>\")))"
|
72 |
+
]
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"cell_type": "code",
|
76 |
+
"execution_count": null,
|
77 |
+
"metadata": {},
|
78 |
+
"outputs": [],
|
79 |
+
"source": [
|
80 |
"recent_metadata = tuple(filter(lambda metadata: MODIFICATION_TIME_CUTOFF <= metadata.modified_time, item_metadata))\n",
|
81 |
"display_html(f\"<p>{len(item_metadata)} files/folders pass the modification time (<em>{MODIFICATION_TIME_CUTOFF}</em>) cut-off.</p>\")\n",
|
82 |
+
"display_html(\"\".join(chain(\"<ul>\", (f\"<li>{escape(metadata.folder_path)}/{escape(metadata.name)}</li>\" for metadata in recent_metadata), \"</ul>\")))"
|
83 |
+
]
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"cell_type": "code",
|
87 |
+
"execution_count": null,
|
88 |
+
"metadata": {},
|
89 |
+
"outputs": [],
|
90 |
+
"source": [
|
91 |
"metadata_to_process = tuple(filter(lambda metadata: metadata.mime_type == MIME_TYPE, recent_metadata))\n",
|
92 |
"display_html(f\"<p>{len(item_metadata)} files/folders pass the modification time (<em>{MODIFICATION_TIME_CUTOFF}</em>) cut-off and MIME type (<em>{MIME_TYPE}</em>) criterion.</p>\")\n",
|
93 |
"display_html(\"\".join(chain(\"<ul>\", (f\"<li>{escape(metadata.folder_path)}/{escape(metadata.name)}</li>\" for metadata in metadata_to_process), \"</ul>\")))"
|
|
|
100 |
"outputs": [],
|
101 |
"source": [
|
102 |
"web_vtt_parser = mime_type_handlers[MIME_TYPE]\n",
|
103 |
+
"display_html(f\"<p><code>{escape(type(web_vtt_parser).__name__)}</code> instance identifier <code>{id(web_vtt_parser)}</code>\")"
|
104 |
]
|
105 |
},
|
106 |
{
|
|
|
122 |
"display_html(f\"Processed {len(web_vtts)} files.\")"
|
123 |
]
|
124 |
},
|
125 |
+
{
|
126 |
+
"cell_type": "markdown",
|
127 |
+
"metadata": {},
|
128 |
+
"source": [
|
129 |
+
"## Chunk, Vectorize, and Store"
|
130 |
+
]
|
131 |
+
},
|
132 |
{
|
133 |
"cell_type": "code",
|
134 |
"execution_count": null,
|
|
|
148 |
"cell_type": "markdown",
|
149 |
"metadata": {},
|
150 |
"source": [
|
151 |
+
"## Clean Up"
|
|
|
|
|
152 |
]
|
153 |
},
|
154 |
{
|
notebooks/web_vtt.ipynb
DELETED
The diff for this file is too large to render.
See raw diff
|
|
src/ctp_slack_bot/app.py
CHANGED
@@ -1,12 +1,25 @@
|
|
1 |
-
from
|
2 |
-
from asyncio import all_tasks, CancelledError, create_task, current_task, get_running_loop, run
|
3 |
-
from json import dumps
|
4 |
from loguru import logger
|
5 |
from signal import SIGINT, SIGTERM
|
6 |
-
from typing import
|
7 |
|
8 |
-
from
|
9 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
|
12 |
async def main() -> None:
|
@@ -17,54 +30,31 @@ async def main() -> None:
|
|
17 |
# Set up dependency injection container.
|
18 |
container = Container()
|
19 |
container.wire(packages=["ctp_slack_bot"])
|
|
|
20 |
|
21 |
-
#
|
22 |
-
application_health_service = await container.application_health_service()
|
23 |
container.content_ingestion_service()
|
24 |
container.question_dispatch_service()
|
|
|
|
|
25 |
container.schedule_service()
|
|
|
26 |
|
27 |
-
|
28 |
-
health_statuses = await application_health_service.get_health()
|
29 |
-
if all(health_statuses.values()):
|
30 |
-
return Response(text=dumps(dict(health_statuses)), content_type="application/json")
|
31 |
-
else:
|
32 |
-
return Response(body=dumps(dict(health_statuses)), content_type="application/json", status=503)
|
33 |
-
http_server = WebApplication()
|
34 |
-
http_server.router.add_get("/health", health)
|
35 |
-
web_app_runner = WebAppRunner(http_server)
|
36 |
-
await web_app_runner.setup()
|
37 |
-
website = TCPSite(web_app_runner, "0.0.0.0", 8080) # TODO: Un-hard-code
|
38 |
-
await website.start()
|
39 |
-
|
40 |
-
async def handle_shutdown_signal() -> None:
|
41 |
-
logger.info("Received shutdown signal.")
|
42 |
-
await web_app_runner.cleanup()
|
43 |
-
for task in all_tasks():
|
44 |
-
if task is not current_task() and not task.done():
|
45 |
-
task.cancel()
|
46 |
-
logger.trace("Cancelled task {}.", task.get_name())
|
47 |
-
logger.info("Cancelled all tasks.")
|
48 |
-
|
49 |
-
def create_shutdown_signal_handler() -> Callable[[], None]:
|
50 |
-
def shutdown_signal_handler() -> None:
|
51 |
-
create_task(handle_shutdown_signal())
|
52 |
-
return shutdown_signal_handler
|
53 |
-
|
54 |
-
# Start the Slack socket mode handler in the background.
|
55 |
-
socket_mode_handler = await container.socket_mode_handler()
|
56 |
-
slack_bolt_task = create_task(socket_mode_handler.start_async())
|
57 |
shutdown_signal_handler = create_shutdown_signal_handler()
|
58 |
loop = get_running_loop()
|
59 |
loop.add_signal_handler(SIGINT, shutdown_signal_handler)
|
60 |
loop.add_signal_handler(SIGTERM, shutdown_signal_handler)
|
|
|
|
|
61 |
try:
|
62 |
-
logger.info("Starting Slack Socket Mode handler…")
|
63 |
-
await
|
64 |
except CancelledError:
|
65 |
logger.info("Shutting down application…")
|
66 |
finally:
|
67 |
await socket_mode_handler.close_async()
|
|
|
68 |
await container.shutdown_resources()
|
69 |
|
70 |
|
|
|
1 |
+
from asyncio import all_tasks, CancelledError, create_task, current_task, gather, get_running_loop, run
|
|
|
|
|
2 |
from loguru import logger
|
3 |
from signal import SIGINT, SIGTERM
|
4 |
+
from typing import Callable
|
5 |
|
6 |
+
from containers import Container
|
7 |
+
from core.logging import setup_logging
|
8 |
+
|
9 |
+
|
10 |
+
async def handle_shutdown_signal() -> None:
|
11 |
+
logger.info("Received shutdown signal.")
|
12 |
+
for task in all_tasks():
|
13 |
+
if task is not current_task() and not task.done():
|
14 |
+
task.cancel()
|
15 |
+
logger.trace("Cancelled task {}.", task.get_name())
|
16 |
+
logger.info("Cancelled all tasks.")
|
17 |
+
|
18 |
+
|
19 |
+
def create_shutdown_signal_handler() -> Callable[[], None]:
|
20 |
+
def shutdown_signal_handler() -> None:
|
21 |
+
create_task(handle_shutdown_signal())
|
22 |
+
return shutdown_signal_handler
|
23 |
|
24 |
|
25 |
async def main() -> None:
|
|
|
30 |
# Set up dependency injection container.
|
31 |
container = Container()
|
32 |
container.wire(packages=["ctp_slack_bot"])
|
33 |
+
logger.debug("Created dependency injection container with providers: {}", '; '.join(container.providers))
|
34 |
|
35 |
+
# Initialize/instantiate services which should be active from the beginning.
|
|
|
36 |
container.content_ingestion_service()
|
37 |
container.question_dispatch_service()
|
38 |
+
http_server = await container.http_server()
|
39 |
+
socket_mode_handler = await container.socket_mode_handler()
|
40 |
container.schedule_service()
|
41 |
+
logger.debug("Initialized services.")
|
42 |
|
43 |
+
# Install the shutdown signal handler.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
shutdown_signal_handler = create_shutdown_signal_handler()
|
45 |
loop = get_running_loop()
|
46 |
loop.add_signal_handler(SIGINT, shutdown_signal_handler)
|
47 |
loop.add_signal_handler(SIGTERM, shutdown_signal_handler)
|
48 |
+
|
49 |
+
# Start the HTTP server and Slack socket mode handler in the background; clean up resources when shut down.
|
50 |
try:
|
51 |
+
logger.info("Starting HTTP server and Slack Socket Mode handler…")
|
52 |
+
await gather(http_server.start(), socket_mode_handler.start_async())
|
53 |
except CancelledError:
|
54 |
logger.info("Shutting down application…")
|
55 |
finally:
|
56 |
await socket_mode_handler.close_async()
|
57 |
+
logger.info("Stopped Slack Socket Mode handler.")
|
58 |
await container.shutdown_resources()
|
59 |
|
60 |
|
src/ctp_slack_bot/containers.py
CHANGED
@@ -1,16 +1,19 @@
|
|
1 |
from dependency_injector.containers import DeclarativeContainer
|
2 |
-
from dependency_injector.providers import Callable, Dict, List, Resource, Singleton
|
3 |
from importlib import import_module
|
|
|
4 |
from openai import AsyncOpenAI
|
5 |
from pkgutil import iter_modules
|
6 |
from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
|
7 |
from slack_bolt.async_app import AsyncApp
|
8 |
from types import ModuleType
|
|
|
9 |
|
|
|
10 |
from ctp_slack_bot.core import Settings
|
11 |
from ctp_slack_bot.db.mongo_db import MongoDBResource
|
12 |
from ctp_slack_bot.db.repositories.mongo_db_vectorized_chunk_repository import MongoVectorizedChunkRepositoryResource
|
13 |
-
from ctp_slack_bot.mime_type_handlers
|
14 |
from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
|
15 |
from ctp_slack_bot.services.application_health_service import ApplicationHealthService
|
16 |
from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
|
@@ -19,6 +22,7 @@ from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelServi
|
|
19 |
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
20 |
from ctp_slack_bot.services.google_drive_service import GoogleDriveService
|
21 |
from ctp_slack_bot.services.http_client_service import HTTPClientServiceResource
|
|
|
22 |
from ctp_slack_bot.services.language_model_service import LanguageModelService
|
23 |
from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
|
24 |
from ctp_slack_bot.services.schedule_service import ScheduleServiceResource
|
@@ -26,21 +30,16 @@ from ctp_slack_bot.services.slack_service import SlackServiceResource
|
|
26 |
from ctp_slack_bot.services.vectorization_service import VectorizationService
|
27 |
|
28 |
|
29 |
-
def __scan_and_import_modules(package_name: str) -> None:
|
30 |
-
try:
|
31 |
-
package: Optional[ModuleType] = import_module(package_name)
|
32 |
-
except ImportError as e:
|
33 |
-
raise ValueError(f"Package {package_name} not found") from e
|
34 |
-
if not hasattr(package, '__path__'):
|
35 |
-
raise ValueError(f"{package_name} is not a package")
|
36 |
-
for _, module_name, is_pkg in iter_modules(package.__path__):
|
37 |
-
if not is_pkg:
|
38 |
-
import_module(f"{package.__name__}.{module_name}")
|
39 |
-
|
40 |
-
__scan_and_import_modules("ctp_slack_bot.mime_type_handlers")
|
41 |
-
|
42 |
-
|
43 |
class Container(DeclarativeContainer): # TODO: audit for potential async-related bugs.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
settings = Singleton(Settings)
|
45 |
event_brokerage_service = Singleton(EventBrokerageService)
|
46 |
schedule_service = Resource (ScheduleServiceResource,
|
@@ -102,3 +101,8 @@ class Container(DeclarativeContainer): # TODO: audit for potential async-related
|
|
102 |
# mime_type_handler_factory=mime_type_handler_factory)
|
103 |
application_health_service = Singleton(ApplicationHealthService,
|
104 |
services=List(mongo_db, slack_service))
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from dependency_injector.containers import DeclarativeContainer
|
2 |
+
from dependency_injector.providers import Coroutine, Callable, Dict, List, Resource, Self, Singleton
|
3 |
from importlib import import_module
|
4 |
+
from itertools import chain
|
5 |
from openai import AsyncOpenAI
|
6 |
from pkgutil import iter_modules
|
7 |
from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
|
8 |
from slack_bolt.async_app import AsyncApp
|
9 |
from types import ModuleType
|
10 |
+
from typing import Sequence
|
11 |
|
12 |
+
from ctp_slack_bot.controllers import ControllerBase, ControllerRegistry
|
13 |
from ctp_slack_bot.core import Settings
|
14 |
from ctp_slack_bot.db.mongo_db import MongoDBResource
|
15 |
from ctp_slack_bot.db.repositories.mongo_db_vectorized_chunk_repository import MongoVectorizedChunkRepositoryResource
|
16 |
+
from ctp_slack_bot.mime_type_handlers import MimeTypeHandlerRegistry
|
17 |
from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
|
18 |
from ctp_slack_bot.services.application_health_service import ApplicationHealthService
|
19 |
from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
|
|
|
22 |
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
23 |
from ctp_slack_bot.services.google_drive_service import GoogleDriveService
|
24 |
from ctp_slack_bot.services.http_client_service import HTTPClientServiceResource
|
25 |
+
from ctp_slack_bot.services.http_server_service import HTTPServerResource
|
26 |
from ctp_slack_bot.services.language_model_service import LanguageModelService
|
27 |
from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
|
28 |
from ctp_slack_bot.services.schedule_service import ScheduleServiceResource
|
|
|
30 |
from ctp_slack_bot.services.vectorization_service import VectorizationService
|
31 |
|
32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
class Container(DeclarativeContainer): # TODO: audit for potential async-related bugs.
|
34 |
+
async def __get_http_controllers(dependency_container: Self) -> Sequence[ControllerBase]:
|
35 |
+
return [controller_class(**{dependency_name: await dependency()
|
36 |
+
for dependency_name, dependency
|
37 |
+
in dependency_container.providers.items()
|
38 |
+
if dependency_name != "__self__" and dependency_name in controller_class.model_fields})
|
39 |
+
for controller_class
|
40 |
+
in ControllerRegistry.get_registry()] # TODO: mutable until async support is extended to tuples
|
41 |
+
|
42 |
+
__self__ = Self()
|
43 |
settings = Singleton(Settings)
|
44 |
event_brokerage_service = Singleton(EventBrokerageService)
|
45 |
schedule_service = Resource (ScheduleServiceResource,
|
|
|
101 |
# mime_type_handler_factory=mime_type_handler_factory)
|
102 |
application_health_service = Singleton(ApplicationHealthService,
|
103 |
services=List(mongo_db, slack_service))
|
104 |
+
http_controllers = Callable (__get_http_controllers,
|
105 |
+
dependency_container=__self__)
|
106 |
+
http_server = Resource (HTTPServerResource,
|
107 |
+
settings=settings,
|
108 |
+
controllers=http_controllers)
|
src/ctp_slack_bot/controllers/__init__.py
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
from .application_health_controller import ApplicationHealthController
|
2 |
+
from .base import ControllerBase, ControllerRegistry, Route
|
src/ctp_slack_bot/controllers/application_health_controller.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from aiohttp.web import json_response, Request, Response
|
2 |
+
from pydantic import ConfigDict
|
3 |
+
from typing import Self
|
4 |
+
|
5 |
+
from .base import ControllerBase, ControllerRegistry, Route
|
6 |
+
from ctp_slack_bot.services import ApplicationHealthService
|
7 |
+
|
8 |
+
|
9 |
+
@ControllerRegistry.register()
|
10 |
+
class ApplicationHealthController(ControllerBase):
|
11 |
+
"""
|
12 |
+
Application health reporting endpoints.
|
13 |
+
"""
|
14 |
+
|
15 |
+
application_health_service: ApplicationHealthService
|
16 |
+
|
17 |
+
@Route.get("/health")
|
18 |
+
async def get_health(self: Self, request: Request) -> Response:
|
19 |
+
health_statuses = await self.application_health_service.get_health()
|
20 |
+
return json_response(dict(health_statuses), status=200 if all(health_statuses.values()) else 503)
|
21 |
+
|
22 |
+
@property
|
23 |
+
def name(self: Self) -> str:
|
24 |
+
return "application_health_controller"
|
src/ctp_slack_bot/controllers/base.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from aiohttp.web import Request, Response
|
2 |
+
from functools import partial
|
3 |
+
from importlib import import_module
|
4 |
+
from inspect import getmembers, ismethod
|
5 |
+
from pydantic import BaseModel, ConfigDict
|
6 |
+
from typing import Awaitable, Callable, ClassVar, Mapping, Self, Sequence, TypeVar
|
7 |
+
|
8 |
+
from ctp_slack_bot.core import ApplicationComponentBase
|
9 |
+
|
10 |
+
|
11 |
+
AsyncHandler = Callable[[Request], Awaitable[Response]]
|
12 |
+
|
13 |
+
|
14 |
+
class Route(BaseModel):
|
15 |
+
model_config = ConfigDict(frozen=True)
|
16 |
+
|
17 |
+
method: str
|
18 |
+
path: str
|
19 |
+
handler: AsyncHandler
|
20 |
+
|
21 |
+
@staticmethod
|
22 |
+
def get(path: str) -> Callable[[AsyncHandler], AsyncHandler]:
|
23 |
+
def decorator(function: AsyncHandler) -> AsyncHandler:
|
24 |
+
function._http_method = "GET"
|
25 |
+
function._http_path = path
|
26 |
+
return function
|
27 |
+
return decorator
|
28 |
+
|
29 |
+
|
30 |
+
class ControllerBase(ApplicationComponentBase):
|
31 |
+
|
32 |
+
def get_routes(self: Self) -> Sequence[Route]:
|
33 |
+
return tuple(Route(method=method._http_method, path=method._http_path, handler=method)
|
34 |
+
for name, method in getmembers(self, predicate=ismethod)
|
35 |
+
if name != 'get_routes' and hasattr(method, "_http_method") and hasattr(method, "_http_path"))
|
36 |
+
|
37 |
+
|
38 |
+
T = TypeVar('T', bound=ControllerBase)
|
39 |
+
|
40 |
+
|
41 |
+
class ControllerRegistry:
|
42 |
+
|
43 |
+
_registry: ClassVar[list[T]] = []
|
44 |
+
|
45 |
+
@classmethod
|
46 |
+
def get_registry(cls) -> Mapping[T, Sequence[Route]]:
|
47 |
+
import_module(__package__)
|
48 |
+
return tuple(cls._registry)
|
49 |
+
|
50 |
+
@classmethod
|
51 |
+
def register(cls):
|
52 |
+
def decorator(controller_cls: T):
|
53 |
+
cls._registry.append(controller_cls)
|
54 |
+
return controller_cls
|
55 |
+
return decorator
|
src/ctp_slack_bot/core/__init__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
from
|
2 |
-
from
|
|
|
1 |
+
from .abstractions import AbstractBaseModel, ApplicationComponentBase, HealthReportingApplicationComponentBase
|
2 |
+
from .config import Settings
|
src/ctp_slack_bot/core/logging.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord, WARNING
|
2 |
from loguru import logger
|
3 |
from os import access, getenv, W_OK
|
4 |
from sys import stderr
|
@@ -94,9 +94,10 @@ def setup_logging() -> None:
|
|
94 |
basicConfig(handlers=[InterceptHandler()], level=0, force=True)
|
95 |
|
96 |
# Update logging levels for some noisy libraries.
|
97 |
-
for logger_name in ("uvicorn", "uvicorn.error", "fastapi", "httpx", "pymongo"):
|
98 |
getLogger(logger_name).setLevel(INFO)
|
99 |
for logger_name in ("apscheduler"):
|
100 |
getLogger(logger_name).setLevel(WARNING)
|
|
|
101 |
|
102 |
logger.info(f"Logging configured with level {log_level}")
|
|
|
1 |
+
from logging import __file__ as logging_file, basicConfig, currentframe, ERROR, getLogger, Handler, INFO, LogRecord, WARNING
|
2 |
from loguru import logger
|
3 |
from os import access, getenv, W_OK
|
4 |
from sys import stderr
|
|
|
94 |
basicConfig(handlers=[InterceptHandler()], level=0, force=True)
|
95 |
|
96 |
# Update logging levels for some noisy libraries.
|
97 |
+
for logger_name in ("logging", "uvicorn", "uvicorn.error", "fastapi", "httpx", "pymongo"):
|
98 |
getLogger(logger_name).setLevel(INFO)
|
99 |
for logger_name in ("apscheduler"):
|
100 |
getLogger(logger_name).setLevel(WARNING)
|
101 |
+
getLogger().setLevel(WARNING)
|
102 |
|
103 |
logger.info(f"Logging configured with level {log_level}")
|
src/ctp_slack_bot/db/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1 |
-
from
|
|
|
1 |
+
from .mongo_db import MongoDB
|
src/ctp_slack_bot/db/mongo_db.py
CHANGED
@@ -47,7 +47,7 @@ class MongoDB(HealthReportingApplicationComponentBase):
|
|
47 |
logger.error("Failed to initialize MongoDB client: {}", e)
|
48 |
self._client = None
|
49 |
self._db = None
|
50 |
-
raise
|
51 |
|
52 |
async def ping(self: Self) -> bool:
|
53 |
"""Check if MongoDB connection is alive."""
|
@@ -89,7 +89,7 @@ class MongoDB(HealthReportingApplicationComponentBase):
|
|
89 |
return collection
|
90 |
except Exception as e:
|
91 |
logger.error("Error accessing collection '{}': {}", name, e)
|
92 |
-
raise
|
93 |
|
94 |
def close(self: Self) -> None:
|
95 |
"""Close the MongoDB connection."""
|
@@ -125,7 +125,7 @@ class MongoDBResource(AsyncResource):
|
|
125 |
logger.error("MongoDB connection test failed!")
|
126 |
except Exception as e:
|
127 |
logger.error("Error testing MongoDB connection: {}", e)
|
128 |
-
raise
|
129 |
|
130 |
async def shutdown(self: Self, mongo_db: MongoDB) -> None:
|
131 |
"""Close MongoDB connection on shutdown."""
|
|
|
47 |
logger.error("Failed to initialize MongoDB client: {}", e)
|
48 |
self._client = None
|
49 |
self._db = None
|
50 |
+
raise e
|
51 |
|
52 |
async def ping(self: Self) -> bool:
|
53 |
"""Check if MongoDB connection is alive."""
|
|
|
89 |
return collection
|
90 |
except Exception as e:
|
91 |
logger.error("Error accessing collection '{}': {}", name, e)
|
92 |
+
raise e
|
93 |
|
94 |
def close(self: Self) -> None:
|
95 |
"""Close the MongoDB connection."""
|
|
|
125 |
logger.error("MongoDB connection test failed!")
|
126 |
except Exception as e:
|
127 |
logger.error("Error testing MongoDB connection: {}", e)
|
128 |
+
raise e
|
129 |
|
130 |
async def shutdown(self: Self, mongo_db: MongoDB) -> None:
|
131 |
"""Close MongoDB connection on shutdown."""
|
src/ctp_slack_bot/db/repositories/__init__.py
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
-
from
|
2 |
-
from
|
3 |
-
from
|
|
|
1 |
+
from .mongo_db_vectorized_chunk_repository import MongoVectorizedChunkRepository
|
2 |
+
from .vectorized_chunk_repository import VectorizedChunkRepository
|
3 |
+
from .mongo_db_vector_repository_base import MongoDbVectorRepositoryBase
|
src/ctp_slack_bot/db/repositories/mongo_db_vector_repository_base.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from abc import abstractmethod
|
2 |
+
from loguru import logger
|
3 |
+
from motor.motor_asyncio import AsyncIOMotorCollection
|
4 |
+
from pydantic import ConfigDict
|
5 |
+
from pymongo.operations import SearchIndexModel
|
6 |
+
from typing import Self
|
7 |
+
|
8 |
+
from ctp_slack_bot.core import ApplicationComponentBase
|
9 |
+
|
10 |
+
|
11 |
+
class MongoDbVectorRepositoryBase(ApplicationComponentBase):
|
12 |
+
"""Basic MongoDB vector repository implementation"""
|
13 |
+
|
14 |
+
model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
|
15 |
+
|
16 |
+
collection: AsyncIOMotorCollection
|
17 |
+
|
18 |
+
async def ensure_indices_exist(self: Self) -> None:
|
19 |
+
"""Ensure that indices exist."""
|
20 |
+
await self.ensure_search_index_exists()
|
21 |
+
|
22 |
+
async def ensure_search_index_exists(self: Self) -> None:
|
23 |
+
"""
|
24 |
+
Ensure that a vector search index exists.
|
25 |
+
"""
|
26 |
+
search_index_model = self._search_index_model
|
27 |
+
index_name = search_index_model.document["name"]
|
28 |
+
existing_indices = [index["name"] async for index in self.collection.list_search_indexes()]
|
29 |
+
logger.debug("{} existing indices were found: {}", len(existing_indices), existing_indices)
|
30 |
+
if index_name in existing_indices:
|
31 |
+
logger.debug("Index, {}, already exists; duplicate index will not be created.", index_name)
|
32 |
+
return
|
33 |
+
result = await self.collection.create_search_index(search_index_model)
|
34 |
+
logger.info("Vector search index, {}, created for collection {}.", result, self.collection.name)
|
35 |
+
|
36 |
+
@property
|
37 |
+
@abstractmethod
|
38 |
+
def _search_index_model(self: Self) -> SearchIndexModel:
|
39 |
+
pass
|
src/ctp_slack_bot/db/repositories/mongo_db_vectorized_chunk_repository.py
CHANGED
@@ -1,18 +1,20 @@
|
|
1 |
from dependency_injector.resources import AsyncResource
|
2 |
from loguru import logger
|
3 |
from pymongo import ASCENDING, ReturnDocument
|
|
|
4 |
from typing import Any, Collection, Iterable, Mapping, Optional, Self, Sequence, Set
|
5 |
|
6 |
from ctp_slack_bot.core import Settings
|
7 |
from ctp_slack_bot.models import Chunk, VectorizedChunk, VectorQuery
|
8 |
from ctp_slack_bot.db.mongo_db import MongoDB
|
9 |
-
from
|
10 |
-
from
|
11 |
|
12 |
-
|
13 |
-
class MongoVectorizedChunkRepository(VectorRepositoryBase, VectorizedChunkRepository):
|
14 |
"""MongoDB implementation of VectorizedChunkRepository"""
|
15 |
|
|
|
|
|
16 |
async def count_by_id(self: Self, parent_id: str, chunk_id: Optional[str] = None) -> int:
|
17 |
if chunk_id is None:
|
18 |
return await self.collection.count_documents({"parent_id": parent_id})
|
@@ -126,7 +128,6 @@ class MongoVectorizedChunkRepository(VectorRepositoryBase, VectorizedChunkReposi
|
|
126 |
return result.deleted_count
|
127 |
|
128 |
async def ensure_indices_exist(self: Self) -> None:
|
129 |
-
await super().ensure_indices_exist()
|
130 |
index_name = "parent_chunk_unique"
|
131 |
existing_indices = await self.collection.index_information()
|
132 |
logger.debug("{} existing indices were found: {}", len(existing_indices), existing_indices)
|
@@ -134,11 +135,30 @@ class MongoVectorizedChunkRepository(VectorRepositoryBase, VectorizedChunkReposi
|
|
134 |
logger.debug("Index, {}, already exists; duplicate index will not be created.", index_name)
|
135 |
else:
|
136 |
await self.collection.create_index([("parent_id", ASCENDING), ("chunk_id", ASCENDING)], unique=True, name=index_name)
|
|
|
137 |
|
138 |
@property
|
139 |
def name(self: Self) -> str:
|
140 |
return "mongo_db_vectorized_chunk_repository"
|
141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
|
143 |
class MongoVectorizedChunkRepositoryResource(AsyncResource):
|
144 |
async def init(self: Self, settings: Settings, mongo_db: MongoDB) -> MongoVectorizedChunkRepository:
|
|
|
1 |
from dependency_injector.resources import AsyncResource
|
2 |
from loguru import logger
|
3 |
from pymongo import ASCENDING, ReturnDocument
|
4 |
+
from pymongo.operations import SearchIndexModel
|
5 |
from typing import Any, Collection, Iterable, Mapping, Optional, Self, Sequence, Set
|
6 |
|
7 |
from ctp_slack_bot.core import Settings
|
8 |
from ctp_slack_bot.models import Chunk, VectorizedChunk, VectorQuery
|
9 |
from ctp_slack_bot.db.mongo_db import MongoDB
|
10 |
+
from .mongo_db_vector_repository_base import MongoDbVectorRepositoryBase
|
11 |
+
from .vectorized_chunk_repository import VectorizedChunkRepository
|
12 |
|
13 |
+
class MongoVectorizedChunkRepository(MongoDbVectorRepositoryBase, VectorizedChunkRepository):
|
|
|
14 |
"""MongoDB implementation of VectorizedChunkRepository"""
|
15 |
|
16 |
+
settings: Settings
|
17 |
+
|
18 |
async def count_by_id(self: Self, parent_id: str, chunk_id: Optional[str] = None) -> int:
|
19 |
if chunk_id is None:
|
20 |
return await self.collection.count_documents({"parent_id": parent_id})
|
|
|
128 |
return result.deleted_count
|
129 |
|
130 |
async def ensure_indices_exist(self: Self) -> None:
|
|
|
131 |
index_name = "parent_chunk_unique"
|
132 |
existing_indices = await self.collection.index_information()
|
133 |
logger.debug("{} existing indices were found: {}", len(existing_indices), existing_indices)
|
|
|
135 |
logger.debug("Index, {}, already exists; duplicate index will not be created.", index_name)
|
136 |
else:
|
137 |
await self.collection.create_index([("parent_id", ASCENDING), ("chunk_id", ASCENDING)], unique=True, name=index_name)
|
138 |
+
await super().ensure_indices_exist()
|
139 |
|
140 |
@property
|
141 |
def name(self: Self) -> str:
|
142 |
return "mongo_db_vectorized_chunk_repository"
|
143 |
|
144 |
+
@property
|
145 |
+
def _search_index_model(self: Self) -> SearchIndexModel:
|
146 |
+
return SearchIndexModel(
|
147 |
+
definition={
|
148 |
+
"fields": [
|
149 |
+
{
|
150 |
+
"type": "vector",
|
151 |
+
"path": "embedding",
|
152 |
+
"numDimensions": self.settings.vector_dimension,
|
153 |
+
"similarity": "cosine",
|
154 |
+
"quantization": "scalar"
|
155 |
+
}
|
156 |
+
]
|
157 |
+
},
|
158 |
+
name=self.settings.vectorized_chunks_search_index_name or f"{self.collection.name}_vector_index",
|
159 |
+
type="vectorSearch"
|
160 |
+
)
|
161 |
+
|
162 |
|
163 |
class MongoVectorizedChunkRepositoryResource(AsyncResource):
|
164 |
async def init(self: Self, settings: Settings, mongo_db: MongoDB) -> MongoVectorizedChunkRepository:
|
src/ctp_slack_bot/db/repositories/vector_repository_base.py
DELETED
@@ -1,59 +0,0 @@
|
|
1 |
-
from loguru import logger
|
2 |
-
from motor.motor_asyncio import AsyncIOMotorCollection
|
3 |
-
from pydantic import ConfigDict
|
4 |
-
from pymongo.operations import SearchIndexModel
|
5 |
-
from typing import Self
|
6 |
-
|
7 |
-
from ctp_slack_bot.core import ApplicationComponentBase, Settings
|
8 |
-
|
9 |
-
|
10 |
-
class VectorRepositoryBase(ApplicationComponentBase):
|
11 |
-
"""MongoDB implementation of VectorizedChunkRepository"""
|
12 |
-
|
13 |
-
model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
|
14 |
-
|
15 |
-
settings: Settings
|
16 |
-
collection: AsyncIOMotorCollection
|
17 |
-
|
18 |
-
async def ensure_indices_exist(self: Self) -> None:
|
19 |
-
"""Ensure that indices exist."""
|
20 |
-
await self.ensure_search_index_exists()
|
21 |
-
|
22 |
-
async def ensure_search_index_exists(self: Self) -> None:
|
23 |
-
"""
|
24 |
-
Ensure that a vector search index exists.
|
25 |
-
"""
|
26 |
-
index_name = self.settings.vectorized_chunks_search_index_name or f"{self.collection.name}_vector_index"
|
27 |
-
try:
|
28 |
-
existing_indices = [index["name"] async for index in self.collection.list_search_indexes()]
|
29 |
-
logger.debug("{} existing indices were found: {}", len(existing_indices), existing_indices)
|
30 |
-
if index_name in existing_indices:
|
31 |
-
logger.debug("Index, {}, already exists; duplicate index will not be created.", index_name)
|
32 |
-
return
|
33 |
-
|
34 |
-
# Create search index model using MongoDB's recommended approach.
|
35 |
-
search_index_model = SearchIndexModel(
|
36 |
-
definition={
|
37 |
-
"fields": [
|
38 |
-
{
|
39 |
-
"type": "vector",
|
40 |
-
"path": "embedding",
|
41 |
-
"numDimensions": self.settings.vector_dimension,
|
42 |
-
"similarity": "cosine",
|
43 |
-
"quantization": "scalar"
|
44 |
-
}
|
45 |
-
]
|
46 |
-
},
|
47 |
-
name=index_name,
|
48 |
-
type="vectorSearch"
|
49 |
-
)
|
50 |
-
result = await self.collection.create_search_index(search_index_model)
|
51 |
-
logger.info("Vector search index, {}, created for collection {}.", result, self.collection.name)
|
52 |
-
except Exception as e:
|
53 |
-
if "command not found" in str(e).lower():
|
54 |
-
logger.warning("Vector search not supported by this MongoDB instance. Some functionality may be limited.")
|
55 |
-
await self.collection.create_index("embedding") # Create a fallback standard index on embedding field.
|
56 |
-
logger.info("Created standard index on field, {}, as fallback.", "embedding")
|
57 |
-
else:
|
58 |
-
logger.error("Failed to create any index: {}", e)
|
59 |
-
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ctp_slack_bot/mime_type_handlers/__init__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
from
|
2 |
-
from
|
|
|
1 |
+
from .base import MimeTypeHandler, MimeTypeHandlerRegistry
|
2 |
+
from .text.vtt import WebVTTMimeTypeHandler
|
src/ctp_slack_bot/mime_type_handlers/text/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1 |
-
from
|
|
|
1 |
+
from .vtt import WebVTTMimeTypeHandler
|
src/ctp_slack_bot/mime_type_handlers/text/vtt.py
CHANGED
@@ -6,7 +6,7 @@ from types import MappingProxyType
|
|
6 |
from typing import Any, ClassVar, Mapping, Optional, Self
|
7 |
from webvtt import WebVTT
|
8 |
|
9 |
-
from ctp_slack_bot.mime_type_handlers import MimeTypeHandler, MimeTypeHandlerRegistry
|
10 |
from ctp_slack_bot.models import Content, WebVTTContent, WebVTTFrame
|
11 |
|
12 |
|
|
|
6 |
from typing import Any, ClassVar, Mapping, Optional, Self
|
7 |
from webvtt import WebVTT
|
8 |
|
9 |
+
from ctp_slack_bot.mime_type_handlers.base import MimeTypeHandler, MimeTypeHandlerRegistry
|
10 |
from ctp_slack_bot.models import Content, WebVTTContent, WebVTTFrame
|
11 |
|
12 |
|
src/ctp_slack_bot/models/__init__.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from
|
2 |
-
from
|
3 |
-
from
|
4 |
-
from
|
|
|
1 |
+
from .base import Chunk, Content, VectorizedChunk, VectorQuery
|
2 |
+
from .google_drive import GoogleDriveMetadata
|
3 |
+
from .slack import SlackEventPayload, SlackMessage, SlackReaction, SlackResponse, SlackUserTimestampPair
|
4 |
+
from .webvtt import WebVTTContent, WebVTTFrame
|
src/ctp_slack_bot/models/slack.py
CHANGED
@@ -4,7 +4,7 @@ from pydantic import BaseModel, ConfigDict, PositiveInt
|
|
4 |
from types import MappingProxyType
|
5 |
from typing import Any, Literal, Mapping, Optional, Self
|
6 |
|
7 |
-
from
|
8 |
|
9 |
|
10 |
class SlackEventPayload(BaseModel):
|
|
|
4 |
from types import MappingProxyType
|
5 |
from typing import Any, Literal, Mapping, Optional, Self
|
6 |
|
7 |
+
from .base import Chunk, Content
|
8 |
|
9 |
|
10 |
class SlackEventPayload(BaseModel):
|
src/ctp_slack_bot/models/webvtt.py
CHANGED
@@ -6,8 +6,8 @@ from types import MappingProxyType
|
|
6 |
from typing import Any, ClassVar, Literal, Mapping, Optional, Self
|
7 |
from webvtt import Caption, WebVTT
|
8 |
|
9 |
-
from ctp_slack_bot.models.base import Chunk, Content
|
10 |
from ctp_slack_bot.utils import to_deep_immutable
|
|
|
11 |
|
12 |
|
13 |
class WebVTTFrame(BaseModel):
|
|
|
6 |
from typing import Any, ClassVar, Literal, Mapping, Optional, Self
|
7 |
from webvtt import Caption, WebVTT
|
8 |
|
|
|
9 |
from ctp_slack_bot.utils import to_deep_immutable
|
10 |
+
from .base import Chunk, Content
|
11 |
|
12 |
|
13 |
class WebVTTFrame(BaseModel):
|
src/ctp_slack_bot/services/__init__.py
CHANGED
@@ -1,11 +1,13 @@
|
|
1 |
-
from
|
2 |
-
from
|
3 |
-
from
|
4 |
-
from
|
5 |
-
from
|
6 |
-
from
|
7 |
-
from
|
8 |
-
from
|
9 |
-
from
|
10 |
-
from
|
11 |
-
from
|
|
|
|
|
|
1 |
+
from .answer_retrieval_service import AnswerRetrievalService
|
2 |
+
from .application_health_service import ApplicationHealthService
|
3 |
+
from .content_ingestion_service import ContentIngestionService
|
4 |
+
from .context_retrieval_service import ContextRetrievalService
|
5 |
+
from .embeddings_model_service import EmbeddingsModelService
|
6 |
+
from .event_brokerage_service import EventBrokerageService
|
7 |
+
from .google_drive_service import GoogleDriveService
|
8 |
+
from .http_server_service import HTTPServer
|
9 |
+
from .language_model_service import LanguageModelService
|
10 |
+
from .question_dispatch_service import QuestionDispatchService
|
11 |
+
from. schedule_service import ScheduleService
|
12 |
+
from .slack_service import SlackService
|
13 |
+
from .vectorization_service import VectorizationService
|
src/ctp_slack_bot/services/answer_retrieval_service.py
CHANGED
@@ -5,8 +5,8 @@ from typing import Collection, Self
|
|
5 |
from ctp_slack_bot.core import ApplicationComponentBase, Settings
|
6 |
from ctp_slack_bot.enums import EventType
|
7 |
from ctp_slack_bot.models import Chunk, SlackMessage, SlackResponse
|
8 |
-
from
|
9 |
-
from
|
10 |
|
11 |
|
12 |
class AnswerRetrievalService(ApplicationComponentBase):
|
|
|
5 |
from ctp_slack_bot.core import ApplicationComponentBase, Settings
|
6 |
from ctp_slack_bot.enums import EventType
|
7 |
from ctp_slack_bot.models import Chunk, SlackMessage, SlackResponse
|
8 |
+
from .event_brokerage_service import EventBrokerageService
|
9 |
+
from .language_model_service import LanguageModelService
|
10 |
|
11 |
|
12 |
class AnswerRetrievalService(ApplicationComponentBase):
|
src/ctp_slack_bot/services/content_ingestion_service.py
CHANGED
@@ -6,8 +6,8 @@ from ctp_slack_bot.core import ApplicationComponentBase, Settings
|
|
6 |
from ctp_slack_bot.db.repositories import VectorizedChunkRepository
|
7 |
from ctp_slack_bot.enums import EventType
|
8 |
from ctp_slack_bot.models import Chunk, Content, SlackMessage
|
9 |
-
from
|
10 |
-
from
|
11 |
|
12 |
|
13 |
class ContentIngestionService(ApplicationComponentBase):
|
|
|
6 |
from ctp_slack_bot.db.repositories import VectorizedChunkRepository
|
7 |
from ctp_slack_bot.enums import EventType
|
8 |
from ctp_slack_bot.models import Chunk, Content, SlackMessage
|
9 |
+
from .event_brokerage_service import EventBrokerageService
|
10 |
+
from .vectorization_service import VectorizationService
|
11 |
|
12 |
|
13 |
class ContentIngestionService(ApplicationComponentBase):
|
src/ctp_slack_bot/services/context_retrieval_service.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Self, Sequence
|
|
5 |
from ctp_slack_bot.core import ApplicationComponentBase, Settings
|
6 |
from ctp_slack_bot.db.repositories import VectorizedChunkRepository
|
7 |
from ctp_slack_bot.models import Chunk, SlackMessage, VectorQuery, VectorizedChunk
|
8 |
-
from
|
9 |
|
10 |
|
11 |
class ContextRetrievalService(ApplicationComponentBase):
|
|
|
5 |
from ctp_slack_bot.core import ApplicationComponentBase, Settings
|
6 |
from ctp_slack_bot.db.repositories import VectorizedChunkRepository
|
7 |
from ctp_slack_bot.models import Chunk, SlackMessage, VectorQuery, VectorizedChunk
|
8 |
+
from .vectorization_service import VectorizationService
|
9 |
|
10 |
|
11 |
class ContextRetrievalService(ApplicationComponentBase):
|
src/ctp_slack_bot/services/http_server_service.py
CHANGED
@@ -1,15 +1,51 @@
|
|
1 |
-
from aiohttp.web import Application, AppRunner, Response, TCPSite
|
|
|
2 |
from dependency_injector.resources import AsyncResource
|
3 |
-
from
|
4 |
-
from
|
|
|
|
|
5 |
|
|
|
6 |
from ctp_slack_bot.core import ApplicationComponentBase, Settings
|
7 |
|
8 |
|
9 |
-
class
|
10 |
model_config = ConfigDict(frozen=True)
|
11 |
|
12 |
settings: Settings
|
|
|
|
|
|
|
13 |
|
14 |
-
async def
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from aiohttp.web import Application, AppRunner, Response, RouteTableDef, TCPSite
|
2 |
+
from dependency_injector.containers import DeclarativeContainer
|
3 |
from dependency_injector.resources import AsyncResource
|
4 |
+
from itertools import chain
|
5 |
+
from loguru import logger
|
6 |
+
from pydantic import ConfigDict, PrivateAttr
|
7 |
+
from typing import Self, Sequence
|
8 |
|
9 |
+
from ctp_slack_bot.controllers.base import ControllerBase, Route
|
10 |
from ctp_slack_bot.core import ApplicationComponentBase, Settings
|
11 |
|
12 |
|
13 |
+
class HTTPServer(ApplicationComponentBase):
|
14 |
model_config = ConfigDict(frozen=True)
|
15 |
|
16 |
settings: Settings
|
17 |
+
routes: Sequence[Route]
|
18 |
+
_app_runner: AppRunner = PrivateAttr()
|
19 |
+
_site: TCPSite = PrivateAttr()
|
20 |
|
21 |
+
async def initialize(self: Self) -> None:
|
22 |
+
app = Application()
|
23 |
+
for route in self.routes:
|
24 |
+
app.router.add_route(route.method, route.path, route.handler)
|
25 |
+
logger.info("Registered HTTP route: {} {}", route.method, route.path)
|
26 |
+
self._app_runner = AppRunner(app)
|
27 |
+
await self._app_runner.setup()
|
28 |
+
self._site = TCPSite(self._app_runner, self.settings.http_host, self.settings.http_port)
|
29 |
+
|
30 |
+
async def start(self: Self) -> None:
|
31 |
+
await self._site.start()
|
32 |
+
|
33 |
+
async def stop(self: Self) -> None:
|
34 |
+
await self._app_runner.cleanup()
|
35 |
+
|
36 |
+
@property
|
37 |
+
def name(self: Self) -> str:
|
38 |
+
return "http_server"
|
39 |
+
|
40 |
+
|
41 |
+
class HTTPServerResource(AsyncResource):
|
42 |
+
async def init(self: Self, settings: Settings, controllers: Sequence[ControllerBase]) -> HTTPServer:
|
43 |
+
routes = tuple(chain.from_iterable(controller.get_routes()
|
44 |
+
for controller
|
45 |
+
in controllers))
|
46 |
+
http_server = HTTPServer(settings=settings, routes=routes)
|
47 |
+
await http_server.initialize()
|
48 |
+
return http_server
|
49 |
+
|
50 |
+
async def shutdown(self: Self, http_server: HTTPServer) -> None:
|
51 |
+
await http_server.stop()
|
src/ctp_slack_bot/services/question_dispatch_service.py
CHANGED
@@ -5,9 +5,9 @@ from typing import Any, Self
|
|
5 |
from ctp_slack_bot.core import ApplicationComponentBase, Settings
|
6 |
from ctp_slack_bot.enums import EventType
|
7 |
from ctp_slack_bot.models import Chunk, SlackMessage
|
8 |
-
from
|
9 |
-
from
|
10 |
-
from
|
11 |
|
12 |
|
13 |
class QuestionDispatchService(ApplicationComponentBase):
|
|
|
5 |
from ctp_slack_bot.core import ApplicationComponentBase, Settings
|
6 |
from ctp_slack_bot.enums import EventType
|
7 |
from ctp_slack_bot.models import Chunk, SlackMessage
|
8 |
+
from .answer_retrieval_service import AnswerRetrievalService
|
9 |
+
from .context_retrieval_service import ContextRetrievalService
|
10 |
+
from .event_brokerage_service import EventBrokerageService
|
11 |
|
12 |
|
13 |
class QuestionDispatchService(ApplicationComponentBase):
|
src/ctp_slack_bot/services/slack_service.py
CHANGED
@@ -12,7 +12,7 @@ from typing import Any, ClassVar, Mapping, MutableMapping, Optional, Self, Set
|
|
12 |
from ctp_slack_bot.core import HealthReportingApplicationComponentBase
|
13 |
from ctp_slack_bot.enums import EventType
|
14 |
from ctp_slack_bot.models import SlackMessage, SlackResponse
|
15 |
-
from
|
16 |
|
17 |
|
18 |
class SlackService(HealthReportingApplicationComponentBase):
|
|
|
12 |
from ctp_slack_bot.core import HealthReportingApplicationComponentBase
|
13 |
from ctp_slack_bot.enums import EventType
|
14 |
from ctp_slack_bot.models import SlackMessage, SlackResponse
|
15 |
+
from .event_brokerage_service import EventBrokerageService
|
16 |
|
17 |
|
18 |
class SlackService(HealthReportingApplicationComponentBase):
|
src/ctp_slack_bot/services/vectorization_service.py
CHANGED
@@ -4,7 +4,7 @@ from typing import Self, Sequence
|
|
4 |
|
5 |
from ctp_slack_bot.core import ApplicationComponentBase, Settings
|
6 |
from ctp_slack_bot.models import Chunk, VectorizedChunk
|
7 |
-
from
|
8 |
|
9 |
|
10 |
class VectorizationService(ApplicationComponentBase):
|
|
|
4 |
|
5 |
from ctp_slack_bot.core import ApplicationComponentBase, Settings
|
6 |
from ctp_slack_bot.models import Chunk, VectorizedChunk
|
7 |
+
from .embeddings_model_service import EmbeddingsModelService
|
8 |
|
9 |
|
10 |
class VectorizationService(ApplicationComponentBase):
|
src/ctp_slack_bot/utils/__init__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
from
|
2 |
-
from
|
|
|
1 |
+
from .immutable import to_deep_immutable
|
2 |
+
from .secret_stripper import sanitize_mongo_db_uri
|