LiKenun commited on
Commit
bb7c9a3
·
1 Parent(s): a08a6f4

Refactor #3

Browse files
Files changed (37) hide show
  1. .env.template +4 -0
  2. README.md +14 -9
  3. pyproject.toml +2 -1
  4. scripts/run-dev.sh +1 -1
  5. src/ctp_slack_bot/app.py +10 -3
  6. src/ctp_slack_bot/containers.py +11 -5
  7. src/ctp_slack_bot/core/__init__.py +1 -0
  8. src/ctp_slack_bot/core/abstractions.py +31 -0
  9. src/ctp_slack_bot/core/config.py +47 -46
  10. src/ctp_slack_bot/core/logging.py +3 -2
  11. src/ctp_slack_bot/db/mongo_db.py +20 -17
  12. src/ctp_slack_bot/db/repositories/mongo_db_vectorized_chunk_repository.py +12 -8
  13. src/ctp_slack_bot/db/repositories/vector_repository_base.py +15 -18
  14. src/ctp_slack_bot/db/repositories/vectorized_chunk_repository.py +4 -2
  15. src/ctp_slack_bot/mime_type_handlers/base.py +4 -4
  16. src/ctp_slack_bot/mime_type_handlers/text/vtt.py +1 -0
  17. src/ctp_slack_bot/models/base.py +10 -8
  18. src/ctp_slack_bot/models/google_drive.py +2 -2
  19. src/ctp_slack_bot/models/slack.py +14 -5
  20. src/ctp_slack_bot/models/webvtt.py +3 -3
  21. src/ctp_slack_bot/services/__init__.py +1 -0
  22. src/ctp_slack_bot/services/answer_retrieval_service.py +9 -10
  23. src/ctp_slack_bot/services/application_database_service.py +9 -10
  24. src/ctp_slack_bot/services/application_health_service.py +25 -0
  25. src/ctp_slack_bot/services/content_ingestion_service.py +13 -10
  26. src/ctp_slack_bot/services/context_retrieval_service.py +12 -12
  27. src/ctp_slack_bot/services/embeddings_model_service.py +19 -16
  28. src/ctp_slack_bot/services/event_brokerage_service.py +11 -10
  29. src/ctp_slack_bot/services/google_drive_service.py +27 -24
  30. src/ctp_slack_bot/services/http_client_service.py +14 -0
  31. src/ctp_slack_bot/services/http_server_service.py +15 -0
  32. src/ctp_slack_bot/services/language_model_service.py +20 -19
  33. src/ctp_slack_bot/services/question_dispatch_service.py +12 -10
  34. src/ctp_slack_bot/services/schedule_service.py +16 -14
  35. src/ctp_slack_bot/services/slack_service.py +28 -16
  36. src/ctp_slack_bot/services/vectorization_service.py +10 -10
  37. src/ctp_slack_bot/utils/secret_stripper.py +1 -0
.env.template CHANGED
@@ -1,5 +1,9 @@
1
  # Copy this file and modify. Do not save or commit the secrets!
2
 
 
 
 
 
3
  # APScheduler Configuration
4
  SCHEDULER_TIMEZONE=UTC
5
 
 
1
  # Copy this file and modify. Do not save or commit the secrets!
2
 
3
+ # HTTP Server Configuration
4
+ HTTP_HOST=0.0.0.0
5
+ HTTP_PORT=8080
6
+
7
  # APScheduler Configuration
8
  SCHEDULER_TIMEZONE=UTC
9
 
README.md CHANGED
@@ -24,10 +24,10 @@ You need to configure it first. This is done via environment variables, or an `.
24
 
25
  Obtaining the values requires setting up API tokens/secrets with:
26
 
27
- * Slack: for `SLACK_BOT_TOKEN` and `SLACK_APP_TOKEN`
28
- * MongoDB: for `MONGODB_URI`
29
- * OpenAI: for `OPENAI_API_KEY`
30
- * Google Drive: for `GOOGLE_PROJECT_ID`, `GOOGLE_CLIENT_ID`, `GOOGLE_CLIENT_EMAIL`, `GOOGLE_PRIVATE_KEY_ID`, and `GOOGLE_PRIVATE_KEY`
31
  * For Google Drive, set up a service account. It’s the only supported authentication type.
32
 
33
  ### Normally
@@ -58,12 +58,14 @@ pip3 install -e .
58
 
59
  Make a copy of `.env.template` as `.env` and define the environment variables. (You can also define them by other means, but this has the least friction.) This file should not be committed and is excluded by `.gitignore`!
60
 
61
- If `localhost` port `8000` is free, running the following will make the application available on that port:
62
 
63
  ```sh
64
  scripts/run-dev.sh
65
  ```
66
 
 
 
67
  ## Tech Stack
68
 
69
  * Hugging Face Spaces for hosting
@@ -77,14 +79,17 @@ scripts/run-dev.sh
77
 
78
  ## General Project Structure
79
 
 
 
80
  * `src/`
81
  * `ctp_slack_bot/`
82
  * `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions
83
- * `db/`: database connection
84
- * `repositories/`: repository pattern implementation
85
- * `models/`: Pydantic models for data validation and serialization
86
  * `services/`: business logic
87
  * `answer_retrieval_service.py`: obtains an answer to a question from a language model using relevant context
 
88
  * `content_ingestion_service.py`: converts content into chunks and stores them into the database
89
  * `context_retrieval_service.py`: queries for relevant context from the database to answer a question
90
  * `embeddings_model_service.py`: converts text to embeddings
@@ -95,7 +100,7 @@ scripts/run-dev.sh
95
  * `slack_service.py`: handles events from Slack and sends back responses
96
  * `vector_database_service.py`: stores and queries chunks
97
  * `vectorization_service.py`: converts chunks into chunks with embeddings
98
- * `tasks/`: background scheduled jobs
99
  * `utils/`: reusable utilities
100
  * `app.py`: application entry point
101
  * `containers.py`: the dependency injection container
 
24
 
25
  Obtaining the values requires setting up API tokens/secrets with:
26
 
27
+ * Slack: for `slack_bot_token` and `slack_app_token`
28
+ * MongoDB: for `mongodb_uri`
29
+ * OpenAI: for `openai_api_key`
30
+ * Google Drive: for `google_project_id`, `google_client_id`, `google_client_email`, `google_private_key_id`, and `google_private_key`
31
  * For Google Drive, set up a service account. It’s the only supported authentication type.
32
 
33
  ### Normally
 
58
 
59
  Make a copy of `.env.template` as `.env` and define the environment variables. (You can also define them by other means, but this has the least friction.) This file should not be committed and is excluded by `.gitignore`!
60
 
61
+ If `localhost` port `8080` is free, running the following will make the application available on that port:
62
 
63
  ```sh
64
  scripts/run-dev.sh
65
  ```
66
 
67
+ Visiting http://localhost:8080/health will return HTTP status OK and a payload containing the health status of individual components if everything is working.
68
+
69
  ## Tech Stack
70
 
71
  * Hugging Face Spaces for hosting
 
79
 
80
  ## General Project Structure
81
 
82
+ Not every file or folder is listed, but the important stuff is here.
83
+
84
  * `src/`
85
  * `ctp_slack_bot/`
86
  * `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions
87
+ * `db/`: data connection and interface logic
88
+ * `repositories/`: data collection/table interface logic
89
+ * `models/`: data models
90
  * `services/`: business logic
91
  * `answer_retrieval_service.py`: obtains an answer to a question from a language model using relevant context
92
+ * `application_health_service.py`: collects the health status of the application components
93
  * `content_ingestion_service.py`: converts content into chunks and stores them into the database
94
  * `context_retrieval_service.py`: queries for relevant context from the database to answer a question
95
  * `embeddings_model_service.py`: converts text to embeddings
 
100
  * `slack_service.py`: handles events from Slack and sends back responses
101
  * `vector_database_service.py`: stores and queries chunks
102
  * `vectorization_service.py`: converts chunks into chunks with embeddings
103
+ * `tasks/`: scheduled tasks to run in the background
104
  * `utils/`: reusable utilities
105
  * `app.py`: application entry point
106
  * `containers.py`: the dependency injection container
pyproject.toml CHANGED
@@ -19,7 +19,7 @@ classifiers = [
19
  "Operating System :: OS Independent",
20
  ]
21
  dependencies = [
22
- "pydantic>=2.11.2",
23
  "pydantic-settings>=2.8.1",
24
  "cachetools>=5.5.2",
25
  "more-itertools>=10.6.0",
@@ -30,6 +30,7 @@ dependencies = [
30
  "apscheduler>=3.11.0",
31
  # "tenacity>=9.1.2",
32
  # "pybreaker>=1.3.0",
 
33
  "aiohttp>=3.11.16",
34
  "webvtt-py>=0.5.1",
35
  "slack-sdk>=3.35.0",
 
19
  "Operating System :: OS Independent",
20
  ]
21
  dependencies = [
22
+ "pydantic[email]>=2.11.2",
23
  "pydantic-settings>=2.8.1",
24
  "cachetools>=5.5.2",
25
  "more-itertools>=10.6.0",
 
30
  "apscheduler>=3.11.0",
31
  # "tenacity>=9.1.2",
32
  # "pybreaker>=1.3.0",
33
+ "httpx>=0.28.1",
34
  "aiohttp>=3.11.16",
35
  "webvtt-py>=0.5.1",
36
  "slack-sdk>=3.35.0",
scripts/run-dev.sh CHANGED
@@ -2,4 +2,4 @@
2
 
3
  parent_path=$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd -P)
4
 
5
- LOG_LEVEL=DEBUG python3 "${parent_path}/../src/ctp_slack_bot/app.py"
 
2
 
3
  parent_path=$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd -P)
4
 
5
+ log_level=DEBUG python3 "${parent_path}/../src/ctp_slack_bot/app.py"
src/ctp_slack_bot/app.py CHANGED
@@ -1,5 +1,6 @@
1
  from aiohttp.web import Application as WebApplication, AppRunner as WebAppRunner, Response, TCPSite
2
  from asyncio import all_tasks, CancelledError, create_task, current_task, get_running_loop, run
 
3
  from loguru import logger
4
  from signal import SIGINT, SIGTERM
5
  from typing import Any, Callable
@@ -18,17 +19,22 @@ async def main() -> None:
18
  container.wire(packages=["ctp_slack_bot"])
19
 
20
  # Kick off services which should be active from the start.
 
21
  container.content_ingestion_service()
22
  container.question_dispatch_service()
23
  container.schedule_service()
24
 
25
- async def health(request):
26
- return Response(text="lol")
 
 
 
 
27
  http_server = WebApplication()
28
  http_server.router.add_get("/health", health)
29
  web_app_runner = WebAppRunner(http_server)
30
  await web_app_runner.setup()
31
- website = TCPSite(web_app_runner, "0.0.0.0", 8080)
32
  await website.start()
33
 
34
  async def handle_shutdown_signal() -> None:
@@ -61,5 +67,6 @@ async def main() -> None:
61
  await socket_mode_handler.close_async()
62
  await container.shutdown_resources()
63
 
 
64
  if __name__ == "__main__":
65
  run(main())
 
1
  from aiohttp.web import Application as WebApplication, AppRunner as WebAppRunner, Response, TCPSite
2
  from asyncio import all_tasks, CancelledError, create_task, current_task, get_running_loop, run
3
+ from json import dumps
4
  from loguru import logger
5
  from signal import SIGINT, SIGTERM
6
  from typing import Any, Callable
 
19
  container.wire(packages=["ctp_slack_bot"])
20
 
21
  # Kick off services which should be active from the start.
22
+ application_health_service = await container.application_health_service()
23
  container.content_ingestion_service()
24
  container.question_dispatch_service()
25
  container.schedule_service()
26
 
27
+ async def health(request): # TODO: Abstract away
28
+ health_statuses = await application_health_service.get_health()
29
+ if all(health_statuses.values()):
30
+ return Response(text=dumps(dict(health_statuses)), content_type="application/json")
31
+ else:
32
+ return Response(body=dumps(dict(health_statuses)), content_type="application/json", status=503)
33
  http_server = WebApplication()
34
  http_server.router.add_get("/health", health)
35
  web_app_runner = WebAppRunner(http_server)
36
  await web_app_runner.setup()
37
+ website = TCPSite(web_app_runner, "0.0.0.0", 8080) # TODO: Un-hard-code
38
  await website.start()
39
 
40
  async def handle_shutdown_signal() -> None:
 
67
  await socket_mode_handler.close_async()
68
  await container.shutdown_resources()
69
 
70
+
71
  if __name__ == "__main__":
72
  run(main())
src/ctp_slack_bot/containers.py CHANGED
@@ -1,21 +1,23 @@
1
  from dependency_injector.containers import DeclarativeContainer
2
- from dependency_injector.providers import Callable, Resource, Singleton
3
  from importlib import import_module
4
  from pkgutil import iter_modules
5
  from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
6
  from slack_bolt.async_app import AsyncApp
7
  from types import ModuleType
8
 
9
- from ctp_slack_bot.core.config import Settings
10
  from ctp_slack_bot.db.mongo_db import MongoDBResource
11
  from ctp_slack_bot.db.repositories.mongo_db_vectorized_chunk_repository import MongoVectorizedChunkRepositoryResource
12
- from ctp_slack_bot.mime_type_handlers.base import MimeTypeHandler
13
  from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
 
14
  from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
15
  from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
16
  from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
17
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
18
  from ctp_slack_bot.services.google_drive_service import GoogleDriveService
 
19
  from ctp_slack_bot.services.language_model_service import LanguageModelService
20
  from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
21
  from ctp_slack_bot.services.schedule_service import ScheduleServiceResource
@@ -42,6 +44,7 @@ class Container(DeclarativeContainer): # TODO: audit for potential async-related
42
  event_brokerage_service = Singleton(EventBrokerageService)
43
  schedule_service = Resource (ScheduleServiceResource,
44
  settings=settings)
 
45
  mongo_db = Resource (MongoDBResource,
46
  settings=settings)
47
  vectorized_chunk_repository = Resource (MongoVectorizedChunkRepositoryResource,
@@ -73,12 +76,13 @@ class Container(DeclarativeContainer): # TODO: audit for potential async-related
73
  content_ingestion_service=content_ingestion_service,
74
  context_retrieval_service=context_retrieval_service,
75
  answer_retrieval_service=answer_retrieval_service)
76
- slack_bolt_app = Singleton(lambda settings: AsyncApp(token=settings.SLACK_BOT_TOKEN.get_secret_value()),
77
  settings)
78
  slack_service = Resource (SlackServiceResource,
79
  event_brokerage_service=event_brokerage_service,
 
80
  slack_bolt_app=slack_bolt_app)
81
- socket_mode_handler = Singleton(lambda _, app, settings: AsyncSocketModeHandler(app, settings.SLACK_APP_TOKEN.get_secret_value()),
82
  slack_service,
83
  slack_bolt_app,
84
  settings)
@@ -89,3 +93,5 @@ class Container(DeclarativeContainer): # TODO: audit for potential async-related
89
  # settings=settings,
90
  # google_drive_service=google_drive_service,
91
  # mime_type_handler_factory=mime_type_handler_factory)
 
 
 
1
  from dependency_injector.containers import DeclarativeContainer
2
+ from dependency_injector.providers import Callable, List, Resource, Singleton
3
  from importlib import import_module
4
  from pkgutil import iter_modules
5
  from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
6
  from slack_bolt.async_app import AsyncApp
7
  from types import ModuleType
8
 
9
+ from ctp_slack_bot.core import Settings
10
  from ctp_slack_bot.db.mongo_db import MongoDBResource
11
  from ctp_slack_bot.db.repositories.mongo_db_vectorized_chunk_repository import MongoVectorizedChunkRepositoryResource
12
+ from ctp_slack_bot.mime_type_handlers import MimeTypeHandler
13
  from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
14
+ from ctp_slack_bot.services.application_health_service import ApplicationHealthService
15
  from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
16
  from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
17
  from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
18
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
19
  from ctp_slack_bot.services.google_drive_service import GoogleDriveService
20
+ from ctp_slack_bot.services.http_client_service import HTTPClientServiceResource
21
  from ctp_slack_bot.services.language_model_service import LanguageModelService
22
  from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
23
  from ctp_slack_bot.services.schedule_service import ScheduleServiceResource
 
44
  event_brokerage_service = Singleton(EventBrokerageService)
45
  schedule_service = Resource (ScheduleServiceResource,
46
  settings=settings)
47
+ http_client = Resource (HTTPClientServiceResource)
48
  mongo_db = Resource (MongoDBResource,
49
  settings=settings)
50
  vectorized_chunk_repository = Resource (MongoVectorizedChunkRepositoryResource,
 
76
  content_ingestion_service=content_ingestion_service,
77
  context_retrieval_service=context_retrieval_service,
78
  answer_retrieval_service=answer_retrieval_service)
79
+ slack_bolt_app = Singleton(lambda settings: AsyncApp(token=settings.slack_bot_token.get_secret_value()),
80
  settings)
81
  slack_service = Resource (SlackServiceResource,
82
  event_brokerage_service=event_brokerage_service,
83
+ http_client=http_client,
84
  slack_bolt_app=slack_bolt_app)
85
+ socket_mode_handler = Singleton(lambda _, app, settings: AsyncSocketModeHandler(app, settings.slack_app_token.get_secret_value()),
86
  slack_service,
87
  slack_bolt_app,
88
  settings)
 
93
  # settings=settings,
94
  # google_drive_service=google_drive_service,
95
  # mime_type_handler_factory=mime_type_handler_factory)
96
+ application_health_service = Singleton(ApplicationHealthService,
97
+ services=List(mongo_db, slack_service))
src/ctp_slack_bot/core/__init__.py CHANGED
@@ -1 +1,2 @@
 
1
  from ctp_slack_bot.core.config import Settings
 
1
+ from ctp_slack_bot.core.abstractions import AbstractBaseModel, ApplicationComponentBase, HealthReportingApplicationComponentBase
2
  from ctp_slack_bot.core.config import Settings
src/ctp_slack_bot/core/abstractions.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from loguru import logger
3
+ from pydantic import BaseModel
4
+ from typing import Any, Self
5
+
6
+
7
+ class AbstractModelMetaclass(type(BaseModel), type(ABC)):
8
+ pass
9
+
10
+
11
+ class AbstractBaseModel(BaseModel, ABC, metaclass=AbstractModelMetaclass):
12
+ pass
13
+
14
+
15
+ class ApplicationComponentBase(AbstractBaseModel):
16
+
17
+ def __init__(self: Self, **data: dict[str, Any]) -> None:
18
+ super().__init__(**data)
19
+ logger.debug("Created {}", self.__class__.__name__)
20
+
21
+ @property
22
+ @abstractmethod
23
+ def name(self: Self) -> str:
24
+ pass
25
+
26
+
27
+ class HealthReportingApplicationComponentBase(ApplicationComponentBase):
28
+
29
+ @abstractmethod
30
+ async def is_healthy(self: Self) -> bool:
31
+ pass
src/ctp_slack_bot/core/config.py CHANGED
@@ -1,78 +1,79 @@
1
  from loguru import logger
2
- from pydantic import Field, MongoDsn, NonNegativeFloat, NonNegativeInt, PositiveInt, SecretStr
3
  from pydantic_settings import BaseSettings, SettingsConfigDict
4
  from types import MappingProxyType
5
  from typing import Literal, Mapping, Optional, Self
6
 
 
7
  class Settings(BaseSettings):
8
  """
9
  Application settings loaded from environment variables.
10
  """
11
 
12
- def __init__(self: Self, **data) -> None:
13
- super().__init__(**data)
14
- logger.debug("Created {}", self.__class__.__name__)
15
- if self.__pydantic_extra__:
16
- logger.warning("Extra unrecognized environment variables were provided: {}", ", ".join(self.__pydantic_extra__))
 
 
17
 
18
  # Logging Configuration ― not actually used to configure Loguru, but defined to prevent warnings about “unknown” environment variables
19
- LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(default_factory=lambda data: "DEBUG" if data.get("DEBUG", False) else "INFO")
20
- LOG_FORMAT: Literal["text", "json"] = "json"
 
 
 
 
21
 
22
  # APScheduler Configuration
23
- SCHEDULER_TIMEZONE: Optional[str] = "UTC"
24
 
25
  # Slack Configuration
26
- SLACK_BOT_TOKEN: SecretStr
27
- SLACK_APP_TOKEN: SecretStr
28
 
29
  # Vectorization Configuration
30
- EMBEDDING_MODEL: str
31
- VECTOR_DIMENSION: PositiveInt
32
- CHUNK_SIZE: PositiveInt
33
- CHUNK_OVERLAP: NonNegativeInt
34
- TOP_K_MATCHES: PositiveInt
35
 
36
  # MongoDB Configuration
37
- MONGODB_URI: SecretStr # TODO: Contemplate switching to MongoDsn type for the main URL, and separate out the credentials to SecretStr variables.
38
- MONGODB_NAME: str
39
- VECTORIZED_CHUNKS_COLLECTION_NAME: str = "vectorized_chunks"
40
- VECTORIZED_CHUNKS_SEARCH_INDEX_NAME: Optional[str] = None
41
- SCORE_THRESHOLD: NonNegativeFloat
42
 
43
  # Hugging Face Configuration
44
- HF_API_TOKEN: Optional[SecretStr] = None # TODO: Currently, this is unused.
45
 
46
  # OpenAI Configuration
47
- OPENAI_API_KEY: SecretStr
48
- CHAT_MODEL: str
49
- MAX_TOKENS: PositiveInt
50
- TEMPERATURE: NonNegativeFloat
51
- SYSTEM_PROMPT: str
52
 
53
  # Google Drive Configuration
54
- GOOGLE_DRIVE_ROOT_ID: str
55
- GOOGLE_PROJECT_ID: str
56
- GOOGLE_PRIVATE_KEY_ID: SecretStr
57
- GOOGLE_PRIVATE_KEY: SecretStr
58
- GOOGLE_CLIENT_ID: str
59
- GOOGLE_CLIENT_EMAIL: str
60
- GOOGLE_AUTH_URI: str = "https://accounts.google.com/o/oauth2/auth"
61
- GOOGLE_TOKEN_URI: str = "https://oauth2.googleapis.com/token"
62
- GOOGLE_AUTH_PROVIDER_CERT_URL: str = "https://www.googleapis.com/oauth2/v1/certs"
63
- GOOGLE_CLIENT_CERT_URL: str = "https://www.googleapis.com/robot/v1/metadata/x509/ctp-slack-bot-714%40voltaic-reducer-294821.iam.gserviceaccount.com"
64
- GOOGLE_UNIVERSE_DOMAIN: str = "googleapis.com"
65
 
66
  # File Monitoring Configuration
67
- FILE_MONITOR_ROOT_PATH: str = ""
68
 
69
- model_config = SettingsConfigDict(
70
- env_file=".env",
71
- env_file_encoding="utf-8",
72
- case_sensitive=True,
73
- extra="allow",
74
- frozen=True
75
- )
76
 
77
  def get_extra_environment_variables(self: Self) -> Mapping[str, str]:
78
  return MappingProxyType(self.__pydantic_extra__)
 
1
  from loguru import logger
2
+ from pydantic import EmailStr, Field, MongoDsn, NonNegativeFloat, NonNegativeInt, PositiveInt, SecretStr
3
  from pydantic_settings import BaseSettings, SettingsConfigDict
4
  from types import MappingProxyType
5
  from typing import Literal, Mapping, Optional, Self
6
 
7
+
8
  class Settings(BaseSettings):
9
  """
10
  Application settings loaded from environment variables.
11
  """
12
 
13
+ model_config = SettingsConfigDict(
14
+ case_sensitive=False,
15
+ env_file=".env",
16
+ env_file_encoding="utf-8",
17
+ extra="allow",
18
+ frozen=True
19
+ )
20
 
21
  # Logging Configuration ― not actually used to configure Loguru, but defined to prevent warnings about “unknown” environment variables
22
+ log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(default_factory=lambda data: "DEBUG" if data.get("DEBUG", False) else "INFO")
23
+ log_format: Literal["text", "json"] = "json"
24
+
25
+ # HTTP Server Configuration
26
+ http_host: str = "0.0.0.0"
27
+ http_port: PositiveInt = 8080
28
 
29
  # APScheduler Configuration
30
+ scheduler_timezone: Optional[str] = "UTC"
31
 
32
  # Slack Configuration
33
+ slack_bot_token: SecretStr
34
+ slack_app_token: SecretStr
35
 
36
  # Vectorization Configuration
37
+ embedding_model: str
38
+ vector_dimension: PositiveInt
39
+ chunk_size: PositiveInt
40
+ chunk_overlap: NonNegativeInt
41
+ top_k_matches: PositiveInt
42
 
43
  # MongoDB Configuration
44
+ mongodb_uri: SecretStr # TODO: Contemplate switching to MongoDsn type for the main URL, and separate out the credentials to SecretStr variables.
45
+ mongodb_name: str
46
+ vectorized_chunks_collection_name: str = "vectorized_chunks"
47
+ vectorized_chunks_search_index_name: Optional[str] = None
48
+ score_threshold: NonNegativeFloat
49
 
50
  # Hugging Face Configuration
51
+ hf_api_token: Optional[SecretStr] = None # TODO: Currently, this is unused.
52
 
53
  # OpenAI Configuration
54
+ openai_api_key: SecretStr
55
+ chat_model: str
56
+ max_tokens: PositiveInt
57
+ temperature: NonNegativeFloat
58
+ system_prompt: str
59
 
60
  # Google Drive Configuration
61
+ google_drive_root_id: str
62
+ google_project_id: str
63
+ google_private_key_id: SecretStr
64
+ google_private_key: SecretStr
65
+ google_client_id: str
66
+ google_client_email: EmailStr
67
+ google_token_uri: str = "https://oauth2.googleapis.com/token"
 
 
 
 
68
 
69
  # File Monitoring Configuration
70
+ file_monitor_root_path: str = ""
71
 
72
+ def __init__(self: Self, **data) -> None:
73
+ super().__init__(**data)
74
+ logger.debug("Created {}", self.__class__.__name__)
75
+ if self.__pydantic_extra__:
76
+ logger.warning("Extra unrecognized environment variables were provided: {}", ", ".join(self.__pydantic_extra__))
 
 
77
 
78
  def get_extra_environment_variables(self: Self) -> Mapping[str, str]:
79
  return MappingProxyType(self.__pydantic_extra__)
src/ctp_slack_bot/core/logging.py CHANGED
@@ -4,6 +4,7 @@ from os import access, getenv, W_OK
4
  from sys import stderr
5
  from typing import Self
6
 
 
7
  class InterceptHandler(Handler):
8
  """
9
  Intercept standard logging messages toward Loguru.
@@ -39,8 +40,8 @@ def setup_logging() -> None:
39
  """
40
 
41
  # Get logger configuration from environment variables.
42
- log_level = getenv("LOG_LEVEL", "INFO")
43
- log_format = getenv("LOG_FORMAT", "text")
44
 
45
  # Remove default loguru handler.
46
  logger.remove()
 
4
  from sys import stderr
5
  from typing import Self
6
 
7
+
8
  class InterceptHandler(Handler):
9
  """
10
  Intercept standard logging messages toward Loguru.
 
40
  """
41
 
42
  # Get logger configuration from environment variables.
43
+ log_level = getenv("log_level", "INFO")
44
+ log_format = getenv("log_format", "text")
45
 
46
  # Remove default loguru handler.
47
  logger.remove()
src/ctp_slack_bot/db/mongo_db.py CHANGED
@@ -1,34 +1,29 @@
1
  from dependency_injector.resources import AsyncResource
2
- from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorCollection
3
  from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
4
  from loguru import logger
5
- from pydantic import BaseModel, PrivateAttr
6
- from typing import Any, Dict, Self
7
 
8
- from ctp_slack_bot.core.config import Settings
9
  from ctp_slack_bot.utils import sanitize_mongo_db_uri
10
 
11
 
12
- class MongoDB(BaseModel):
13
  """
14
  MongoDB connection manager using Motor for async operations.
15
  """
16
- settings: Settings
17
- _client: PrivateAttr = PrivateAttr()
18
- _db: PrivateAttr = PrivateAttr()
19
 
20
- class Config:
21
- frozen=True
22
- arbitrary_types_allowed = True
23
 
24
- def __init__(self: Self, **data: Dict[str, Any]) -> None:
25
- super().__init__(**data)
26
- logger.debug("Created {}", self.__class__.__name__)
27
 
28
  def connect(self: Self) -> None:
29
  """Initialize MongoDB client with settings."""
30
  try:
31
- connection_string = self.settings.MONGODB_URI.get_secret_value()
32
  logger.debug("Connecting to MongoDB using URI: {}", sanitize_mongo_db_uri(connection_string))
33
 
34
  # Create client with appropriate settings.
@@ -43,7 +38,7 @@ class MongoDB(BaseModel):
43
  )
44
 
45
  # Get the database name.
46
- db_name = self.settings.MONGODB_NAME
47
 
48
  self._db = self._client[db_name]
49
  logger.debug("MongoDB client initialized for database: {}", db_name)
@@ -104,9 +99,17 @@ class MongoDB(BaseModel):
104
  self._client = None
105
  self._db = None
106
 
 
 
 
 
 
 
 
 
107
  class MongoDBResource(AsyncResource):
108
  async def init(self: Self, settings: Settings) -> MongoDB:
109
- logger.info("Initializing MongoDB connection for database: {}", settings.MONGODB_NAME)
110
  mongo_db = MongoDB(settings=settings)
111
  mongo_db.connect()
112
  await self._test_connection(mongo_db)
 
1
  from dependency_injector.resources import AsyncResource
2
+ from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase, AsyncIOMotorCollection
3
  from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
4
  from loguru import logger
5
+ from pydantic import ConfigDict
6
+ from typing import Any, Self
7
 
8
+ from ctp_slack_bot.core import HealthReportingApplicationComponentBase, Settings
9
  from ctp_slack_bot.utils import sanitize_mongo_db_uri
10
 
11
 
12
+ class MongoDB(HealthReportingApplicationComponentBase):
13
  """
14
  MongoDB connection manager using Motor for async operations.
15
  """
 
 
 
16
 
17
+ model_config = ConfigDict(frozen=True)
 
 
18
 
19
+ settings: Settings
20
+ _client: AsyncIOMotorClient
21
+ _db: AsyncIOMotorDatabase
22
 
23
  def connect(self: Self) -> None:
24
  """Initialize MongoDB client with settings."""
25
  try:
26
+ connection_string = self.settings.mongodb_uri.get_secret_value()
27
  logger.debug("Connecting to MongoDB using URI: {}", sanitize_mongo_db_uri(connection_string))
28
 
29
  # Create client with appropriate settings.
 
38
  )
39
 
40
  # Get the database name.
41
+ db_name = self.settings.mongodb_name
42
 
43
  self._db = self._client[db_name]
44
  logger.debug("MongoDB client initialized for database: {}", db_name)
 
99
  self._client = None
100
  self._db = None
101
 
102
+ @property
103
+ def name(self: Self) -> str:
104
+ return "mongo_db"
105
+
106
+ async def is_healthy(self: Self) -> bool:
107
+ return await self.ping()
108
+
109
+
110
  class MongoDBResource(AsyncResource):
111
  async def init(self: Self, settings: Settings) -> MongoDB:
112
+ logger.info("Initializing MongoDB connection for database: {}", settings.mongodb_name)
113
  mongo_db = MongoDB(settings=settings)
114
  mongo_db.connect()
115
  await self._test_connection(mongo_db)
src/ctp_slack_bot/db/repositories/mongo_db_vectorized_chunk_repository.py CHANGED
@@ -1,7 +1,7 @@
1
  from dependency_injector.resources import AsyncResource
2
  from loguru import logger
3
  from pymongo import ASCENDING, ReturnDocument
4
- from typing import Any, Collection, Dict, Iterable, Mapping, Optional, Self, Sequence, Set
5
 
6
  from ctp_slack_bot.core import Settings
7
  from ctp_slack_bot.models import Chunk, VectorizedChunk, VectorQuery
@@ -13,10 +13,6 @@ from ctp_slack_bot.db.repositories.vector_repository_base import VectorRepositor
13
  class MongoVectorizedChunkRepository(VectorRepositoryBase, VectorizedChunkRepository):
14
  """MongoDB implementation of VectorizedChunkRepository"""
15
 
16
- def __init__(self: Self, **data: Dict[str, Any]) -> None:
17
- super().__init__(**data)
18
- logger.debug("Created {}", self.__class__.__name__)
19
-
20
  async def count_by_id(self: Self, parent_id: str, chunk_id: Optional[str] = None) -> int:
21
  if chunk_id is None:
22
  return await self.collection.count_documents({"parent_id": parent_id})
@@ -54,7 +50,7 @@ class MongoVectorizedChunkRepository(VectorRepositoryBase, VectorizedChunkReposi
54
  pipeline = [
55
  {
56
  "$vectorSearch": {
57
- "index": self.settings.VECTORIZED_CHUNKS_SEARCH_INDEX_NAME or f"{self.collection.name}_vector_index",
58
  "path": "embedding",
59
  "queryVector": query.query_embeddings,
60
  "numCandidates": query.k * 10,
@@ -133,12 +129,20 @@ class MongoVectorizedChunkRepository(VectorRepositoryBase, VectorizedChunkReposi
133
  await super().ensure_indices_exist()
134
  index_name = "parent_chunk_unique"
135
  existing_indices = await self.collection.index_information()
136
- if index_name not in existing_indices:
 
 
 
137
  await self.collection.create_index([("parent_id", ASCENDING), ("chunk_id", ASCENDING)], unique=True, name=index_name)
138
 
 
 
 
 
 
139
  class MongoVectorizedChunkRepositoryResource(AsyncResource):
140
  async def init(self: Self, settings: Settings, mongo_db: MongoDB) -> MongoVectorizedChunkRepository:
141
- vectorized_chunk_collection = await mongo_db.get_collection(settings.VECTORIZED_CHUNKS_COLLECTION_NAME)
142
  vectorized_chunk_repository = MongoVectorizedChunkRepository(settings=settings, collection=vectorized_chunk_collection)
143
  await vectorized_chunk_repository.ensure_indices_exist()
144
  return vectorized_chunk_repository
 
1
  from dependency_injector.resources import AsyncResource
2
  from loguru import logger
3
  from pymongo import ASCENDING, ReturnDocument
4
+ from typing import Any, Collection, Iterable, Mapping, Optional, Self, Sequence, Set
5
 
6
  from ctp_slack_bot.core import Settings
7
  from ctp_slack_bot.models import Chunk, VectorizedChunk, VectorQuery
 
13
  class MongoVectorizedChunkRepository(VectorRepositoryBase, VectorizedChunkRepository):
14
  """MongoDB implementation of VectorizedChunkRepository"""
15
 
 
 
 
 
16
  async def count_by_id(self: Self, parent_id: str, chunk_id: Optional[str] = None) -> int:
17
  if chunk_id is None:
18
  return await self.collection.count_documents({"parent_id": parent_id})
 
50
  pipeline = [
51
  {
52
  "$vectorSearch": {
53
+ "index": self.settings.vectorized_chunks_search_index_name or f"{self.collection.name}_vector_index",
54
  "path": "embedding",
55
  "queryVector": query.query_embeddings,
56
  "numCandidates": query.k * 10,
 
129
  await super().ensure_indices_exist()
130
  index_name = "parent_chunk_unique"
131
  existing_indices = await self.collection.index_information()
132
+ logger.debug("{} existing indices were found: {}", len(existing_indices), existing_indices)
133
+ if index_name in existing_indices:
134
+ logger.debug("Index, {}, already exists; duplicate index will not be created.", index_name)
135
+ else:
136
  await self.collection.create_index([("parent_id", ASCENDING), ("chunk_id", ASCENDING)], unique=True, name=index_name)
137
 
138
+ @property
139
+ def name(self: Self) -> str:
140
+ return "mongo_db_vectorized_chunk_repository"
141
+
142
+
143
  class MongoVectorizedChunkRepositoryResource(AsyncResource):
144
  async def init(self: Self, settings: Settings, mongo_db: MongoDB) -> MongoVectorizedChunkRepository:
145
+ vectorized_chunk_collection = await mongo_db.get_collection(settings.vectorized_chunks_collection_name)
146
  vectorized_chunk_repository = MongoVectorizedChunkRepository(settings=settings, collection=vectorized_chunk_collection)
147
  await vectorized_chunk_repository.ensure_indices_exist()
148
  return vectorized_chunk_repository
src/ctp_slack_bot/db/repositories/vector_repository_base.py CHANGED
@@ -1,22 +1,20 @@
1
- from abc import ABC
2
  from loguru import logger
3
  from motor.motor_asyncio import AsyncIOMotorCollection
4
- from pydantic import BaseModel
5
  from pymongo.operations import SearchIndexModel
6
  from typing import Self
7
 
8
- from ctp_slack_bot.core import Settings
9
 
10
- class VectorRepositoryBase(ABC, BaseModel):
 
11
  """MongoDB implementation of VectorizedChunkRepository"""
12
 
 
 
13
  settings: Settings
14
  collection: AsyncIOMotorCollection
15
 
16
- class Config:
17
- frozen=True
18
- arbitrary_types_allowed = True
19
-
20
  async def ensure_indices_exist(self: Self) -> None:
21
  """Ensure that indices exist."""
22
  await self.ensure_search_index_exists()
@@ -25,12 +23,12 @@ class VectorRepositoryBase(ABC, BaseModel):
25
  """
26
  Ensure that a vector search index exists.
27
  """
28
- index_name = self.settings.VECTORIZED_CHUNKS_SEARCH_INDEX_NAME or f"{self.collection.name}_vector_index"
29
  try:
30
- existing_indexes = [index["name"] async for index in self.collection.list_search_indexes()]
31
- logger.debug("{} existing indices were found: {}", len(existing_indexes), existing_indexes)
32
- if index_name in existing_indexes:
33
- logger.debug("Index '{}' already exists; duplicate index will not be created.", index_name)
34
  return
35
 
36
  # Create search index model using MongoDB's recommended approach.
@@ -40,7 +38,7 @@ class VectorRepositoryBase(ABC, BaseModel):
40
  {
41
  "type": "vector",
42
  "path": "embedding",
43
- "numDimensions": self.settings.VECTOR_DIMENSION,
44
  "similarity": "cosine",
45
  "quantization": "scalar"
46
  }
@@ -50,13 +48,12 @@ class VectorRepositoryBase(ABC, BaseModel):
50
  type="vectorSearch"
51
  )
52
  result = await self.collection.create_search_index(search_index_model)
53
- logger.info("Vector search index '{}' created for collection {}.", result, self.collection.name)
54
  except Exception as e:
55
  if "command not found" in str(e).lower():
56
  logger.warning("Vector search not supported by this MongoDB instance. Some functionality may be limited.")
57
- # Create a fallback standard index on embedding field.
58
- await self.collection.create_index("embedding")
59
- logger.info("Created standard index on {} field as fallback.", "embedding")
60
  else:
61
  logger.error("Failed to create any index: {}", e)
62
  raise
 
 
1
  from loguru import logger
2
  from motor.motor_asyncio import AsyncIOMotorCollection
3
+ from pydantic import ConfigDict
4
  from pymongo.operations import SearchIndexModel
5
  from typing import Self
6
 
7
+ from ctp_slack_bot.core import ApplicationComponentBase, Settings
8
 
9
+
10
+ class VectorRepositoryBase(ApplicationComponentBase):
11
  """MongoDB implementation of VectorizedChunkRepository"""
12
 
13
+ model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
14
+
15
  settings: Settings
16
  collection: AsyncIOMotorCollection
17
 
 
 
 
 
18
  async def ensure_indices_exist(self: Self) -> None:
19
  """Ensure that indices exist."""
20
  await self.ensure_search_index_exists()
 
23
  """
24
  Ensure that a vector search index exists.
25
  """
26
+ index_name = self.settings.vectorized_chunks_search_index_name or f"{self.collection.name}_vector_index"
27
  try:
28
+ existing_indices = [index["name"] async for index in self.collection.list_search_indexes()]
29
+ logger.debug("{} existing indices were found: {}", len(existing_indices), existing_indices)
30
+ if index_name in existing_indices:
31
+ logger.debug("Index, {}, already exists; duplicate index will not be created.", index_name)
32
  return
33
 
34
  # Create search index model using MongoDB's recommended approach.
 
38
  {
39
  "type": "vector",
40
  "path": "embedding",
41
+ "numDimensions": self.settings.vector_dimension,
42
  "similarity": "cosine",
43
  "quantization": "scalar"
44
  }
 
48
  type="vectorSearch"
49
  )
50
  result = await self.collection.create_search_index(search_index_model)
51
+ logger.info("Vector search index, {}, created for collection {}.", result, self.collection.name)
52
  except Exception as e:
53
  if "command not found" in str(e).lower():
54
  logger.warning("Vector search not supported by this MongoDB instance. Some functionality may be limited.")
55
+ await self.collection.create_index("embedding") # Create a fallback standard index on embedding field.
56
+ logger.info("Created standard index on field, {}, as fallback.", "embedding")
 
57
  else:
58
  logger.error("Failed to create any index: {}", e)
59
  raise
src/ctp_slack_bot/db/repositories/vectorized_chunk_repository.py CHANGED
@@ -1,10 +1,12 @@
1
- from abc import ABC, abstractmethod
2
  from pydantic import BaseModel
3
  from typing import Any, Collection, Iterable, Mapping, Optional, Self, Sequence, Set
4
 
 
5
  from ctp_slack_bot.models import Chunk, VectorizedChunk, VectorQuery
6
 
7
- class VectorizedChunkRepository(ABC, BaseModel):
 
8
  """Repository interface for VectorizedChunk entities."""
9
 
10
  @abstractmethod
 
1
+ from abc import abstractmethod
2
  from pydantic import BaseModel
3
  from typing import Any, Collection, Iterable, Mapping, Optional, Self, Sequence, Set
4
 
5
+ from ctp_slack_bot.core import ApplicationComponentBase
6
  from ctp_slack_bot.models import Chunk, VectorizedChunk, VectorQuery
7
 
8
+
9
+ class VectorizedChunkRepository(ApplicationComponentBase):
10
  """Repository interface for VectorizedChunk entities."""
11
 
12
  @abstractmethod
src/ctp_slack_bot/mime_type_handlers/base.py CHANGED
@@ -1,6 +1,6 @@
1
- from abc import ABCMeta, abstractmethod
2
  from functools import lru_cache
3
- from typing import Any, ClassVar, Dict, Mapping, Optional
4
 
5
  from ctp_slack_bot.models import Content
6
 
@@ -9,13 +9,13 @@ class MimeTypeHandlerMeta(type):
9
 
10
  _registry: ClassVar[dict[str, type["BaseMimeTypeHandler"]]] = {}
11
 
12
- def __init__(cls, name: str, bases: tuple[type, ...], dict: Dict[str, Any]) -> None:
13
  super().__init__(name, bases, dict)
14
  if hasattr(cls, "MIME_TYPE"):
15
  MimeTypeHandlerMeta._registry[cls.MIME_TYPE] = cls
16
 
17
 
18
- class MimeTypeHandlerABCMeta(MimeTypeHandlerMeta, ABCMeta):
19
  pass
20
 
21
 
 
1
+ from abc import ABC, abstractmethod
2
  from functools import lru_cache
3
+ from typing import Any, ClassVar, Mapping, Optional
4
 
5
  from ctp_slack_bot.models import Content
6
 
 
9
 
10
  _registry: ClassVar[dict[str, type["BaseMimeTypeHandler"]]] = {}
11
 
12
+ def __init__(cls, name: str, bases: tuple[type, ...], dict: dict[str, Any]) -> None:
13
  super().__init__(name, bases, dict)
14
  if hasattr(cls, "MIME_TYPE"):
15
  MimeTypeHandlerMeta._registry[cls.MIME_TYPE] = cls
16
 
17
 
18
+ class MimeTypeHandlerABCMeta(MimeTypeHandlerMeta, type(ABC)):
19
  pass
20
 
21
 
src/ctp_slack_bot/mime_type_handlers/text/vtt.py CHANGED
@@ -11,6 +11,7 @@ from ctp_slack_bot.models import Content, WebVTTContent, WebVTTFrame
11
 
12
  ISO_DATE_TIME_PATTERN = compile_re(r"Start time: (\d{4}-\d{2}-\d{2}(?: \d{2}:\d{2}:\d{2}(?:Z|[+-]\d{2}:\d{2})?)?)")
13
 
 
14
  class WebVTTMimeTypeHandler(MimeTypeHandler):
15
 
16
  MIME_TYPE = "text/vtt"
 
11
 
12
  ISO_DATE_TIME_PATTERN = compile_re(r"Start time: (\d{4}-\d{2}-\d{2}(?: \d{2}:\d{2}:\d{2}(?:Z|[+-]\d{2}:\d{2})?)?)")
13
 
14
+
15
  class WebVTTMimeTypeHandler(MimeTypeHandler):
16
 
17
  MIME_TYPE = "text/vtt"
src/ctp_slack_bot/models/base.py CHANGED
@@ -1,19 +1,21 @@
1
- from abc import ABC, abstractmethod
2
  from pydantic import BaseModel, ConfigDict, Field, field_validator
 
3
  from typing import Any, final, Mapping, Optional, Self
4
 
 
5
  from ctp_slack_bot.utils import to_deep_immutable
6
 
7
 
8
  class Chunk(BaseModel):
9
  """A class representing a chunk of content."""
10
 
 
 
11
  text: str # The text representation
12
  parent_id: str # The source content’s identity
13
  chunk_id: str # This chunk’s identity—unique within the source content
14
- metadata: Mapping[str, Any] = Field(default_factory=dict)
15
-
16
- model_config = ConfigDict(frozen=True)
17
 
18
  @field_validator('metadata')
19
  @classmethod
@@ -32,12 +34,12 @@ class VectorQuery(BaseModel):
32
  filter_metadata: Optional filters for metadata fields
33
  """
34
 
 
 
35
  query_embeddings: tuple[float, ...]
36
  k: int
37
  score_threshold: float = Field(default=0.7)
38
- filter_metadata: Mapping[str, Any] = Field(default_factory=dict)
39
-
40
- model_config = ConfigDict(frozen=True)
41
 
42
  @field_validator('filter_metadata')
43
  @classmethod
@@ -52,7 +54,7 @@ class VectorizedChunk(Chunk):
52
  embedding: tuple[float, ...] # The vector representation
53
 
54
 
55
- class Content(ABC, BaseModel):
56
  """An abstract base class for all types of content."""
57
 
58
  model_config = ConfigDict(frozen=True)
 
1
+ from abc import abstractmethod
2
  from pydantic import BaseModel, ConfigDict, Field, field_validator
3
+ from types import MappingProxyType
4
  from typing import Any, final, Mapping, Optional, Self
5
 
6
+ from ctp_slack_bot.core import AbstractBaseModel
7
  from ctp_slack_bot.utils import to_deep_immutable
8
 
9
 
10
  class Chunk(BaseModel):
11
  """A class representing a chunk of content."""
12
 
13
+ model_config = ConfigDict(frozen=True)
14
+
15
  text: str # The text representation
16
  parent_id: str # The source content’s identity
17
  chunk_id: str # This chunk’s identity—unique within the source content
18
+ metadata: Mapping[str, Any] = Field(default_factory=lambda: MappingProxyType({}))
 
 
19
 
20
  @field_validator('metadata')
21
  @classmethod
 
34
  filter_metadata: Optional filters for metadata fields
35
  """
36
 
37
+ model_config = ConfigDict(frozen=True)
38
+
39
  query_embeddings: tuple[float, ...]
40
  k: int
41
  score_threshold: float = Field(default=0.7)
42
+ filter_metadata: Mapping[str, Any] = Field(default_factory=lambda: MappingProxyType({}))
 
 
43
 
44
  @field_validator('filter_metadata')
45
  @classmethod
 
54
  embedding: tuple[float, ...] # The vector representation
55
 
56
 
57
+ class Content(AbstractBaseModel):
58
  """An abstract base class for all types of content."""
59
 
60
  model_config = ConfigDict(frozen=True)
src/ctp_slack_bot/models/google_drive.py CHANGED
@@ -6,14 +6,14 @@ from typing import Self
6
  class GoogleDriveMetadata(BaseModel):
7
  """Represents Google Drive file or folder metadata."""
8
 
 
 
9
  id: str
10
  name: str
11
  modified_time: datetime
12
  mime_type: str
13
  folder_path: str
14
 
15
- model_config = ConfigDict(frozen=True)
16
-
17
  @classmethod
18
  def from_folder_path_and_dict(cls, folder_path: str, dict: dict) -> Self:
19
  id = dict["id"]
 
6
  class GoogleDriveMetadata(BaseModel):
7
  """Represents Google Drive file or folder metadata."""
8
 
9
+ model_config = ConfigDict(frozen=True)
10
+
11
  id: str
12
  name: str
13
  modified_time: datetime
14
  mime_type: str
15
  folder_path: str
16
 
 
 
17
  @classmethod
18
  def from_folder_path_and_dict(cls, folder_path: str, dict: dict) -> Self:
19
  id = dict["id"]
src/ctp_slack_bot/models/slack.py CHANGED
@@ -1,21 +1,26 @@
1
  from datetime import datetime
2
  from json import dumps
3
- from pydantic import BaseModel, ConfigDict, PositiveInt, PrivateAttr
4
  from types import MappingProxyType
5
  from typing import Any, Literal, Mapping, Optional, Self
6
 
7
  from ctp_slack_bot.models.base import Chunk, Content
8
 
 
9
  class SlackEventPayload(BaseModel):
10
  """Represents a general event payload from Slack."""
 
 
 
11
  type: str
12
  event_ts: str
13
 
14
- model_config = ConfigDict(extra='allow', frozen=True)
15
 
16
  class SlackEvent(BaseModel):
17
  """Represents a general event from Slack."""
18
 
 
 
19
  token: str
20
  team_id: str
21
  api_app_id: str
@@ -25,24 +30,25 @@ class SlackEvent(BaseModel):
25
  event_time: int
26
  authed_users: tuple[str, ...]
27
 
28
- model_config = ConfigDict(frozen=True)
29
 
30
  class SlackUserTimestampPair(BaseModel):
31
  """Represents a Slack user-timestamp pair."""
32
 
 
 
33
  user: str
34
  ts: str
35
 
36
- model_config = ConfigDict(frozen=True)
37
 
38
  class SlackReaction(BaseModel):
39
  """Represents a Slack reaction information."""
40
 
 
 
41
  name: str
42
  count: PositiveInt
43
  users: tuple[str, ...]
44
 
45
- model_config = ConfigDict(frozen=True)
46
 
47
  class SlackMessage(Content):
48
  """Represents a message from Slack after adaptation."""
@@ -76,9 +82,12 @@ class SlackMessage(Content):
76
  "modificationTime": datetime.fromtimestamp(float(self.ts))
77
  })
78
 
 
79
  class SlackResponse(BaseModel): # TODO: This should also be based on Content as it is a SlackMessage―just not one for which we know the identity yet.
80
  """Represents a response message to be sent to Slack."""
81
 
 
 
82
  text: str
83
  channel: Optional[str]
84
  thread_ts: Optional[str] = None
 
1
  from datetime import datetime
2
  from json import dumps
3
+ from pydantic import BaseModel, ConfigDict, PositiveInt
4
  from types import MappingProxyType
5
  from typing import Any, Literal, Mapping, Optional, Self
6
 
7
  from ctp_slack_bot.models.base import Chunk, Content
8
 
9
+
10
  class SlackEventPayload(BaseModel):
11
  """Represents a general event payload from Slack."""
12
+
13
+ model_config = ConfigDict(extra='allow', frozen=True)
14
+
15
  type: str
16
  event_ts: str
17
 
 
18
 
19
  class SlackEvent(BaseModel):
20
  """Represents a general event from Slack."""
21
 
22
+ model_config = ConfigDict(frozen=True)
23
+
24
  token: str
25
  team_id: str
26
  api_app_id: str
 
30
  event_time: int
31
  authed_users: tuple[str, ...]
32
 
 
33
 
34
  class SlackUserTimestampPair(BaseModel):
35
  """Represents a Slack user-timestamp pair."""
36
 
37
+ model_config = ConfigDict(frozen=True)
38
+
39
  user: str
40
  ts: str
41
 
 
42
 
43
  class SlackReaction(BaseModel):
44
  """Represents a Slack reaction information."""
45
 
46
+ model_config = ConfigDict(frozen=True)
47
+
48
  name: str
49
  count: PositiveInt
50
  users: tuple[str, ...]
51
 
 
52
 
53
  class SlackMessage(Content):
54
  """Represents a message from Slack after adaptation."""
 
82
  "modificationTime": datetime.fromtimestamp(float(self.ts))
83
  })
84
 
85
+
86
  class SlackResponse(BaseModel): # TODO: This should also be based on Content as it is a SlackMessage―just not one for which we know the identity yet.
87
  """Represents a response message to be sent to Slack."""
88
 
89
+ model_config = ConfigDict(frozen=True)
90
+
91
  text: str
92
  channel: Optional[str]
93
  thread_ts: Optional[str] = None
src/ctp_slack_bot/models/webvtt.py CHANGED
@@ -18,14 +18,14 @@ SPEAKER_SPEECH_TEXT_SEPARATOR = ": "
18
  class WebVTTFrame(BaseModel):
19
  """Represents a WebVTT frame"""
20
 
 
 
21
  identifier: str
22
  start: timedelta
23
  end: timedelta
24
  speaker: Optional[str] = None
25
  speech: str
26
 
27
- model_config = ConfigDict(frozen=True)
28
-
29
  @classmethod
30
  def from_webvtt_caption(cls, caption: Caption, index: int) -> Self:
31
  identifier = caption.identifier if caption.identifier else str(index)
@@ -42,7 +42,7 @@ class WebVTTContent(Content):
42
  """Represents parsed WebVTT content."""
43
 
44
  id: str
45
- metadata: Mapping[str, Any] = Field(default_factory=dict)
46
  start_time: Optional[datetime]
47
  frames: tuple[WebVTTFrame, ...]
48
 
 
18
  class WebVTTFrame(BaseModel):
19
  """Represents a WebVTT frame"""
20
 
21
+ model_config = ConfigDict(frozen=True)
22
+
23
  identifier: str
24
  start: timedelta
25
  end: timedelta
26
  speaker: Optional[str] = None
27
  speech: str
28
 
 
 
29
  @classmethod
30
  def from_webvtt_caption(cls, caption: Caption, index: int) -> Self:
31
  identifier = caption.identifier if caption.identifier else str(index)
 
42
  """Represents parsed WebVTT content."""
43
 
44
  id: str
45
+ metadata: Mapping[str, Any] = Field(default_factory=lambda: MappingProxyType({}))
46
  start_time: Optional[datetime]
47
  frames: tuple[WebVTTFrame, ...]
48
 
src/ctp_slack_bot/services/__init__.py CHANGED
@@ -1,4 +1,5 @@
1
  from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
 
2
  from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
3
  from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
4
  from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
 
1
  from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
2
+ from ctp_slack_bot.services.application_health_service import ApplicationHealthService
3
  from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
4
  from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
5
  from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
src/ctp_slack_bot/services/answer_retrieval_service.py CHANGED
@@ -1,30 +1,25 @@
1
  from loguru import logger
2
- from pydantic import BaseModel
3
  from typing import Collection, Self
4
 
5
- from ctp_slack_bot.core import Settings
6
  from ctp_slack_bot.enums import EventType
7
  from ctp_slack_bot.models import Chunk, SlackMessage, SlackResponse
8
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
9
  from ctp_slack_bot.services.language_model_service import LanguageModelService
10
 
11
 
12
- class AnswerRetrievalService(BaseModel):
13
  """
14
  Service for context-based answer retrievel from a language model.
15
  """
16
 
 
 
17
  settings: Settings
18
  event_brokerage_service: EventBrokerageService
19
  language_model_service: LanguageModelService
20
 
21
- class Config:
22
- frozen=True
23
-
24
- def __init__(self: Self, **data) -> None:
25
- super().__init__(**data)
26
- logger.debug("Created {}", self.__class__.__name__)
27
-
28
  async def push(self: Self, question: SlackMessage, context: Collection[Chunk]) -> None:
29
  channel_to_respond_to = question.channel
30
  thread_to_respond_to = question.thread_ts if question.thread_ts else question.ts
@@ -32,3 +27,7 @@ class AnswerRetrievalService(BaseModel):
32
  logger.debug("Pushing response to channel {} and thread {}: {}", channel_to_respond_to, thread_to_respond_to, answer)
33
  slack_response = SlackResponse(text=answer, channel=channel_to_respond_to, thread_ts=thread_to_respond_to)
34
  await self.event_brokerage_service.publish(EventType.OUTGOING_SLACK_RESPONSE, slack_response)
 
 
 
 
 
1
  from loguru import logger
2
+ from pydantic import ConfigDict
3
  from typing import Collection, Self
4
 
5
+ from ctp_slack_bot.core import ApplicationComponentBase, Settings
6
  from ctp_slack_bot.enums import EventType
7
  from ctp_slack_bot.models import Chunk, SlackMessage, SlackResponse
8
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
9
  from ctp_slack_bot.services.language_model_service import LanguageModelService
10
 
11
 
12
+ class AnswerRetrievalService(ApplicationComponentBase):
13
  """
14
  Service for context-based answer retrievel from a language model.
15
  """
16
 
17
+ model_config = ConfigDict(frozen=True)
18
+
19
  settings: Settings
20
  event_brokerage_service: EventBrokerageService
21
  language_model_service: LanguageModelService
22
 
 
 
 
 
 
 
 
23
  async def push(self: Self, question: SlackMessage, context: Collection[Chunk]) -> None:
24
  channel_to_respond_to = question.channel
25
  thread_to_respond_to = question.thread_ts if question.thread_ts else question.ts
 
27
  logger.debug("Pushing response to channel {} and thread {}: {}", channel_to_respond_to, thread_to_respond_to, answer)
28
  slack_response = SlackResponse(text=answer, channel=channel_to_respond_to, thread_ts=thread_to_respond_to)
29
  await self.event_brokerage_service.publish(EventType.OUTGOING_SLACK_RESPONSE, slack_response)
30
+
31
+ @property
32
+ def name(self: Self) -> str:
33
+ return "answer_retrieval_service"
src/ctp_slack_bot/services/application_database_service.py CHANGED
@@ -1,25 +1,20 @@
1
  from datetime import datetime
2
  from loguru import logger
3
- from pydantic import BaseModel, PrivateAttr
4
  from typing import Iterable, Mapping, Self
5
 
6
- from ctp_slack_bot.core import Settings
7
  from ctp_slack_bot.db import MongoDB
8
 
9
 
10
- class ApplicationDatabaseService(BaseModel):
11
  """Service for retrieving and persisting application state."""
12
 
 
 
13
  settings: Settings
14
  mongo_db: MongoDB # TODO: This should be replaced following the repository pattern―one repository class per collection.
15
 
16
- class Config:
17
- frozen=True
18
-
19
- def __init__(self: Self, **data) -> None:
20
- super().__init__(**data)
21
- logger.debug("Created {}", self.__class__.__name__)
22
-
23
  async def get_last_modification_times_by_file_paths(self: Self, file_paths: Iterable[str]) -> Mapping[str, datetime]:
24
  """Retrieve the last modification time for each file path."""
25
  raise NotImplementedError() # TODO
@@ -27,3 +22,7 @@ class ApplicationDatabaseService(BaseModel):
27
  async def set_last_modification_time_by_file_path(self: Self, file_path: str, modification_time: datetime) -> None:
28
  """Set the last modification time for a file path."""
29
  raise NotImplementedError() # TODO
 
 
 
 
 
1
  from datetime import datetime
2
  from loguru import logger
3
+ from pydantic import ConfigDict
4
  from typing import Iterable, Mapping, Self
5
 
6
+ from ctp_slack_bot.core import ApplicationComponentBase, Settings
7
  from ctp_slack_bot.db import MongoDB
8
 
9
 
10
+ class ApplicationDatabaseService(ApplicationComponentBase):
11
  """Service for retrieving and persisting application state."""
12
 
13
+ model_config = ConfigDict(frozen=True)
14
+
15
  settings: Settings
16
  mongo_db: MongoDB # TODO: This should be replaced following the repository pattern―one repository class per collection.
17
 
 
 
 
 
 
 
 
18
  async def get_last_modification_times_by_file_paths(self: Self, file_paths: Iterable[str]) -> Mapping[str, datetime]:
19
  """Retrieve the last modification time for each file path."""
20
  raise NotImplementedError() # TODO
 
22
  async def set_last_modification_time_by_file_path(self: Self, file_path: str, modification_time: datetime) -> None:
23
  """Set the last modification time for a file path."""
24
  raise NotImplementedError() # TODO
25
+
26
+ @property
27
+ def name(self: Self) -> str:
28
+ return "application_database_service"
src/ctp_slack_bot/services/application_health_service.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from loguru import logger
2
+ from pydantic import ConfigDict
3
+ from types import MappingProxyType
4
+ from typing import Collection, Mapping, Self
5
+
6
+ from ctp_slack_bot.core import ApplicationComponentBase, HealthReportingApplicationComponentBase
7
+
8
+
9
+ class ApplicationHealthService(ApplicationComponentBase):
10
+ """
11
+ Service for checking and reporting application health.
12
+ """
13
+
14
+ model_config = ConfigDict(frozen=True)
15
+
16
+ services: list[HealthReportingApplicationComponentBase]
17
+
18
+ async def get_health(self: Self) -> Mapping[str, bool]:
19
+ return MappingProxyType({service.name: await service.is_healthy()
20
+ for service
21
+ in self.services})
22
+
23
+ @property
24
+ def name(self: Self) -> str:
25
+ return "application_health_service"
src/ctp_slack_bot/services/content_ingestion_service.py CHANGED
@@ -1,32 +1,31 @@
1
  from loguru import logger
2
- from pydantic import BaseModel
3
- from typing import Self, Sequence, Set
4
 
5
- from ctp_slack_bot.core import Settings
6
  from ctp_slack_bot.db.repositories import VectorizedChunkRepository
7
  from ctp_slack_bot.enums import EventType
8
  from ctp_slack_bot.models import Chunk, Content, SlackMessage
9
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
10
  from ctp_slack_bot.services.vectorization_service import VectorizationService
11
 
12
- class ContentIngestionService(BaseModel):
 
13
  """
14
  Service for ingesting content.
15
  """
16
 
 
 
17
  settings: Settings
18
  event_brokerage_service: EventBrokerageService
19
  vectorized_chunk_repository: VectorizedChunkRepository
20
  vectorization_service: VectorizationService
21
 
22
- class Config:
23
- frozen=True
24
-
25
- def __init__(self: Self, **data) -> None:
26
- super().__init__(**data)
27
  self.event_brokerage_service.subscribe(EventType.INCOMING_CONTENT, self.process_incoming_content)
28
  # self.event_brokerage_service.subscribe(EventType.INCOMING_SLACK_MESSAGE, self.process_incoming_slack_message)
29
- logger.debug("Created {}", self.__class__.__name__)
30
 
31
  async def process_incoming_content(self: Self, content: Content) -> None:
32
  logger.debug("Content ingestion service received content with metadata: {}", content.get_metadata())
@@ -46,3 +45,7 @@ class ContentIngestionService(BaseModel):
46
  async def __vectorize_and_store_chunks_in_database(self: Self, chunks: Sequence[Chunk]) -> Set[str]:
47
  vectorized_chunks = await self.vectorization_service.vectorize(chunks)
48
  return await self.vectorized_chunk_repository.insert_many(vectorized_chunks)
 
 
 
 
 
1
  from loguru import logger
2
+ from pydantic import ConfigDict
3
+ from typing import Any, Self, Sequence, Set
4
 
5
+ from ctp_slack_bot.core import ApplicationComponentBase, Settings
6
  from ctp_slack_bot.db.repositories import VectorizedChunkRepository
7
  from ctp_slack_bot.enums import EventType
8
  from ctp_slack_bot.models import Chunk, Content, SlackMessage
9
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
10
  from ctp_slack_bot.services.vectorization_service import VectorizationService
11
 
12
+
13
+ class ContentIngestionService(ApplicationComponentBase):
14
  """
15
  Service for ingesting content.
16
  """
17
 
18
+ model_config = ConfigDict(frozen=True)
19
+
20
  settings: Settings
21
  event_brokerage_service: EventBrokerageService
22
  vectorized_chunk_repository: VectorizedChunkRepository
23
  vectorization_service: VectorizationService
24
 
25
+ def model_post_init(self: Self, context: Any, /) -> None:
26
+ super().model_post_init(context)
 
 
 
27
  self.event_brokerage_service.subscribe(EventType.INCOMING_CONTENT, self.process_incoming_content)
28
  # self.event_brokerage_service.subscribe(EventType.INCOMING_SLACK_MESSAGE, self.process_incoming_slack_message)
 
29
 
30
  async def process_incoming_content(self: Self, content: Content) -> None:
31
  logger.debug("Content ingestion service received content with metadata: {}", content.get_metadata())
 
45
  async def __vectorize_and_store_chunks_in_database(self: Self, chunks: Sequence[Chunk]) -> Set[str]:
46
  vectorized_chunks = await self.vectorization_service.vectorize(chunks)
47
  return await self.vectorized_chunk_repository.insert_many(vectorized_chunks)
48
+
49
+ @property
50
+ def name(self: Self) -> str:
51
+ return "content_ingestion_service"
src/ctp_slack_bot/services/context_retrieval_service.py CHANGED
@@ -1,28 +1,24 @@
1
  from loguru import logger
2
- from pydantic import BaseModel
3
  from typing import Self, Sequence
4
 
5
- from ctp_slack_bot.core.config import Settings
6
  from ctp_slack_bot.db.repositories import VectorizedChunkRepository
7
  from ctp_slack_bot.models import Chunk, SlackMessage, VectorQuery, VectorizedChunk
8
  from ctp_slack_bot.services.vectorization_service import VectorizationService
9
 
10
- class ContextRetrievalService(BaseModel):
 
11
  """
12
  Service for retrieving relevant context from the vector database based on user questions.
13
  """
14
 
 
 
15
  settings: Settings
16
  vectorization_service: VectorizationService
17
  vectorized_chunk_repository: VectorizedChunkRepository
18
 
19
- class Config:
20
- frozen=True
21
-
22
- def __init__(self: Self, **data) -> None:
23
- super().__init__(**data)
24
- logger.debug("Created {}", self.__class__.__name__)
25
-
26
  async def get_context(self: Self, message: SlackMessage) -> Sequence[Chunk]:
27
  """
28
  Retrieve relevant context for a given SlackMessage by vectorizing the message and
@@ -43,8 +39,8 @@ class ContextRetrievalService(BaseModel):
43
 
44
  query = VectorQuery(
45
  query_embeddings=vectorized_message_chunks[0].embedding,
46
- k=self.settings.TOP_K_MATCHES,
47
- score_threshold=self.settings.SCORE_THRESHOLD,
48
  filter_metadata={} # Can be expanded to include filters based on message metadata
49
  )
50
 
@@ -55,3 +51,7 @@ class ContextRetrievalService(BaseModel):
55
  except Exception as e:
56
  logger.error("An error occurred while searching the vector database for context: {}", e)
57
  return ()
 
 
 
 
 
1
  from loguru import logger
2
+ from pydantic import ConfigDict
3
  from typing import Self, Sequence
4
 
5
+ from ctp_slack_bot.core import ApplicationComponentBase, Settings
6
  from ctp_slack_bot.db.repositories import VectorizedChunkRepository
7
  from ctp_slack_bot.models import Chunk, SlackMessage, VectorQuery, VectorizedChunk
8
  from ctp_slack_bot.services.vectorization_service import VectorizationService
9
 
10
+
11
+ class ContextRetrievalService(ApplicationComponentBase):
12
  """
13
  Service for retrieving relevant context from the vector database based on user questions.
14
  """
15
 
16
+ model_config = ConfigDict(frozen=True)
17
+
18
  settings: Settings
19
  vectorization_service: VectorizationService
20
  vectorized_chunk_repository: VectorizedChunkRepository
21
 
 
 
 
 
 
 
 
22
  async def get_context(self: Self, message: SlackMessage) -> Sequence[Chunk]:
23
  """
24
  Retrieve relevant context for a given SlackMessage by vectorizing the message and
 
39
 
40
  query = VectorQuery(
41
  query_embeddings=vectorized_message_chunks[0].embedding,
42
+ k=self.settings.top_k_matches,
43
+ score_threshold=self.settings.score_threshold,
44
  filter_metadata={} # Can be expanded to include filters based on message metadata
45
  )
46
 
 
51
  except Exception as e:
52
  logger.error("An error occurred while searching the vector database for context: {}", e)
53
  return ()
54
+
55
+ @property
56
+ def name(self: Self) -> str:
57
+ return "context_retrieval_service"
src/ctp_slack_bot/services/embeddings_model_service.py CHANGED
@@ -1,25 +1,24 @@
1
  from loguru import logger
2
  from openai import AsyncOpenAI
3
- from pydantic import BaseModel, PrivateAttr
4
- from typing import Any, Dict, Sequence, Self
5
 
6
- from ctp_slack_bot.core import Settings
7
 
8
- class EmbeddingsModelService(BaseModel):
 
9
  """
10
  Service for embeddings model operations.
11
  """
12
 
13
- settings: Settings
14
- _open_ai_client: PrivateAttr = PrivateAttr()
15
 
16
- class Config:
17
- frozen=True
18
 
19
- def __init__(self: Self, **data: Dict[str, Any]) -> None:
20
- super().__init__(**data)
21
- self._open_ai_client = AsyncOpenAI(api_key=self.settings.OPENAI_API_KEY.get_secret_value())
22
- logger.debug("Created {}", self.__class__.__name__)
23
 
24
  async def get_embeddings(self: Self, texts: Sequence[str]) -> Sequence[Sequence[float]]:
25
  """Get embeddings for a collection of texts using OpenAI’s API.
@@ -28,20 +27,24 @@ class EmbeddingsModelService(BaseModel):
28
  texts (Collection[str]): Collection of text chunks to embed
29
 
30
  Returns:
31
- NDArray: Array of embeddings with shape (n_texts, VECTOR_DIMENSION)
32
 
33
  Raises:
34
  ValueError: If the embedding dimensions don't match expected size
35
  """
36
  logger.debug("Creating embeddings for {} text string(s)…", len(texts))
37
  response = await self._open_ai_client.embeddings.create(
38
- model=self.settings.EMBEDDING_MODEL,
39
  input=texts,
40
  encoding_format="float" # Ensure we get raw float values.
41
  )
42
  embeddings = tuple(tuple(data.embedding) for data in response.data)
43
  match embeddings:
44
- case (first, _) if len(first) != self.settings.VECTOR_DIMENSION:
45
- logger.error("Embedding dimension mismatch and/or misconfiguration: expected configured dimension {}, but got {}.", self.settings.VECTOR_DIMENSION, len(first))
46
  raise ValueError() # TODO: raise a more specific type.
47
  return embeddings
 
 
 
 
 
1
  from loguru import logger
2
  from openai import AsyncOpenAI
3
+ from pydantic import ConfigDict
4
+ from typing import Any, Sequence, Self
5
 
6
+ from ctp_slack_bot.core import ApplicationComponentBase, Settings
7
 
8
+
9
+ class EmbeddingsModelService(ApplicationComponentBase):
10
  """
11
  Service for embeddings model operations.
12
  """
13
 
14
+ model_config = ConfigDict(frozen=True)
 
15
 
16
+ settings: Settings
17
+ _open_ai_client: AsyncOpenAI
18
 
19
+ def model_post_init(self: Self, context: Any, /) -> None:
20
+ super().model_post_init(context)
21
+ self._open_ai_client = AsyncOpenAI(api_key=self.settings.openai_api_key.get_secret_value())
 
22
 
23
  async def get_embeddings(self: Self, texts: Sequence[str]) -> Sequence[Sequence[float]]:
24
  """Get embeddings for a collection of texts using OpenAI’s API.
 
27
  texts (Collection[str]): Collection of text chunks to embed
28
 
29
  Returns:
30
+ NDArray: Array of embeddings with shape (n_texts, vector_dimension)
31
 
32
  Raises:
33
  ValueError: If the embedding dimensions don't match expected size
34
  """
35
  logger.debug("Creating embeddings for {} text string(s)…", len(texts))
36
  response = await self._open_ai_client.embeddings.create(
37
+ model=self.settings.embedding_model,
38
  input=texts,
39
  encoding_format="float" # Ensure we get raw float values.
40
  )
41
  embeddings = tuple(tuple(data.embedding) for data in response.data)
42
  match embeddings:
43
+ case (first, _) if len(first) != self.settings.vector_dimension:
44
+ logger.error("Embedding dimension mismatch and/or misconfiguration: expected configured dimension {}, but got {}.", self.settings.vector_dimension, len(first))
45
  raise ValueError() # TODO: raise a more specific type.
46
  return embeddings
47
+
48
+ @property
49
+ def name(self: Self) -> str:
50
+ return "embeddings_model_service"
src/ctp_slack_bot/services/event_brokerage_service.py CHANGED
@@ -1,24 +1,21 @@
1
  from asyncio import create_task, iscoroutinefunction, to_thread
2
  from collections import defaultdict
3
  from loguru import logger
4
- from pydantic import BaseModel, PrivateAttr
5
- from typing import Any, Callable, Dict, List, Self
6
 
 
7
  from ctp_slack_bot.enums import EventType
8
 
9
- class EventBrokerageService(BaseModel):
 
10
  """
11
  Service for brokering events between services.
12
  """
13
 
14
- _subscribers: PrivateAttr = PrivateAttr(default_factory=lambda: defaultdict(list))
15
-
16
- class Config:
17
- frozen=True
18
 
19
- def __init__(self: Self, **data) -> None:
20
- super().__init__(**data)
21
- logger.debug("Created {}", self.__class__.__name__)
22
 
23
  def subscribe(self: Self, type: EventType, callback: Callable) -> None:
24
  """Subscribe to an event type with a callback function."""
@@ -45,3 +42,7 @@ class EventBrokerageService(BaseModel):
45
  create_task(to_thread(callback, data))
46
  except Exception as e:
47
  logger.error("Error scheduling synchronous callback to handle event {}: {}", type, e)
 
 
 
 
 
1
  from asyncio import create_task, iscoroutinefunction, to_thread
2
  from collections import defaultdict
3
  from loguru import logger
4
+ from pydantic import ConfigDict, PrivateAttr
5
+ from typing import Any, Callable, List, MutableMapping, Self
6
 
7
+ from ctp_slack_bot.core import ApplicationComponentBase
8
  from ctp_slack_bot.enums import EventType
9
 
10
+
11
+ class EventBrokerageService(ApplicationComponentBase):
12
  """
13
  Service for brokering events between services.
14
  """
15
 
16
+ model_config = ConfigDict(frozen=True)
 
 
 
17
 
18
+ _subscribers: MutableMapping[EventType, list[Callable]] = PrivateAttr(default_factory=lambda: defaultdict(list))
 
 
19
 
20
  def subscribe(self: Self, type: EventType, callback: Callable) -> None:
21
  """Subscribe to an event type with a callback function."""
 
42
  create_task(to_thread(callback, data))
43
  except Exception as e:
44
  logger.error("Error scheduling synchronous callback to handle event {}: {}", type, e)
45
+
46
+ @property
47
+ def name(self: Self) -> str:
48
+ return "event_brokerage_service"
src/ctp_slack_bot/services/google_drive_service.py CHANGED
@@ -1,17 +1,17 @@
1
  from datetime import datetime
2
  from cachetools import TTLCache
3
  from functools import reduce
4
- from google.oauth2 import service_account
5
- from googleapiclient.discovery import build
6
  from googleapiclient.http import MediaIoBaseDownload
7
  from googleapiclient.errors import HttpError
8
  from io import BytesIO
9
  from itertools import chain
10
  from loguru import logger
11
- from pydantic import BaseModel, PrivateAttr
12
- from typing import Collection, Optional, Self
13
 
14
- from ctp_slack_bot.core import Settings
15
  from ctp_slack_bot.models import GoogleDriveMetadata
16
 
17
 
@@ -19,40 +19,39 @@ FOLDER_MIME_TYPE: str = "application/vnd.google-apps.folder"
19
  PATH_SEPARATOR: str = "/"
20
 
21
 
22
- class GoogleDriveService(BaseModel):
23
  """Service for interacting with Google Drive."""
24
 
25
- settings: Settings
26
- _google_drive_client: PrivateAttr = PrivateAttr()
27
- _folder_cache: PrivateAttr = PrivateAttr(default_factory=lambda: TTLCache(maxsize=256, ttl=60))
28
 
29
- class Config:
30
- frozen=True
 
31
 
32
- def __init__(self: Self, **data) -> None:
33
- super().__init__(**data)
34
- credentials = service_account.Credentials.from_service_account_info({
35
  "type": "service_account",
36
- "project_id": self.settings.GOOGLE_PROJECT_ID,
37
- "private_key_id": self.settings.GOOGLE_PRIVATE_KEY_ID.get_secret_value(),
38
- "private_key": self.settings.GOOGLE_PRIVATE_KEY.get_secret_value(),
39
- "client_email": self.settings.GOOGLE_CLIENT_EMAIL,
40
- "client_id": self.settings.GOOGLE_CLIENT_ID,
41
- "token_uri": self.settings.GOOGLE_TOKEN_URI,
42
  }, scopes=["https://www.googleapis.com/auth/drive"])
43
  self._google_drive_client = build('drive', 'v3', credentials=credentials)
44
- logger.debug("Created {}", self.__class__.__name__)
45
 
46
  def _resolve_folder_id(self: Self, folder_path: str) -> Optional[str]:
47
  """Resolve a folder path to a Google Drive ID."""
48
 
49
  if not folder_path:
50
- return self.settings.GOOGLE_DRIVE_ROOT_ID
51
 
52
  if folder_path in self._folder_cache:
53
  return self._folder_cache[folder_path]
54
 
55
- current_id = self.settings.GOOGLE_DRIVE_ROOT_ID
56
  try:
57
  for part in folder_path.split(PATH_SEPARATOR):
58
  results = self._google_drive_client.files().list(
@@ -112,7 +111,7 @@ class GoogleDriveService(BaseModel):
112
  match item_path.rsplit(PATH_SEPARATOR, 1):
113
  case [item_name]:
114
  folder_path = ""
115
- folder_id = self.settings.GOOGLE_DRIVE_ROOT_ID
116
  case [folder_path, item_name]:
117
  folder_id = self._resolve_folder_id(folder_path)
118
 
@@ -151,3 +150,7 @@ class GoogleDriveService(BaseModel):
151
  except HttpError as e:
152
  logger.error("Error reading file by ID, {}: {}", file_id, e)
153
  return None
 
 
 
 
 
1
  from datetime import datetime
2
  from cachetools import TTLCache
3
  from functools import reduce
4
+ from google.oauth2.service_account import Credentials
5
+ from googleapiclient.discovery import build, Resource
6
  from googleapiclient.http import MediaIoBaseDownload
7
  from googleapiclient.errors import HttpError
8
  from io import BytesIO
9
  from itertools import chain
10
  from loguru import logger
11
+ from pydantic import ConfigDict, PrivateAttr
12
+ from typing import Any, Collection, Optional, Self
13
 
14
+ from ctp_slack_bot.core import ApplicationComponentBase, Settings
15
  from ctp_slack_bot.models import GoogleDriveMetadata
16
 
17
 
 
19
  PATH_SEPARATOR: str = "/"
20
 
21
 
22
+ class GoogleDriveService(ApplicationComponentBase):
23
  """Service for interacting with Google Drive."""
24
 
25
+ model_config = ConfigDict(frozen=True)
 
 
26
 
27
+ settings: Settings
28
+ _google_drive_client: Resource
29
+ _folder_cache: TTLCache = PrivateAttr(default_factory=lambda: TTLCache(maxsize=256, ttl=60))
30
 
31
+ def model_post_init(self: Self, context: Any, /) -> None:
32
+ super().model_post_init(context)
33
+ credentials = Credentials.from_service_account_info({
34
  "type": "service_account",
35
+ "project_id": self.settings.google_project_id,
36
+ "private_key_id": self.settings.google_private_key_id.get_secret_value(),
37
+ "private_key": self.settings.google_private_key.get_secret_value(),
38
+ "client_email": self.settings.google_client_email,
39
+ "client_id": self.settings.google_client_id,
40
+ "token_uri": self.settings.google_token_uri,
41
  }, scopes=["https://www.googleapis.com/auth/drive"])
42
  self._google_drive_client = build('drive', 'v3', credentials=credentials)
43
+ logger.info(type(self._google_drive_client))
44
 
45
  def _resolve_folder_id(self: Self, folder_path: str) -> Optional[str]:
46
  """Resolve a folder path to a Google Drive ID."""
47
 
48
  if not folder_path:
49
+ return self.settings.google_drive_root_id
50
 
51
  if folder_path in self._folder_cache:
52
  return self._folder_cache[folder_path]
53
 
54
+ current_id = self.settings.google_drive_root_id
55
  try:
56
  for part in folder_path.split(PATH_SEPARATOR):
57
  results = self._google_drive_client.files().list(
 
111
  match item_path.rsplit(PATH_SEPARATOR, 1):
112
  case [item_name]:
113
  folder_path = ""
114
+ folder_id = self.settings.google_drive_root_id
115
  case [folder_path, item_name]:
116
  folder_id = self._resolve_folder_id(folder_path)
117
 
 
150
  except HttpError as e:
151
  logger.error("Error reading file by ID, {}: {}", file_id, e)
152
  return None
153
+
154
+ @property
155
+ def name(self: Self) -> str:
156
+ return "google_drive_service"
src/ctp_slack_bot/services/http_client_service.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dependency_injector.resources import AsyncResource
2
+ from httpx import AsyncClient
3
+ from typing import Self
4
+
5
+
6
+ # TODO: Implement HTTPClientService to abstract away underlying HTTP client.
7
+
8
+
9
+ class HTTPClientServiceResource(AsyncResource):
10
+ async def init(self: Self) -> AsyncClient:
11
+ return AsyncClient()
12
+
13
+ async def shutdown(self: Self, client: AsyncClient) -> None:
14
+ await client.aclose()
src/ctp_slack_bot/services/http_server_service.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from aiohttp.web import Application, AppRunner, Response, TCPSite
2
+ from dependency_injector.resources import AsyncResource
3
+ from pydantic import ConfigDict
4
+ from typing import Self
5
+
6
+ from ctp_slack_bot.core import ApplicationComponentBase, Settings
7
+
8
+
9
+ class HTTPServerService(ApplicationComponentBase):
10
+ model_config = ConfigDict(frozen=True)
11
+
12
+ settings: Settings
13
+
14
+ async def listen(self: Self) -> None:
15
+ pass
src/ctp_slack_bot/services/language_model_service.py CHANGED
@@ -1,28 +1,26 @@
1
  from datetime import datetime
2
  from loguru import logger
3
  from openai import AsyncOpenAI
4
- from openai.types.chat import ChatCompletion
5
- from pydantic import BaseModel, PrivateAttr
6
- from typing import Collection, Self
7
 
8
- from ctp_slack_bot.core import Settings
9
  from ctp_slack_bot.models import Chunk
10
 
11
- class LanguageModelService(BaseModel):
 
12
  """
13
  Service for language model operations.
14
  """
15
 
16
- settings: Settings
17
- _open_ai_client: PrivateAttr = PrivateAttr()
18
 
19
- class Config:
20
- frozen=True
21
 
22
- def __init__(self: Self, **data) -> None:
23
- super().__init__(**data)
24
- self._open_ai_client = AsyncOpenAI(api_key=self.settings.OPENAI_API_KEY.get_secret_value())
25
- logger.debug("Created {}", self.__class__.__name__)
26
 
27
  async def answer_question(self, asker: str, question: str, context: Collection[Chunk]) -> str: # TODO: generify into just another agent.
28
  """Generate a response using OpenAI’s API with retrieved context.
@@ -36,7 +34,7 @@ class LanguageModelService(BaseModel):
36
  """
37
  logger.debug("Generating response for question “{}” using {} context chunks…", question, len(context))
38
  messages = [
39
- {"role": "system", "content": self.settings.SYSTEM_PROMPT},
40
  {"role": "user", "content": (
41
  f"""Inquirer Name: {asker}
42
 
@@ -48,11 +46,14 @@ class LanguageModelService(BaseModel):
48
  Context from class materials and transcripts:
49
  {'\n\n'.join(chunk.text for chunk in context)}""")}
50
  ]
51
- response: ChatCompletion = await self._open_ai_client.chat.completions.create(
52
- model=self.settings.CHAT_MODEL,
53
  messages=messages,
54
- max_tokens=self.settings.MAX_TOKENS,
55
- temperature=self.settings.TEMPERATURE
56
  )
57
-
58
  return response.choices[0].message.content
 
 
 
 
 
1
  from datetime import datetime
2
  from loguru import logger
3
  from openai import AsyncOpenAI
4
+ from pydantic import ConfigDict
5
+ from typing import Any, Collection, Self
 
6
 
7
+ from ctp_slack_bot.core import ApplicationComponentBase, Settings
8
  from ctp_slack_bot.models import Chunk
9
 
10
+
11
+ class LanguageModelService(ApplicationComponentBase):
12
  """
13
  Service for language model operations.
14
  """
15
 
16
+ model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
 
17
 
18
+ settings: Settings
19
+ _open_ai_client: AsyncOpenAI
20
 
21
+ def model_post_init(self: Self, context: Any, /) -> None:
22
+ super().model_post_init(context)
23
+ self._open_ai_client = AsyncOpenAI(api_key=self.settings.openai_api_key.get_secret_value())
 
24
 
25
  async def answer_question(self, asker: str, question: str, context: Collection[Chunk]) -> str: # TODO: generify into just another agent.
26
  """Generate a response using OpenAI’s API with retrieved context.
 
34
  """
35
  logger.debug("Generating response for question “{}” using {} context chunks…", question, len(context))
36
  messages = [
37
+ {"role": "system", "content": self.settings.system_prompt},
38
  {"role": "user", "content": (
39
  f"""Inquirer Name: {asker}
40
 
 
46
  Context from class materials and transcripts:
47
  {'\n\n'.join(chunk.text for chunk in context)}""")}
48
  ]
49
+ response = await self._open_ai_client.chat.completions.create(
50
+ model=self.settings.chat_model,
51
  messages=messages,
52
+ max_tokens=self.settings.max_tokens,
53
+ temperature=self.settings.temperature
54
  )
 
55
  return response.choices[0].message.content
56
+
57
+ @property
58
+ def name(self: Self) -> str:
59
+ return "language_model_service"
src/ctp_slack_bot/services/question_dispatch_service.py CHANGED
@@ -1,8 +1,8 @@
1
  from loguru import logger
2
- from pydantic import BaseModel
3
- from typing import Self
4
 
5
- from ctp_slack_bot.core import Settings
6
  from ctp_slack_bot.enums import EventType
7
  from ctp_slack_bot.models import Chunk, SlackMessage
8
  from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
@@ -10,26 +10,28 @@ from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalSer
10
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
11
 
12
 
13
- class QuestionDispatchService(BaseModel):
14
  """
15
  Service for determining whether a Slack message constitutes a question.
16
  """
17
 
 
 
18
  settings: Settings
19
  event_brokerage_service: EventBrokerageService
20
  context_retrieval_service: ContextRetrievalService
21
  answer_retrieval_service: AnswerRetrievalService
22
 
23
- class Config:
24
- frozen=True
25
-
26
- def __init__(self: Self, **data) -> None:
27
- super().__init__(**data)
28
  self.event_brokerage_service.subscribe(EventType.INCOMING_SLACK_MESSAGE, self.__process_incoming_slack_message)
29
- logger.debug("Created {}", self.__class__.__name__)
30
 
31
  async def __process_incoming_slack_message(self: Self, message: SlackMessage) -> None:
32
  if message.subtype != 'bot_message':
33
  logger.debug("Question dispatch service received an answerable question: {}", message.text)
34
  context = await self.context_retrieval_service.get_context(message)
35
  await self.answer_retrieval_service.push(message, context)
 
 
 
 
 
1
  from loguru import logger
2
+ from pydantic import ConfigDict
3
+ from typing import Any, Self
4
 
5
+ from ctp_slack_bot.core import ApplicationComponentBase, Settings
6
  from ctp_slack_bot.enums import EventType
7
  from ctp_slack_bot.models import Chunk, SlackMessage
8
  from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
 
10
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
11
 
12
 
13
+ class QuestionDispatchService(ApplicationComponentBase):
14
  """
15
  Service for determining whether a Slack message constitutes a question.
16
  """
17
 
18
+ model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
19
+
20
  settings: Settings
21
  event_brokerage_service: EventBrokerageService
22
  context_retrieval_service: ContextRetrievalService
23
  answer_retrieval_service: AnswerRetrievalService
24
 
25
+ def model_post_init(self: Self, context: Any, /) -> None:
26
+ super().model_post_init(context)
 
 
 
27
  self.event_brokerage_service.subscribe(EventType.INCOMING_SLACK_MESSAGE, self.__process_incoming_slack_message)
 
28
 
29
  async def __process_incoming_slack_message(self: Self, message: SlackMessage) -> None:
30
  if message.subtype != 'bot_message':
31
  logger.debug("Question dispatch service received an answerable question: {}", message.text)
32
  context = await self.context_retrieval_service.get_context(message)
33
  await self.answer_retrieval_service.push(message, context)
34
+
35
+ @property
36
+ def name(self: Self) -> str:
37
+ return "question_dispatch_service"
src/ctp_slack_bot/services/schedule_service.py CHANGED
@@ -4,29 +4,26 @@ from asyncio import create_task, iscoroutinefunction, to_thread
4
  from datetime import datetime
5
  from dependency_injector.resources import Resource
6
  from loguru import logger
7
- from pydantic import BaseModel, PrivateAttr
8
  from pytz import timezone
9
- from typing import Optional, Self
10
 
11
- from ctp_slack_bot.core import Settings
12
 
13
- class ScheduleService(BaseModel):
 
14
  """
15
  Service for running scheduled tasks.
16
  """
17
 
18
- settings: Settings
19
- _scheduler: PrivateAttr
20
 
21
- class Config:
22
- frozen=True
23
 
24
- def __init__(self: Self, **data) -> None:
25
- super().__init__(**data)
26
- zone = self.settings.SCHEDULER_TIMEZONE
27
- self._configure_jobs()
28
- self._scheduler = AsyncIOScheduler(timezone=timezone(zone))
29
- logger.debug("Created {}", self.__class__.__name__)
30
 
31
  def _configure_jobs(self: Self) -> None:
32
  # Example jobs (uncomment and implement as needed)
@@ -55,6 +52,11 @@ class ScheduleService(BaseModel):
55
  else:
56
  logger.debug("The scheduler is not running. There is no scheduler to shut down.")
57
 
 
 
 
 
 
58
  class ScheduleServiceResource(Resource):
59
  def init(self: Self, settings: Settings) -> ScheduleService:
60
  logger.info("Starting scheduler…")
 
4
  from datetime import datetime
5
  from dependency_injector.resources import Resource
6
  from loguru import logger
7
+ from pydantic import ConfigDict
8
  from pytz import timezone
9
+ from typing import Any, Optional, Self
10
 
11
+ from ctp_slack_bot.core import ApplicationComponentBase, Settings
12
 
13
+
14
+ class ScheduleService(ApplicationComponentBase):
15
  """
16
  Service for running scheduled tasks.
17
  """
18
 
19
+ model_config = ConfigDict(frozen=True)
 
20
 
21
+ settings: Settings
22
+ _scheduler: AsyncIOScheduler
23
 
24
+ def model_post_init(self: Self, context: Any, /) -> None:
25
+ super().model_post_init(context)
26
+ self._scheduler = AsyncIOScheduler(timezone=timezone(self.settings.scheduler_timezone))
 
 
 
27
 
28
  def _configure_jobs(self: Self) -> None:
29
  # Example jobs (uncomment and implement as needed)
 
52
  else:
53
  logger.debug("The scheduler is not running. There is no scheduler to shut down.")
54
 
55
+ @property
56
+ def name(self: Self) -> str:
57
+ return "schedule_service"
58
+
59
+
60
  class ScheduleServiceResource(Resource):
61
  def init(self: Self, settings: Settings) -> ScheduleService:
62
  logger.info("Starting scheduler…")
src/ctp_slack_bot/services/slack_service.py CHANGED
@@ -1,12 +1,14 @@
1
  from dependency_injector.resources import AsyncResource
 
2
  from loguru import logger
3
  from openai import OpenAI
4
- from pydantic import BaseModel
5
  from re import compile as compile_re
6
  from slack_bolt.async_app import AsyncApp
7
  from slack_sdk.web.async_slack_response import AsyncSlackResponse
8
- from typing import Any, Mapping, Self
9
 
 
10
  from ctp_slack_bot.enums import EventType
11
  from ctp_slack_bot.models import SlackMessage, SlackResponse
12
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
@@ -15,25 +17,23 @@ from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
15
  SLACK_USER_MENTION_PATTERN = compile_re(r"<@([A-Z0-9]+)>")
16
 
17
 
18
- class SlackService(BaseModel):
19
  """
20
  Service for interfacing with Slack.
21
  """
22
 
 
 
23
  event_brokerage_service: EventBrokerageService
 
24
  slack_bolt_app: AsyncApp
25
- user_id_name_map: dict[str, str] # This is deliberately mutable; the map may change as new users are encountered.
26
-
27
- class Config:
28
- arbitrary_types_allowed = True
29
- frozen=True
30
 
31
- def __init__(self: Self, **data) -> None:
32
- super().__init__(**data)
33
  self.event_brokerage_service.subscribe(EventType.OUTGOING_SLACK_RESPONSE, self.send_message)
34
- logger.debug("Created {}", self.__class__.__name__)
35
 
36
- def adapt_event_payload(self: Self, event: Mapping[str, Any]) -> SlackMessage:
37
  text = SLACK_USER_MENTION_PATTERN.sub(lambda match: f"@{self.user_id_name_map.get(match.group(1))}", event.get("text", "")) # TODO: permit look-up of Slack again when not found.
38
  user_id = event.get("user")
39
  return SlackMessage(
@@ -41,7 +41,7 @@ class SlackService(BaseModel):
41
  subtype=event.get("subtype"),
42
  channel=event.get("channel"),
43
  channel_type=event.get("channel_type"),
44
- user=self.user_id_name_map.get(user_id, user_id),
45
  bot_id=event.get("bot_id"),
46
  thread_ts=event.get("thread_ts"),
47
  text=text,
@@ -50,7 +50,7 @@ class SlackService(BaseModel):
50
  )
51
 
52
  async def process_message(self: Self, event: Mapping[str, Any]) -> None:
53
- slack_message = self.adapt_event_payload(event.get("event", {}))
54
  logger.debug("Received message from Slack: {}", slack_message)
55
  await self.event_brokerage_service.publish(EventType.INCOMING_SLACK_MESSAGE, slack_message)
56
 
@@ -70,9 +70,21 @@ class SlackService(BaseModel):
70
  self.slack_bolt_app.event("app_mention")(self.handle_app_mention_event)
71
  logger.debug("Registered 2 handlers for Slack Bolt message and app mention events.")
72
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  class SlackServiceResource(AsyncResource):
75
- async def init(self: Self, event_brokerage_service: EventBrokerageService, slack_bolt_app: AsyncApp) -> SlackService:
76
  match await slack_bolt_app.client.users_list():
77
  case AsyncSlackResponse(status_code=200, data={"ok": True, "members": users}):
78
  user_id_name_map = {id: display_name
@@ -83,7 +95,7 @@ class SlackServiceResource(AsyncResource):
83
  case something:
84
  user_id_name_map = {}
85
  logger.error("Could not obtain a list of user name for the workspace.")
86
- slack_service = SlackService(event_brokerage_service=event_brokerage_service, slack_bolt_app=slack_bolt_app, user_id_name_map=user_id_name_map)
87
  slack_service.initialize()
88
  return slack_service
89
 
 
1
  from dependency_injector.resources import AsyncResource
2
+ from httpx import AsyncClient
3
  from loguru import logger
4
  from openai import OpenAI
5
+ from pydantic import ConfigDict
6
  from re import compile as compile_re
7
  from slack_bolt.async_app import AsyncApp
8
  from slack_sdk.web.async_slack_response import AsyncSlackResponse
9
+ from typing import Any, Mapping, MutableMapping, Self
10
 
11
+ from ctp_slack_bot.core import HealthReportingApplicationComponentBase
12
  from ctp_slack_bot.enums import EventType
13
  from ctp_slack_bot.models import SlackMessage, SlackResponse
14
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
 
17
  SLACK_USER_MENTION_PATTERN = compile_re(r"<@([A-Z0-9]+)>")
18
 
19
 
20
+ class SlackService(HealthReportingApplicationComponentBase):
21
  """
22
  Service for interfacing with Slack.
23
  """
24
 
25
+ model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
26
+
27
  event_brokerage_service: EventBrokerageService
28
+ http_client: AsyncClient
29
  slack_bolt_app: AsyncApp
30
+ user_id_name_map: MutableMapping[str, str]
 
 
 
 
31
 
32
+ def model_post_init(self: Self, context: Any, /) -> None:
33
+ super().model_post_init(context)
34
  self.event_brokerage_service.subscribe(EventType.OUTGOING_SLACK_RESPONSE, self.send_message)
 
35
 
36
+ async def adapt_event_payload(self: Self, event: Mapping[str, Any]) -> SlackMessage:
37
  text = SLACK_USER_MENTION_PATTERN.sub(lambda match: f"@{self.user_id_name_map.get(match.group(1))}", event.get("text", "")) # TODO: permit look-up of Slack again when not found.
38
  user_id = event.get("user")
39
  return SlackMessage(
 
41
  subtype=event.get("subtype"),
42
  channel=event.get("channel"),
43
  channel_type=event.get("channel_type"),
44
+ user=await self._get_user_display_name(user_id),
45
  bot_id=event.get("bot_id"),
46
  thread_ts=event.get("thread_ts"),
47
  text=text,
 
50
  )
51
 
52
  async def process_message(self: Self, event: Mapping[str, Any]) -> None:
53
+ slack_message = await self.adapt_event_payload(event.get("event", {}))
54
  logger.debug("Received message from Slack: {}", slack_message)
55
  await self.event_brokerage_service.publish(EventType.INCOMING_SLACK_MESSAGE, slack_message)
56
 
 
70
  self.slack_bolt_app.event("app_mention")(self.handle_app_mention_event)
71
  logger.debug("Registered 2 handlers for Slack Bolt message and app mention events.")
72
 
73
+ @property
74
+ def name(self: Self) -> str:
75
+ return "slack_service"
76
+
77
+ async def is_healthy(self: Self) -> bool:
78
+ response = await self.http_client.get("https://slack-status.com/api/v2.0.0/current")
79
+ return response.status_code == 200
80
+
81
+ async def _get_user_display_name(self: Self, user_id: str) -> str:
82
+ return self.user_id_name_map.get(user_id, f"<@{user_id}>")
83
+ # TODO: Handle new users.
84
+
85
 
86
  class SlackServiceResource(AsyncResource):
87
+ async def init(self: Self, event_brokerage_service: EventBrokerageService, http_client: AsyncClient, slack_bolt_app: AsyncApp) -> SlackService:
88
  match await slack_bolt_app.client.users_list():
89
  case AsyncSlackResponse(status_code=200, data={"ok": True, "members": users}):
90
  user_id_name_map = {id: display_name
 
95
  case something:
96
  user_id_name_map = {}
97
  logger.error("Could not obtain a list of user name for the workspace.")
98
+ slack_service = SlackService(event_brokerage_service=event_brokerage_service, http_client=http_client, slack_bolt_app=slack_bolt_app, user_id_name_map=user_id_name_map)
99
  slack_service.initialize()
100
  return slack_service
101
 
src/ctp_slack_bot/services/vectorization_service.py CHANGED
@@ -1,26 +1,22 @@
1
  from loguru import logger
2
- from pydantic import BaseModel
3
  from typing import Self, Sequence
4
 
5
- from ctp_slack_bot.core import Settings
6
  from ctp_slack_bot.models import Chunk, VectorizedChunk
7
  from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
8
 
9
- class VectorizationService(BaseModel):
 
10
  """
11
  Service for vectorizing chunks of text data.
12
  """
13
 
 
 
14
  settings: Settings
15
  embeddings_model_service: EmbeddingsModelService
16
 
17
- class Config:
18
- frozen=True
19
-
20
- def __init__(self: Self, **data) -> None:
21
- super().__init__(**data)
22
- logger.debug("Created {}", self.__class__.__name__)
23
-
24
  async def vectorize(self: Self, chunks: Sequence[Chunk]) -> Sequence[VectorizedChunk]:
25
  embeddings = await self.embeddings_model_service.get_embeddings([chunk.text for chunk in chunks])
26
  return tuple(VectorizedChunk(
@@ -32,3 +28,7 @@ class VectorizationService(BaseModel):
32
  )
33
  for chunk, embedding
34
  in zip(chunks, embeddings))
 
 
 
 
 
1
  from loguru import logger
2
+ from pydantic import ConfigDict
3
  from typing import Self, Sequence
4
 
5
+ from ctp_slack_bot.core import ApplicationComponentBase, Settings
6
  from ctp_slack_bot.models import Chunk, VectorizedChunk
7
  from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
8
 
9
+
10
+ class VectorizationService(ApplicationComponentBase):
11
  """
12
  Service for vectorizing chunks of text data.
13
  """
14
 
15
+ model_config = ConfigDict(frozen=True)
16
+
17
  settings: Settings
18
  embeddings_model_service: EmbeddingsModelService
19
 
 
 
 
 
 
 
 
20
  async def vectorize(self: Self, chunks: Sequence[Chunk]) -> Sequence[VectorizedChunk]:
21
  embeddings = await self.embeddings_model_service.get_embeddings([chunk.text for chunk in chunks])
22
  return tuple(VectorizedChunk(
 
28
  )
29
  for chunk, embedding
30
  in zip(chunks, embeddings))
31
+
32
+ @property
33
+ def name(self: Self) -> str:
34
+ return "vectorization_service"
src/ctp_slack_bot/utils/secret_stripper.py CHANGED
@@ -1,5 +1,6 @@
1
  from urllib.parse import urlparse, urlunparse
2
 
 
3
  def sanitize_mongo_db_uri(uri: str) -> str:
4
  parts = urlparse(uri)
5
  sanitized_netloc = ":".join(filter(None, (parts.hostname, parts.port)))
 
1
  from urllib.parse import urlparse, urlunparse
2
 
3
+
4
  def sanitize_mongo_db_uri(uri: str) -> str:
5
  parts = urlparse(uri)
6
  sanitized_netloc = ":".join(filter(None, (parts.hostname, parts.port)))