Kevin Li commited on
Commit
ef444a7
·
unverified ·
2 Parent(s): 551ef8f b9c8796

Merge pull request #2 from CUNYTechPrep/dependency-injection

Browse files
README.MD CHANGED
@@ -1,5 +1,10 @@
1
  # CTP Slack Bot
2
 
 
 
 
 
 
3
  ## Tech Stack
4
 
5
  * Hugging Face Spaces for hosting and serverless API
@@ -7,7 +12,10 @@
7
  * MongoDB for data persistence
8
  * Docker for containerization
9
  * Python
10
- * See `pyproject.toml` for Python packages.
 
 
 
11
 
12
  ## General Project Structure
13
 
@@ -26,7 +34,8 @@
26
  * `scripts/`: utility scripts for development, deployment, etc.
27
  * `run-dev.sh`: script to run the application locally
28
  * `notebooks/`: Jupyter notebooks for exploration and model development
29
- * `.env`: local environment variables for development purposes
 
30
 
31
  ## How to Run the Application
32
 
@@ -34,6 +43,18 @@
34
 
35
  Just run the Docker image. 😉
36
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  ### For Development
38
 
39
  Development usually requires rapid iteration. That means a change in the code ought to be reflected as soon as possible in the behavior of the application.
 
1
  # CTP Slack Bot
2
 
3
+ ## _Modus Operandi_ in a Nutshell
4
+
5
+ * Intelligently responds to Slack messages based on a repository of data.
6
+ * Periodically checks for new content to add to its repository.
7
+
8
  ## Tech Stack
9
 
10
  * Hugging Face Spaces for hosting and serverless API
 
12
  * MongoDB for data persistence
13
  * Docker for containerization
14
  * Python
15
+ * FastAPI for serving HTTP requests
16
+ * httpx for making HTTP requests
17
+ * APScheduler for running periodic tasks in the background
18
+ * See `pyproject.toml` for additional Python packages.
19
 
20
  ## General Project Structure
21
 
 
34
  * `scripts/`: utility scripts for development, deployment, etc.
35
  * `run-dev.sh`: script to run the application locally
36
  * `notebooks/`: Jupyter notebooks for exploration and model development
37
+ * `.env`: local environment variables for development purposes (to be created for local use only from `.env.template`)
38
+ * `Dockerfile`: Docker container build definition
39
 
40
  ## How to Run the Application
41
 
 
43
 
44
  Just run the Docker image. 😉
45
 
46
+ Build it with:
47
+
48
+ ```sh
49
+ docker build . -t ctp-slack-bot
50
+ ```
51
+
52
+ Run it with:
53
+
54
+ ```sh
55
+ docker run --env-file=.env -p 8000:8000 --name my-ctp-slack-bot-instance ctp-slack-bot
56
+ ```
57
+
58
  ### For Development
59
 
60
  Development usually requires rapid iteration. That means a change in the code ought to be reflected as soon as possible in the behavior of the application.
pyproject.toml CHANGED
@@ -19,34 +19,37 @@ classifiers = [
19
  "Operating System :: OS Independent",
20
  ]
21
  dependencies = [
22
- "pydantic>=2.0.0",
23
- "pydantic-settings>=2.0.0",
24
- "fastapi>=0.100.0",
25
- "uvicorn>=0.22.0",
26
- "loguru>=0.7.0",
27
- "python-dotenv>=1.0.0",
28
- "httpx>=0.24.1",
29
- "tenacity>=8.2.2",
30
- "pybreaker>=1.0.2",
 
31
  "pytz>=2025.2",
32
- "apscheduler>=3.10.1",
33
- "slack-sdk>=3.21.3",
34
- "pymongo>=4.4.1",
35
- "webvtt-py>=0.4.6",
36
- "langchain>=0.0.200",
37
- "transformers>=4.30.0",
38
- "torch>=2.0.0",
 
 
39
  ]
40
 
41
  [project.optional-dependencies]
42
  dev = [
43
- "pytest>=7.3.1",
44
- "pytest-cov>=4.1.0",
45
- "mypy>=1.3.0",
46
  "types-pytz>=2025.2",
47
- "black>=23.3.0",
48
- "isort>=5.12.0",
49
- "ruff>=0.0.270",
50
  ]
51
 
52
  [project.urls]
 
19
  "Operating System :: OS Independent",
20
  ]
21
  dependencies = [
22
+ "dependency-injector>=4.46.0",
23
+ "pydantic>=2.11.2",
24
+ "pydantic-settings>=2.8.1",
25
+ "fastapi>=0.115.12",
26
+ "uvicorn>=0.34.0",
27
+ "loguru>=0.7.3",
28
+ "python-dotenv>=1.1.0",
29
+ "httpx>=0.28.1",
30
+ "tenacity>=9.1.2",
31
+ "pybreaker>=1.3.0",
32
  "pytz>=2025.2",
33
+ "apscheduler>=3.11.0",
34
+ "slack-sdk>=3.35.0",
35
+ "pymongo>=4.11.3 ",
36
+ "numpy>=2.2.4",
37
+ "webvtt-py>=0.5.1",
38
+ "openai>=1.70.0",
39
+ # "langchain>=0.3.23",
40
+ # "transformers>=4.51.0",
41
+ # "torch>=2.6.0",
42
  ]
43
 
44
  [project.optional-dependencies]
45
  dev = [
46
+ "pytest>=8.3.5",
47
+ "pytest-cov>=6.1.1",
48
+ "mypy>=1.15.0",
49
  "types-pytz>=2025.2",
50
+ "black>=25.1.0",
51
+ "isort>=6.0.1",
52
+ "ruff>=0.11.4",
53
  ]
54
 
55
  [project.urls]
src/ctp_slack_bot/__init__.py CHANGED
@@ -0,0 +1 @@
 
 
1
+ from ctp_slack_bot.containers import Container
src/ctp_slack_bot/api/__init__.py CHANGED
@@ -0,0 +1 @@
 
 
1
+ from ctp_slack_bot.api.main import app, run
src/ctp_slack_bot/api/main.py CHANGED
@@ -1,14 +1,14 @@
1
  from contextlib import asynccontextmanager
2
- from fastapi import FastAPI, HTTPException
3
  from loguru import logger
4
- from typing import AsyncGenerator, Never
 
5
 
 
6
  from ctp_slack_bot.api.routes import router
7
- from ctp_slack_bot.core.config import Settings, settings
8
- from ctp_slack_bot.core.logging import setup_logging
9
  from ctp_slack_bot.core.response_rendering import PrettyJSONResponse
10
- from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
11
-
12
 
13
  @asynccontextmanager
14
  async def lifespan(app: FastAPI) -> AsyncGenerator:
@@ -16,20 +16,25 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
16
  Lifespan context manager for FastAPI application.
17
  Handles startup and shutdown events.
18
  """
19
- # Setup logging
20
- setup_logging()
 
 
 
 
 
21
  logger.info("Starting application")
22
-
23
- # Start scheduler
24
- #scheduler = start_scheduler()
25
- #logger.info("Started scheduler")
26
-
27
- yield
28
-
29
- # Shutdown
30
  logger.info("Shutting down application")
31
- #stop_scheduler(scheduler)
32
- #logger.info("Stopped scheduler")
33
 
34
 
35
  app = FastAPI(
@@ -39,30 +44,27 @@ app = FastAPI(
39
  lifespan=lifespan,
40
  )
41
 
42
- # Include routers
43
  app.include_router(router)
44
 
 
45
  @app.get("/health")
46
- async def health() -> dict[str, str]:
47
  """Health check"""
48
  return {
49
  "status": "healthy"
50
  }
51
 
52
- @app.get("/env", response_class=PrettyJSONResponse)
53
- async def env() -> Settings:
54
- """Server-internal environment variables"""
55
- if not settings.DEBUG:
56
- raise HTTPException(status_code=404)
57
- return settings
58
-
59
-
60
- if __name__ == "__main__":
61
  import uvicorn
62
-
63
  uvicorn.run(
64
  "main:app",
65
  host=settings.API_HOST,
66
  port=settings.API_PORT,
67
  reload=settings.DEBUG
68
  )
 
 
 
 
1
  from contextlib import asynccontextmanager
2
+ from fastapi import FastAPI, HTTPException, Depends
3
  from loguru import logger
4
+ from typing import AsyncGenerator
5
+ from dependency_injector.wiring import inject, Provide
6
 
7
+ from ctp_slack_bot import Container
8
  from ctp_slack_bot.api.routes import router
9
+ from ctp_slack_bot.core import Settings, setup_logging
 
10
  from ctp_slack_bot.core.response_rendering import PrettyJSONResponse
11
+ from ctp_slack_bot.tasks import start_scheduler, stop_scheduler
 
12
 
13
  @asynccontextmanager
14
  async def lifespan(app: FastAPI) -> AsyncGenerator:
 
16
  Lifespan context manager for FastAPI application.
17
  Handles startup and shutdown events.
18
  """
19
+ # Initialize container and wire the container to modules that need dependency injection.
20
+ container = Container()
21
+ container.wire(packages=['ctp_slack_bot'])
22
+ app.container = container
23
+
24
+ # Setup logging.
25
+ setup_logging(container)
26
  logger.info("Starting application")
27
+
28
+ # Start the scheduler.
29
+ scheduler = start_scheduler(container)
30
+ logger.info("Started scheduler")
31
+
32
+ yield # control to FastAPI until shutdown.
33
+
34
+ # Shutdown.
35
  logger.info("Shutting down application")
36
+ stop_scheduler(scheduler)
37
+ logger.info("Stopped scheduler")
38
 
39
 
40
  app = FastAPI(
 
44
  lifespan=lifespan,
45
  )
46
 
47
+ # Include routers.
48
  app.include_router(router)
49
 
50
+ # Provide a minimalist health check endpoint for clients to detect availability.
51
  @app.get("/health")
52
+ async def get_health() -> dict[str, str]:
53
  """Health check"""
54
  return {
55
  "status": "healthy"
56
  }
57
 
58
+ # Alternate starting path for development
59
+ def run() -> None:
 
 
 
 
 
 
 
60
  import uvicorn
61
+ settings = Settings() # type: ignore
62
  uvicorn.run(
63
  "main:app",
64
  host=settings.API_HOST,
65
  port=settings.API_PORT,
66
  reload=settings.DEBUG
67
  )
68
+
69
+ if __name__ == "__main__":
70
+ run()
src/ctp_slack_bot/api/routes.py CHANGED
@@ -1,13 +1,19 @@
1
  from fastapi import APIRouter, Depends, HTTPException, status
 
2
  from loguru import logger
3
 
4
- #from ctp_slack_bot.api.dependencies import get_slack_service, get_transcript_service
5
- #from ctp_slack_bot.models.transcript import TranscriptRequest, TranscriptResponse
6
- #from ctp_slack_bot.services.slack_service import SlackService
7
- #from ctp_slack_bot.services.transcript_service import TranscriptService
8
 
9
  router = APIRouter(prefix="/api/v1")
10
 
 
 
 
 
 
 
11
 
12
  # @router.post("/transcripts/analyze", response_model=TranscriptResponse)
13
  # async def analyze_transcript(
 
1
  from fastapi import APIRouter, Depends, HTTPException, status
2
+ from dependency_injector.wiring import inject, Provide
3
  from loguru import logger
4
 
5
+ from ctp_slack_bot import Container
6
+ from ctp_slack_bot.core import Settings
7
+ from ctp_slack_bot.services import SlackService
 
8
 
9
  router = APIRouter(prefix="/api/v1")
10
 
11
+ @router.get("/env", response_model=Settings)
12
+ @inject
13
+ async def get_env(settings: Settings = Depends(Provide[Container.settings])) -> Settings:
14
+ if not settings.DEBUG:
15
+ raise HTTPException(status_code=404)
16
+ return settings
17
 
18
  # @router.post("/transcripts/analyze", response_model=TranscriptResponse)
19
  # async def analyze_transcript(
src/ctp_slack_bot/containers.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dependency_injector.containers import DeclarativeContainer
2
+ from dependency_injector.providers import Factory, Singleton
3
+ from openai import OpenAI
4
+
5
+ from ctp_slack_bot.core.config import Settings
6
+ from ctp_slack_bot.db.mongo_db import MongoDB
7
+ from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
8
+ from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
9
+ from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
10
+ from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
11
+ from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
12
+ from ctp_slack_bot.services.slack_service import SlackService
13
+ from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
14
+ from ctp_slack_bot.services.vectorization_service import VectorizationService
15
+
16
+
17
+ class Container(DeclarativeContainer):
18
+ settings = Singleton(Settings)
19
+
20
+ event_brokerage_service = Singleton(EventBrokerageService)
21
+
22
+ mongo_db = Singleton(MongoDB, settings=settings)
23
+
24
+ # Repositories
25
+ # transcript_repository = Factory(
26
+ # # Your transcript repository class
27
+ # db=db
28
+ # )
29
+
30
+ open_ai_client = Factory(OpenAI, api_key=settings.provided.OPENAI_API_KEY) # TODO: poor practice to do it this way; create a LanguageModelService that creates an OpenAI client.
31
+
32
+ vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db)
33
+
34
+ vectorization_service = Singleton(VectorizationService, settings=settings, client=open_ai_client)
35
+
36
+ content_ingestion_service = Singleton(ContentIngestionService, settings=settings, event_brokerage_service=event_brokerage_service, vector_database_service=vector_database_service, vectorization_service=vectorization_service)
37
+
38
+ context_retrieval_service = Singleton(ContextRetrievalService, settings=settings, vectorization_service=vectorization_service, vector_database_service=vector_database_service)
39
+
40
+ answer_retrieval_service = Singleton(AnswerRetrievalService, settings=settings, event_brokerage_service=event_brokerage_service, client=open_ai_client)
41
+
42
+ question_dispatch_service = Singleton(QuestionDispatchService, settings=settings, event_brokerage_service=event_brokerage_service, content_ingestion_service=content_ingestion_service, context_retrieval_service=context_retrieval_service, answer_retrieval_service=answer_retrieval_service)
43
+
44
+ slack_service = Singleton(SlackService, settings=settings, event_brokerage_service=event_brokerage_service)
src/ctp_slack_bot/core/__init__.py CHANGED
@@ -0,0 +1,2 @@
 
 
 
1
+ from ctp_slack_bot.core.config import Settings
2
+ from ctp_slack_bot.core.logging import logger, setup_logging
src/ctp_slack_bot/core/config.py CHANGED
@@ -1,9 +1,6 @@
1
- from functools import lru_cache
2
- from typing import Literal, Optional
3
-
4
  from pydantic import Field, MongoDsn, NonNegativeFloat, NonNegativeInt, PositiveInt, SecretStr
5
  from pydantic_settings import BaseSettings, SettingsConfigDict
6
-
7
 
8
  class Settings(BaseSettings): # TODO: Strong guarantees of validity, because garbage in = garbage out, and settings flow into all the nooks and crannies
9
  """
@@ -54,14 +51,3 @@ class Settings(BaseSettings): # TODO: Strong guarantees of validity, because gar
54
  env_file_encoding="utf-8",
55
  case_sensitive=True,
56
  )
57
-
58
-
59
- @lru_cache
60
- def get_settings() -> Settings:
61
- """
62
- Get cached settings instance.
63
- """
64
- return Settings() # type: ignore
65
-
66
-
67
- settings = get_settings()
 
 
 
 
1
  from pydantic import Field, MongoDsn, NonNegativeFloat, NonNegativeInt, PositiveInt, SecretStr
2
  from pydantic_settings import BaseSettings, SettingsConfigDict
3
+ from typing import Literal, Optional
4
 
5
  class Settings(BaseSettings): # TODO: Strong guarantees of validity, because garbage in = garbage out, and settings flow into all the nooks and crannies
6
  """
 
51
  env_file_encoding="utf-8",
52
  case_sensitive=True,
53
  )
 
 
 
 
 
 
 
 
 
 
 
src/ctp_slack_bot/core/logging.py CHANGED
@@ -1,13 +1,9 @@
1
- import logging
2
- import sys
3
- from typing import Dict, Union
4
-
5
  from loguru import logger
 
 
6
 
7
- from ctp_slack_bot.core.config import settings
8
-
9
-
10
- class InterceptHandler(logging.Handler):
11
  """
12
  Intercept standard logging messages toward Loguru.
13
 
@@ -15,7 +11,7 @@ class InterceptHandler(logging.Handler):
15
  to Loguru, allowing unified logging across the application.
16
  """
17
 
18
- def emit(self, record: logging.LogRecord) -> None:
19
  # Get corresponding Loguru level if it exists
20
  try:
21
  level = logger.level(record.levelname).name
@@ -23,8 +19,8 @@ class InterceptHandler(logging.Handler):
23
  level = record.levelno
24
 
25
  # Find caller from where the logged message originated
26
- frame, depth = logging.currentframe(), 2
27
- while frame and frame.f_code.co_filename == logging.__file__:
28
  frame = frame.f_back
29
  depth += 1
30
 
@@ -33,7 +29,7 @@ class InterceptHandler(logging.Handler):
33
  )
34
 
35
 
36
- def setup_logging() -> None:
37
  """
38
  Configure logging with Loguru.
39
 
@@ -41,9 +37,12 @@ def setup_logging() -> None:
41
  configures the log format based on settings, and intercepts
42
  standard logging messages.
43
  """
 
 
 
44
  # Remove default loguru handler
45
  logger.remove()
46
-
47
  # Determine log format
48
  if settings.LOG_FORMAT == "json":
49
  log_format = {
@@ -62,17 +61,17 @@ def setup_logging() -> None:
62
  "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
63
  "<level>{message}</level>"
64
  )
65
-
66
  # Add console handler
67
  logger.add(
68
- sys.stderr,
69
  format=format_string,
70
  level=settings.LOG_LEVEL,
71
  serialize=(settings.LOG_FORMAT == "json"),
72
  backtrace=True,
73
  diagnose=True,
74
  )
75
-
76
  # Add file handler for non-DEBUG environments
77
  if settings.LOG_LEVEL != "DEBUG":
78
  logger.add(
@@ -84,19 +83,12 @@ def setup_logging() -> None:
84
  level=settings.LOG_LEVEL,
85
  serialize=(settings.LOG_FORMAT == "json"),
86
  )
87
-
88
  # Intercept standard logging messages
89
- logging.basicConfig(handlers=[InterceptHandler()], level=0, force=True)
90
-
91
  # Update logging levels for some noisy libraries
92
- for logger_name in [
93
- "uvicorn",
94
- "uvicorn.error",
95
- "fastapi",
96
- "httpx",
97
- "apscheduler",
98
- "pymongo",
99
- ]:
100
- logging.getLogger(logger_name).setLevel(logging.INFO)
101
-
102
  logger.info(f"Logging configured with level {settings.LOG_LEVEL}")
 
1
+ from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord
 
 
 
2
  from loguru import logger
3
+ from sys import stderr
4
+ from typing import Dict, Union
5
 
6
+ class InterceptHandler(Handler):
 
 
 
7
  """
8
  Intercept standard logging messages toward Loguru.
9
 
 
11
  to Loguru, allowing unified logging across the application.
12
  """
13
 
14
+ def emit(self, record: LogRecord) -> None:
15
  # Get corresponding Loguru level if it exists
16
  try:
17
  level = logger.level(record.levelname).name
 
19
  level = record.levelno
20
 
21
  # Find caller from where the logged message originated
22
+ frame, depth = currentframe(), 2
23
+ while frame and frame.f_code.co_filename == logging_file:
24
  frame = frame.f_back
25
  depth += 1
26
 
 
29
  )
30
 
31
 
32
+ def setup_logging(container: "Container") -> None:
33
  """
34
  Configure logging with Loguru.
35
 
 
37
  configures the log format based on settings, and intercepts
38
  standard logging messages.
39
  """
40
+ from ctp_slack_bot import Container
41
+ settings = container.settings() if container else Provide[Container.settings]
42
+
43
  # Remove default loguru handler
44
  logger.remove()
45
+
46
  # Determine log format
47
  if settings.LOG_FORMAT == "json":
48
  log_format = {
 
61
  "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
62
  "<level>{message}</level>"
63
  )
64
+
65
  # Add console handler
66
  logger.add(
67
+ stderr,
68
  format=format_string,
69
  level=settings.LOG_LEVEL,
70
  serialize=(settings.LOG_FORMAT == "json"),
71
  backtrace=True,
72
  diagnose=True,
73
  )
74
+
75
  # Add file handler for non-DEBUG environments
76
  if settings.LOG_LEVEL != "DEBUG":
77
  logger.add(
 
83
  level=settings.LOG_LEVEL,
84
  serialize=(settings.LOG_FORMAT == "json"),
85
  )
86
+
87
  # Intercept standard logging messages
88
+ basicConfig(handlers=[InterceptHandler()], level=0, force=True)
89
+
90
  # Update logging levels for some noisy libraries
91
+ for logger_name in ("uvicorn", "uvicorn.error", "fastapi", "httpx", "apscheduler", "pymongo"):
92
+ getLogger(logger_name).setLevel(INFO)
93
+
 
 
 
 
 
 
 
94
  logger.info(f"Logging configured with level {settings.LOG_LEVEL}")
src/ctp_slack_bot/db/MongoDB.py DELETED
@@ -1,122 +0,0 @@
1
- from motor.motor_asyncio import AsyncIOMotorClient
2
- from pymongo import IndexModel, ASCENDING
3
- import logging
4
- from typing import Optional
5
-
6
- from ctp_slack_bot.core.config import settings
7
-
8
- logger = logging.getLogger(__name__)
9
-
10
- class MongoDB:
11
- """
12
- MongoDB connection and initialization class.
13
- Handles connection to MongoDB, database selection, and index creation.
14
- """
15
- def __init__(self):
16
- self.client: Optional[AsyncIOMotorClient] = None
17
- self.db = None
18
- self.vector_collection = None
19
- self.initialized = False
20
-
21
- async def connect(self):
22
- """
23
- Connect to MongoDB using connection string from settings.
24
- """
25
- if self.client is not None:
26
- return
27
-
28
- if not settings.MONGODB_URI:
29
- raise ValueError("MONGODB_URI is not set in environment variables")
30
-
31
- try:
32
- # Create MongoDB connection
33
- self.client = AsyncIOMotorClient(settings.MONGODB_URI.get_secret_value())
34
- self.db = self.client[settings.MONGODB_DB_NAME]
35
- self.vector_collection = self.db["vector_store"]
36
- logger.info(f"Connected to MongoDB: {settings.MONGODB_DB_NAME}")
37
- except Exception as e:
38
- logger.error(f"Error connecting to MongoDB: {str(e)}")
39
- raise
40
-
41
- async def initialize(self):
42
- """
43
- Initialize MongoDB with required collections and indexes.
44
- """
45
- if self.initialized:
46
- return
47
-
48
- if not self.client:
49
- await self.connect()
50
-
51
- try:
52
- # Create vector index for similarity search
53
- await self.create_vector_index()
54
- self.initialized = True
55
- logger.info("MongoDB initialized successfully")
56
- except Exception as e:
57
- logger.error(f"Error initializing MongoDB: {str(e)}")
58
- raise
59
-
60
- async def create_vector_index(self):
61
- """
62
- Create vector index for similarity search using MongoDB Atlas Vector Search.
63
- """
64
- try:
65
- # Check if index already exists
66
- existing_indexes = await self.vector_collection.list_indexes().to_list(length=None)
67
- index_names = [index.get('name') for index in existing_indexes]
68
-
69
- if "vector_index" not in index_names:
70
- # Create vector search index
71
- index_definition = {
72
- "mappings": {
73
- "dynamic": True,
74
- "fields": {
75
- "embedding": {
76
- "dimensions": settings.VECTOR_DIMENSION,
77
- "similarity": "cosine",
78
- "type": "knnVector"
79
- }
80
- }
81
- }
82
- }
83
-
84
- # Create the index
85
- await self.db.command({
86
- "createIndexes": self.vector_collection.name,
87
- "indexes": [
88
- {
89
- "name": "vector_index",
90
- "key": {"embedding": "vector"},
91
- "weights": {"embedding": 1},
92
- "vectorSearchOptions": index_definition
93
- }
94
- ]
95
- })
96
-
97
- # Create additional metadata indexes for filtering
98
- await self.vector_collection.create_index([("metadata.source", ASCENDING)])
99
- await self.vector_collection.create_index([("metadata.timestamp", ASCENDING)])
100
-
101
- logger.info("Vector search index created")
102
- else:
103
- logger.info("Vector search index already exists")
104
-
105
- except Exception as e:
106
- logger.error(f"Error creating vector index: {str(e)}")
107
- raise
108
-
109
- async def close(self):
110
- """
111
- Close MongoDB connection.
112
- """
113
- if self.client:
114
- self.client.close()
115
- self.client = None
116
- self.db = None
117
- self.vector_collection = None
118
- self.initialized = False
119
- logger.info("MongoDB connection closed")
120
-
121
- # Create a singleton instance
122
- mongodb = MongoDB()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/ctp_slack_bot/db/__init__.py CHANGED
@@ -0,0 +1 @@
 
 
1
+ from ctp_slack_bot.db.mongo_db import MongoDB
src/ctp_slack_bot/db/mongo_db.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #from motor.motor_asyncio import AsyncIOMotorClient
2
+ from loguru import logger
3
+ from pydantic import BaseModel, model_validator
4
+ #from pymongo import IndexModel, ASCENDING
5
+ from typing import Optional, Self
6
+
7
+ from ctp_slack_bot.core.config import Settings
8
+
9
+ class MongoDB(BaseModel):
10
+ """
11
+ MongoDB connection and initialization class.
12
+ Handles connection to MongoDB, database selection, and index creation.
13
+ """
14
+
15
+ settings: Settings
16
+
17
+ @model_validator(mode='after')
18
+ def post_init(self: Self) -> Self:
19
+ logger.debug("Created {}", self.__class__.__name__)
20
+ return self
21
+
22
+ def __init__(self: Self, settings: Settings) -> Self:
23
+ #self.client: Optional[AsyncIOMotorClient] = None
24
+ #self.db = None
25
+ #self.vector_collection = None
26
+ #self.initialized = False
27
+ pass # The above initialization needs to be done some other way.
28
+
29
+ # async def connect(self):
30
+ # """
31
+ # Connect to MongoDB using connection string from settings.
32
+ # """
33
+ # if self.client is not None:
34
+ # return
35
+
36
+ # if not settings.MONGODB_URI:
37
+ # raise ValueError("MONGODB_URI is not set in environment variables")
38
+
39
+ # try:
40
+ # # Create MongoDB connection
41
+ # self.client = AsyncIOMotorClient(settings.MONGODB_URI.get_secret_value())
42
+ # self.db = self.client[settings.MONGODB_DB_NAME]
43
+ # self.vector_collection = self.db["vector_store"]
44
+ # logger.info(f"Connected to MongoDB: {settings.MONGODB_DB_NAME}")
45
+ # except Exception as e:
46
+ # logger.error(f"Error connecting to MongoDB: {str(e)}")
47
+ # raise
48
+
49
+ # async def initialize(self):
50
+ # """
51
+ # Initialize MongoDB with required collections and indexes.
52
+ # """
53
+ # if self.initialized:
54
+ # return
55
+
56
+ # if not self.client:
57
+ # await self.connect()
58
+
59
+ # try:
60
+ # # Create vector index for similarity search
61
+ # await self.create_vector_index()
62
+ # self.initialized = True
63
+ # logger.info("MongoDB initialized successfully")
64
+ # except Exception as e:
65
+ # logger.error(f"Error initializing MongoDB: {str(e)}")
66
+ # raise
67
+
68
+ # async def create_vector_index(self):
69
+ # """
70
+ # Create vector index for similarity search using MongoDB Atlas Vector Search.
71
+ # """
72
+ # try:
73
+ # # Check if index already exists
74
+ # existing_indexes = await self.vector_collection.list_indexes().to_list(length=None)
75
+ # index_names = [index.get('name') for index in existing_indexes]
76
+
77
+ # if "vector_index" not in index_names:
78
+ # # Create vector search index
79
+ # index_definition = {
80
+ # "mappings": {
81
+ # "dynamic": True,
82
+ # "fields": {
83
+ # "embedding": {
84
+ # "dimensions": settings.VECTOR_DIMENSION,
85
+ # "similarity": "cosine",
86
+ # "type": "knnVector"
87
+ # }
88
+ # }
89
+ # }
90
+ # }
91
+
92
+ # # Create the index
93
+ # await self.db.command({
94
+ # "createIndexes": self.vector_collection.name,
95
+ # "indexes": [
96
+ # {
97
+ # "name": "vector_index",
98
+ # "key": {"embedding": "vector"},
99
+ # "weights": {"embedding": 1},
100
+ # "vectorSearchOptions": index_definition
101
+ # }
102
+ # ]
103
+ # })
104
+
105
+ # # Create additional metadata indexes for filtering
106
+ # await self.vector_collection.create_index([("metadata.source", ASCENDING)])
107
+ # await self.vector_collection.create_index([("metadata.timestamp", ASCENDING)])
108
+
109
+ # logger.info("Vector search index created")
110
+ # else:
111
+ # logger.info("Vector search index already exists")
112
+
113
+ # except Exception as e:
114
+ # logger.error(f"Error creating vector index: {str(e)}")
115
+ # raise
116
+
117
+ # async def close(self):
118
+ # """
119
+ # Close MongoDB connection.
120
+ # """
121
+ # if self.client:
122
+ # self.client.close()
123
+ # self.client = None
124
+ # self.db = None
125
+ # self.vector_collection = None
126
+ # self.initialized = False
127
+ # logger.info("MongoDB connection closed")
src/ctp_slack_bot/models/__init__.py CHANGED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from ctp_slack_bot.models.base import Content, Ingestible, Metadata
2
+ from ctp_slack_bot.models.content import RetreivedContext
3
+ from ctp_slack_bot.models.slack import SlackMessage
4
+ from ctp_slack_bot.models.vector_query import VectorQuery
src/ctp_slack_bot/models/{VectorQuery.py → vector_query.py} RENAMED
@@ -1,6 +1,5 @@
1
  from pydantic import BaseModel, Field, validator
2
  from typing import Optional, List, Dict, Any
3
- from ctp_slack_bot.core.config import settings
4
 
5
  class VectorQuery(BaseModel):
6
  """Model for vector database similarity search queries.
@@ -12,6 +11,6 @@ class VectorQuery(BaseModel):
12
  filter_metadata: Optional filters for metadata fields
13
  """
14
  query_text: str
15
- k: int = Field(default=settings.TOP_K_MATCHES)
16
  score_threshold: float = Field(default=0.7)
17
- filter_metadata: Optional[Dict[str, Any]] = None
 
1
  from pydantic import BaseModel, Field, validator
2
  from typing import Optional, List, Dict, Any
 
3
 
4
  class VectorQuery(BaseModel):
5
  """Model for vector database similarity search queries.
 
11
  filter_metadata: Optional filters for metadata fields
12
  """
13
  query_text: str
14
+ k: int
15
  score_threshold: float = Field(default=0.7)
16
+ filter_metadata: Optional[Dict[str, Any]] = None
src/ctp_slack_bot/services/__init__.py CHANGED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
2
+ from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
3
+ from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
4
+ from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
5
+ from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
6
+ from ctp_slack_bot.services.slack_service import SlackService
7
+ from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
8
+ from ctp_slack_bot.services.vectorization_service import VectorizationService
src/ctp_slack_bot/services/{AnswerQuestionService.py → answer_retrieval_service.py} RENAMED
@@ -1,17 +1,30 @@
1
- from pydantic import BaseModel, validator
2
- from typing import List, Optional, Tuple
3
- from ctp_slack_bot.core.config import settings
4
- import numpy as np
5
  from openai import OpenAI
6
- from ctp_slack_bot.models.slack import SlackMessage
7
- from ctp_slack_bot.models.content import RetreivedContext
8
 
9
- class GenerateAnswer():
 
 
 
 
 
10
  """
11
  Service for language model operations.
12
  """
13
- def __init__(self):
14
- self.client = OpenAI(api_key=settings.OPENAI_API_KEY)
 
 
 
 
 
 
 
 
 
 
15
 
16
  def generate_answer(self, question: SlackMessage, context: List[RetreivedContext]) -> str:
17
  """Generate a response using OpenAI's API with retrieved context.
@@ -50,11 +63,3 @@ class GenerateAnswer():
50
  )
51
 
52
  return response.choices[0].message.content
53
-
54
-
55
-
56
- ### REMOVE BELOW, PUT SOMEWHERE IN TESTS BUT IDK WHERE YET
57
- # sm = SlackMessage(text="What is the capital of France?", channel_id="123", user_id="456", timestamp="789")
58
- # context = [RetreivedContext(contextual_text="The capital of France is Paris", metadata_source="class materials", similarity_score=0.95)]
59
- # a = GenerateAnswer()
60
- # a.generate_answer(sm, context)
 
1
+ # from asyncio import create_task
2
+ from loguru import logger
 
 
3
  from openai import OpenAI
4
+ from pydantic import BaseModel, model_validator
5
+ from typing import List, Optional, Self, Tuple
6
 
7
+ from ctp_slack_bot.core import Settings
8
+ from ctp_slack_bot.models import RetreivedContext, SlackMessage
9
+ from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
10
+
11
+
12
+ class AnswerRetrievalService(BaseModel): # TODO: this should separate the OpenAI backend out into its own service.
13
  """
14
  Service for language model operations.
15
  """
16
+
17
+ settings: Settings
18
+ event_brokerage_service: EventBrokerageService
19
+ client: OpenAI # TODO: this should separate the OpenAI backend out into its own service, one that is agnostic.
20
+
21
+ class Config:
22
+ arbitrary_types_allowed = True
23
+
24
+ @model_validator(mode='after')
25
+ def post_init(self: Self) -> Self:
26
+ logger.debug("Created {}", self.__class__.__name__)
27
+ return self
28
 
29
  def generate_answer(self, question: SlackMessage, context: List[RetreivedContext]) -> str:
30
  """Generate a response using OpenAI's API with retrieved context.
 
63
  )
64
 
65
  return response.choices[0].message.content
 
 
 
 
 
 
 
 
src/ctp_slack_bot/services/content_ingestion_service.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from loguru import logger
2
+ from pydantic import BaseModel, model_validator
3
+ from typing import Self
4
+
5
+ from ctp_slack_bot.core import Settings
6
+ from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
7
+ from ctp_slack_bot.services.vectorization_service import VectorizationService
8
+
9
class ContentIngestionService(BaseModel):
    """
    Service for ingesting content.

    Holds the application settings together with the vectorization and
    vector-database services it depends on; no ingestion behaviour is
    implemented in this block yet — construction is only logged.
    """

    settings: Settings
    vector_database_service: VectorDatabaseService
    vectorization_service: VectorizationService

    @model_validator(mode="after")
    def post_init(self: Self) -> Self:
        # Emit a debug trace so container wiring can be observed at startup.
        logger.debug("Created {}", type(self).__name__)
        return self
src/ctp_slack_bot/services/{ContextRetrievalService.py → context_retrieval_service.py} RENAMED
@@ -1,29 +1,31 @@
1
- import logging
2
- from typing import List, Dict, Any, Optional
 
3
 
4
- from ctp_slack_bot.models.slack import SlackMessage
5
- from ctp_slack_bot.models.content import RetreivedContext
6
- from ctp_slack_bot.models.VectorQuery import VectorQuery
7
- from ctp_slack_bot.services.VectorizationService import VectorizationService
8
- from ctp_slack_bot.services.VectorDatabaseService import VectorDatabaseService
9
- from ctp_slack_bot.core.config import settings
10
 
11
- logger = logging.getLogger(__name__)
12
-
13
- class ContextRetrievalService:
14
  """
15
  Service for retrieving relevant context from the vector database based on user questions.
16
  """
17
-
18
- def __init__(self):
19
- self.vectorization_service = VectorizationService()
20
- self.vector_db_service = VectorDatabaseService()
 
 
 
 
 
21
 
22
  async def initialize(self):
23
  """
24
  Initialize the required services.
25
  """
26
- await self.vector_db_service.initialize()
27
 
28
  async def get_context(self, message: SlackMessage) -> List[RetreivedContext]:
29
  """
@@ -62,7 +64,7 @@ class ContextRetrievalService:
62
  )
63
 
64
  # Search for similar content in vector database
65
- context_results = await self.vector_db_service.search_by_similarity(
66
  query=vector_query,
67
  query_embedding=query_embedding
68
  )
 
1
+ from loguru import logger
2
+ from pydantic import BaseModel, model_validator
3
+ from typing import Any, Dict, List, Optional, Self
4
 
5
+ from ctp_slack_bot.core.config import Settings
6
+ from ctp_slack_bot.models import RetreivedContext, SlackMessage, VectorQuery
7
+ from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
8
+ from ctp_slack_bot.services.vectorization_service import VectorizationService
 
 
9
 
10
+ class ContextRetrievalService(BaseModel):
 
 
11
  """
12
  Service for retrieving relevant context from the vector database based on user questions.
13
  """
14
+
15
+ settings: Settings
16
+ vectorization_service: VectorizationService
17
+ vector_database_service: VectorDatabaseService
18
+
19
+ @model_validator(mode='after')
20
+ def post_init(self: Self) -> Self:
21
+ logger.debug("Created {}", self.__class__.__name__)
22
+ return self
23
 
24
  async def initialize(self):
25
  """
26
  Initialize the required services.
27
  """
28
+ await self.vector_database_service.initialize()
29
 
30
  async def get_context(self, message: SlackMessage) -> List[RetreivedContext]:
31
  """
 
64
  )
65
 
66
  # Search for similar content in vector database
67
+ context_results = await self.vector_database_service.search_by_similarity(
68
  query=vector_query,
69
  query_embedding=query_embedding
70
  )
src/ctp_slack_bot/services/event_brokerage_service.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from asyncio import create_task
2
+ from loguru import logger
3
+ from openai import OpenAI
4
+ from pydantic import BaseModel, model_validator
5
+ from typing import Any, Callable, Dict, List, Self
6
+
7
+ from ctp_slack_bot.core import Settings
8
+ from ctp_slack_bot.models import RetreivedContext, SlackMessage
9
+ from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
10
+ from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
11
+
12
+
13
class EventBrokerageService(BaseModel):
    """
    Service for brokering events between services.

    A minimal synchronous publish/subscribe registry: callbacks are
    registered per event-type string and invoked in registration order.
    """

    # Maps an event-type name to the list of callbacks registered for it.
    # NOTE: pydantic deep-copies this default per instance, so the empty
    # dict is not shared between service instances.
    subscribers: Dict[str, List[Callable]] = {}

    class Config:
        arbitrary_types_allowed = True

    @model_validator(mode='after')
    def post_init(self: Self) -> Self:
        logger.debug("Created {}", self.__class__.__name__)
        return self

    def subscribe(self: Self, event_type: str, callback: Callable) -> None:
        """Register a callback to be invoked whenever `event_type` is published."""
        # setdefault replaces the original check-then-append two-step.
        self.subscribers.setdefault(event_type, []).append(callback)

    def publish(self: Self, event_type: str, data: Any = None) -> None:
        """Invoke every callback subscribed to `event_type`, passing `data`.

        Event types with no subscribers are silently ignored.  Callbacks run
        inline on the caller's thread; any exception they raise propagates
        back to the publisher.
        """
        for callback in self.subscribers.get(event_type, []):
            callback(data)
src/ctp_slack_bot/services/question_dispatch_service.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from asyncio import create_task
2
+ from loguru import logger
3
+ from openai import OpenAI
4
+ from pydantic import BaseModel, model_validator
5
+ from typing import List, Optional, Self, Tuple
6
+
7
+ from ctp_slack_bot.core import Settings
8
+ from ctp_slack_bot.models import RetreivedContext, SlackMessage
9
+ from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
10
+ from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
11
+ from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
12
+
13
+
14
class QuestionDispatchService(BaseModel):
    """
    Service for dispatching an incoming Slack message for answering:
    retrieves supporting context, then hands the message and context to
    the answer-retrieval service.
    """

    settings: Settings
    event_brokerage_service: EventBrokerageService
    context_retrieval_service: ContextRetrievalService
    answer_retrieval_service: AnswerRetrievalService

    @model_validator(mode='after')
    def post_init(self: Self) -> Self:
        logger.debug("Created {}", self.__class__.__name__)
        return self

    def push(self: Self, message: SlackMessage) -> None:
        """Schedule asynchronous handling of `message`.

        NOTE(review): requires a running asyncio event loop on the calling
        thread (e.g. inside a FastAPI handler) — confirm all call sites.
        """
        # Local import: the module-level `from asyncio import create_task`
        # is currently commented out at the top of this file.
        from asyncio import create_task
        create_task(self._retrieve_and_answer(message))

    async def _retrieve_and_answer(self: Self, message: SlackMessage) -> None:
        """Await context retrieval, then generate an answer.

        BUG FIX: `get_context()` is a coroutine function; the original
        `push` passed the unawaited coroutine object straight into
        `generate_answer()` instead of the retrieved context list.
        """
        context = await self.context_retrieval_service.get_context(message)
        self.answer_retrieval_service.generate_answer(message, context)
src/ctp_slack_bot/services/slack_service.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from asyncio import create_task
2
+ from loguru import logger
3
+ from openai import OpenAI
4
+ from pydantic import BaseModel, model_validator
5
+ from typing import List, Optional, Self, Tuple
6
+
7
+ from ctp_slack_bot.core import Settings
8
+ from ctp_slack_bot.models import RetreivedContext, SlackMessage
9
+ from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
10
+
11
+
12
class SlackService(BaseModel):
    """
    Service for interfacing with Slack.

    Currently carries only the application settings and the event-brokerage
    service; construction is logged for wiring diagnostics, and no
    Slack-facing behaviour is implemented in this block yet.
    """

    settings: Settings
    event_brokerage_service: EventBrokerageService

    @model_validator(mode="after")
    def post_init(self: Self) -> Self:
        logger.debug("Created {}", type(self).__name__)
        return self
src/ctp_slack_bot/services/{VectorDatabaseService.py → vector_database_service.py} RENAMED
@@ -1,18 +1,24 @@
1
- import logging
2
- from typing import List, Dict, Any, Optional
3
- # import numpy as np
4
 
5
- from ctp_slack_bot.db.MongoDB import mongodb
6
- from ctp_slack_bot.models.VectorQuery import VectorQuery
7
- from ctp_slack_bot.models.content import RetreivedContext
8
 
9
- logger = logging.getLogger(__name__)
10
-
11
- class VectorDatabaseService:
12
  """
13
  Service for storing and retrieving vector embeddings from MongoDB.
14
  """
15
-
 
 
 
 
 
 
 
 
16
  async def initialize(self):
17
  """
18
  Initialize the database connection.
 
1
+ from loguru import logger
2
+ from pydantic import BaseModel, model_validator
3
+ from typing import Any, Dict, List, Optional, Self
4
 
5
+ from ctp_slack_bot.core import Settings
6
+ from ctp_slack_bot.db import MongoDB
7
+ from ctp_slack_bot.models import VectorQuery, RetreivedContext
8
 
9
+ class VectorDatabaseService(BaseModel): # TODO: this should not rely specifically on MongoDB.
 
 
10
  """
11
  Service for storing and retrieving vector embeddings from MongoDB.
12
  """
13
+
14
+ settings: Settings
15
+ mongo_db: MongoDB
16
+
17
+ @model_validator(mode='after')
18
+ def post_init(self: Self) -> Self:
19
+ logger.debug("Created {}", self.__class__.__name__)
20
+ return self
21
+
22
  async def initialize(self):
23
  """
24
  Initialize the database connection.
src/ctp_slack_bot/services/{VectorizationService.py → vectorization_service.py} RENAMED
@@ -1,17 +1,26 @@
1
- from pydantic import BaseModel, validator
2
- from typing import List, Optional
3
- from ctp_slack_bot.core.config import settings
4
  import numpy as np
5
  from openai import OpenAI
 
 
6
 
 
7
 
8
- class VectorizationService():
9
  """
10
  Service for vectorizing chunks of text data.
11
  """
12
- def __init__(self):
13
- self.client = OpenAI(api_key=settings.OPENAI_API_KEY)
14
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def get_embeddings(self, texts: List[str]) -> np.ndarray:
17
  """Get embeddings for a list of texts using OpenAI's API.
@@ -28,7 +37,7 @@ class VectorizationService():
28
  try:
29
  # Use the initialized client instead of the global openai module
30
  response = self.client.embeddings.create(
31
- model=settings.EMBEDDING_MODEL,
32
  input=texts,
33
  encoding_format="float" # Ensure we get raw float values
34
  )
@@ -36,9 +45,9 @@ class VectorizationService():
36
  # Extract embeddings and verify dimensions
37
  embeddings = np.array([data.embedding for data in response.data])
38
 
39
- if embeddings.shape[1] != settings.VECTOR_DIMENSION:
40
  raise ValueError(
41
- f"Embedding dimension mismatch. Expected {settings.VECTOR_DIMENSION}, "
42
  f"but got {embeddings.shape[1]}. Please update VECTOR_DIMENSION "
43
  f"in config.py to match the model's output."
44
  )
 
1
+ from loguru import logger
 
 
2
  import numpy as np
3
  from openai import OpenAI
4
+ from pydantic import BaseModel, model_validator
5
+ from typing import List, Optional, Self
6
 
7
+ from ctp_slack_bot.core import Settings
8
 
9
+ class VectorizationService(BaseModel):
10
  """
11
  Service for vectorizing chunks of text data.
12
  """
 
 
13
 
14
+ settings: Settings
15
+ client: OpenAI # TODO: this should separate the OpenAI backend out into its own service, one that is agnostic.
16
+
17
+ class Config:
18
+ arbitrary_types_allowed = True
19
+
20
+ @model_validator(mode='after')
21
+ def post_init(self: Self) -> Self:
22
+ logger.debug("Created {}", self.__class__.__name__)
23
+ return self
24
 
25
  def get_embeddings(self, texts: List[str]) -> np.ndarray:
26
  """Get embeddings for a list of texts using OpenAI's API.
 
37
  try:
38
  # Use the initialized client instead of the global openai module
39
  response = self.client.embeddings.create(
40
+ model=self.settings.EMBEDDING_MODEL,
41
  input=texts,
42
  encoding_format="float" # Ensure we get raw float values
43
  )
 
45
  # Extract embeddings and verify dimensions
46
  embeddings = np.array([data.embedding for data in response.data])
47
 
48
+ if embeddings.shape[1] != self.settings.VECTOR_DIMENSION:
49
  raise ValueError(
50
+ f"Embedding dimension mismatch. Expected {self.settings.VECTOR_DIMENSION}, "
51
  f"but got {embeddings.shape[1]}. Please update VECTOR_DIMENSION "
52
  f"in config.py to match the model's output."
53
  )
src/ctp_slack_bot/tasks/__init__.py CHANGED
@@ -0,0 +1 @@
 
 
1
+ from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
src/ctp_slack_bot/tasks/scheduler.py CHANGED
@@ -1,28 +1,26 @@
1
- from datetime import datetime
2
- from typing import Optional
3
-
4
  from apscheduler.schedulers.asyncio import AsyncIOScheduler
5
  from apscheduler.triggers.cron import CronTrigger
 
 
6
  from loguru import logger
7
  from pytz import timezone
 
8
 
9
- from ctp_slack_bot.core.config import settings
10
- #from ctp_slack_bot.tasks.error_report import send_error_report
11
- #from ctp_slack_bot.tasks.transcript_cleanup import cleanup_old_transcripts
12
-
13
 
14
- def start_scheduler() -> AsyncIOScheduler:
 
15
  """
16
  Start and configure the APScheduler instance.
17
 
18
  Returns:
19
  AsyncIOScheduler: Configured scheduler instance
20
  """
21
- scheduler = AsyncIOScheduler(timezone=timezone(settings.SCHEDULER_TIMEZONE))
 
 
22
 
23
- # Add jobs to the scheduler
24
-
25
- # Daily error report at 7 AM
26
  # scheduler.add_job(
27
  # send_error_report,
28
  # CronTrigger(hour=7, minute=0),
@@ -30,8 +28,6 @@ def start_scheduler() -> AsyncIOScheduler:
30
  # name="Daily Error Report",
31
  # replace_existing=True,
32
  # )
33
-
34
- # Weekly transcript cleanup on Sundays at 1 AM
35
  # scheduler.add_job(
36
  # cleanup_old_transcripts,
37
  # CronTrigger(day_of_week="sun", hour=1, minute=0),
@@ -40,25 +36,25 @@ def start_scheduler() -> AsyncIOScheduler:
40
  # replace_existing=True,
41
  # )
42
 
43
- # Start the scheduler
44
  scheduler.start()
45
  logger.info("Scheduler started with timezone: {}", settings.SCHEDULER_TIMEZONE)
46
- logger.info("Next run for error report: {}",
47
- scheduler.get_job("daily_error_report").next_run_time)
48
- logger.info("Next run for transcript cleanup: {}",
49
- scheduler.get_job("weekly_transcript_cleanup").next_run_time)
50
 
51
  return scheduler
52
 
53
 
54
- def stop_scheduler(scheduler: Optional[AsyncIOScheduler] = None) -> None:
55
  """
56
  Shutdown the scheduler gracefully.
57
 
58
  Args:
59
  scheduler: The scheduler instance to shut down
60
  """
61
- if scheduler is not None and scheduler.running:
62
  logger.info("Shutting down scheduler")
63
  scheduler.shutdown(wait=False)
64
  logger.info("Scheduler shutdown complete")
 
 
 
 
1
  from apscheduler.schedulers.asyncio import AsyncIOScheduler
2
  from apscheduler.triggers.cron import CronTrigger
3
+ from datetime import datetime
4
+ from dependency_injector.wiring import inject, Provide
5
  from loguru import logger
6
  from pytz import timezone
7
+ from typing import Optional
8
 
9
+ from ctp_slack_bot import Container
 
 
 
10
 
11
@inject
def start_scheduler(container: Container) -> AsyncIOScheduler:
    """
    Start and configure the APScheduler instance.

    Args:
        container: The application's dependency-injection container; its
            `settings` provider supplies SCHEDULER_TIMEZONE.

    Returns:
        AsyncIOScheduler: Configured, already-started scheduler instance.
    """
    # BUG FIX: the original read `container.settings() if container else
    # Provide[Container.settings]`, but `Provide[...]` is a wiring *marker*
    # object, not a Settings instance — accessing SCHEDULER_TIMEZONE on it
    # would fail at runtime.  `container` is a required parameter, so
    # resolve the settings provider from it directly.
    settings = container.settings()
    scheduler = AsyncIOScheduler(timezone=timezone(settings.SCHEDULER_TIMEZONE))

    # Add jobs to the scheduler.
    # scheduler.add_job(
    #     send_error_report,
    #     CronTrigger(hour=7, minute=0),
    #     id="daily_error_report",
    #     name="Daily Error Report",
    #     replace_existing=True,
    # )
    # scheduler.add_job(
    #     cleanup_old_transcripts,
    #     CronTrigger(day_of_week="sun", hour=1, minute=0),
    #     id="weekly_transcript_cleanup",
    #     name="Weekly Transcript Cleanup",
    #     replace_existing=True,
    # )

    # Start the scheduler.
    scheduler.start()
    logger.info("Scheduler started with timezone: {}", settings.SCHEDULER_TIMEZONE)
    # logger.info("Next run for error report: {}",
    #             scheduler.get_job("daily_error_report").next_run_time)
    # logger.info("Next run for transcript cleanup: {}",
    #             scheduler.get_job("weekly_transcript_cleanup").next_run_time)

    return scheduler
48
 
49
 
50
def stop_scheduler(scheduler: AsyncIOScheduler) -> None:
    """
    Shutdown the scheduler gracefully.

    Args:
        scheduler: The scheduler instance to shut down
    """
    # Guard clause: nothing to do for a scheduler that is not running.
    if not scheduler.running:
        return
    logger.info("Shutting down scheduler")
    scheduler.shutdown(wait=False)
    logger.info("Scheduler shutdown complete")