LiKenun commited on
Commit
c6a2a56
Β·
1 Parent(s): ce323cf

Make all code participate in dependency injection

Browse files
README.MD CHANGED
@@ -1,5 +1,10 @@
1
  # CTP Slack Bot
2
 
 
 
 
 
 
3
  ## Tech Stack
4
 
5
  * Hugging Face Spaces for hosting and serverless API
@@ -7,7 +12,10 @@
7
  * MongoDB for data persistence
8
  * Docker for containerization
9
  * Python
10
- * See `pyproject.toml` for Python packages.
 
 
 
11
 
12
  ## General Project Structure
13
 
@@ -26,7 +34,8 @@
26
  * `scripts/`: utility scripts for development, deployment, etc.
27
  * `run-dev.sh`: script to run the application locally
28
  * `notebooks/`: Jupyter notebooks for exploration and model development
29
- * `.env`: local environment variables for development purposes
 
30
 
31
  ## How to Run the Application
32
 
 
1
  # CTP Slack Bot
2
 
3
+ ## _Modus Operandi_ in a Nutshell
4
+
5
+ * Intelligently responds to Slack messages based on a repository of data.
6
+ * Periodically checks for new content to add to its repository.
7
+
8
  ## Tech Stack
9
 
10
  * Hugging Face Spaces for hosting and serverless API
 
12
  * MongoDB for data persistence
13
  * Docker for containerization
14
  * Python
15
+ * FastAPI for serving HTTP requests
16
+ * httpx for making HTTP requests
17
+ * APScheduler for running periodic tasks in the background
18
+ * See `pyproject.toml` for additional Python packages.
19
 
20
  ## General Project Structure
21
 
 
34
  * `scripts/`: utility scripts for development, deployment, etc.
35
  * `run-dev.sh`: script to run the application locally
36
  * `notebooks/`: Jupyter notebooks for exploration and model development
37
+ * `.env`: local environment variables for development purposes (to be created for local use only from `.env.template`)
38
+ * `Dockerfile`: Docker container build definition
39
 
40
  ## How to Run the Application
41
 
src/ctp_slack_bot/__init__.py CHANGED
@@ -0,0 +1 @@
 
 
1
+ from ctp_slack_bot.containers import Container
src/ctp_slack_bot/api/__init__.py CHANGED
@@ -0,0 +1 @@
 
 
1
+ from ctp_slack_bot.api.main import app, run
src/ctp_slack_bot/api/main.py CHANGED
@@ -4,12 +4,11 @@ from loguru import logger
4
  from typing import AsyncGenerator
5
  from dependency_injector.wiring import inject, Provide
6
 
 
7
  from ctp_slack_bot.api.routes import router
8
- from ctp_slack_bot.core.config import Settings
9
- from ctp_slack_bot.core.logging import setup_logging
10
  from ctp_slack_bot.core.response_rendering import PrettyJSONResponse
11
- from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
12
- from ctp_slack_bot.containers import Container
13
 
14
  @asynccontextmanager
15
  async def lifespan(app: FastAPI) -> AsyncGenerator:
@@ -19,14 +18,8 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
19
  """
20
  # Initialize container and wire the container to modules that need dependency injection.
21
  container = Container()
 
22
  app.container = container
23
- container.wire(
24
- modules=[
25
- "ctp_slack_bot.api.routes",
26
- "ctp_slack_bot.services",
27
- "ctp_slack_bot.tasks"
28
- ]
29
- )
30
 
31
  # Setup logging.
32
  setup_logging(container)
 
4
  from typing import AsyncGenerator
5
  from dependency_injector.wiring import inject, Provide
6
 
7
+ from ctp_slack_bot import Container
8
  from ctp_slack_bot.api.routes import router
9
+ from ctp_slack_bot.core import Settings, setup_logging
 
10
  from ctp_slack_bot.core.response_rendering import PrettyJSONResponse
11
+ from ctp_slack_bot.tasks import start_scheduler, stop_scheduler
 
12
 
13
  @asynccontextmanager
14
  async def lifespan(app: FastAPI) -> AsyncGenerator:
 
18
  """
19
  # Initialize container and wire the container to modules that need dependency injection.
20
  container = Container()
21
+ container.wire(packages=['ctp_slack_bot'])
22
  app.container = container
 
 
 
 
 
 
 
23
 
24
  # Setup logging.
25
  setup_logging(container)
src/ctp_slack_bot/api/routes.py CHANGED
@@ -2,18 +2,17 @@ from fastapi import APIRouter, Depends, HTTPException, status
2
  from dependency_injector.wiring import inject, Provide
3
  from loguru import logger
4
 
5
- from ctp_slack_bot.containers import Container
6
- #from ctp_slack_bot.api.dependencies import get_slack_service, get_transcript_service
7
- from ctp_slack_bot.core.config import Settings
8
- #from ctp_slack_bot.models.transcript import TranscriptRequest, TranscriptResponse
9
- #from ctp_slack_bot.services.slack_service import SlackService
10
- #from ctp_slack_bot.services.transcript_service import TranscriptService
11
 
12
  router = APIRouter(prefix="/api/v1")
13
 
14
  @router.get("/env", response_model=Settings)
15
  @inject
16
  async def get_env(settings: Settings = Depends(Provide[Container.settings])) -> Settings:
 
 
17
  return settings
18
 
19
  # @router.post("/transcripts/analyze", response_model=TranscriptResponse)
 
2
  from dependency_injector.wiring import inject, Provide
3
  from loguru import logger
4
 
5
+ from ctp_slack_bot import Container
6
+ from ctp_slack_bot.core import Settings
7
+ from ctp_slack_bot.services import SlackService
 
 
 
8
 
9
  router = APIRouter(prefix="/api/v1")
10
 
11
  @router.get("/env", response_model=Settings)
12
  @inject
13
  async def get_env(settings: Settings = Depends(Provide[Container.settings])) -> Settings:
14
+ if not settings.DEBUG:
15
+ raise HTTPException(status_code=404)
16
  return settings
17
 
18
  # @router.post("/transcripts/analyze", response_model=TranscriptResponse)
src/ctp_slack_bot/containers.py CHANGED
@@ -1,43 +1,25 @@
1
  from dependency_injector.containers import DeclarativeContainer
2
- from dependency_injector.providers import Configuration, Factory, Singleton
3
- #from pymongo import MongoClient
4
- #from langchain.embeddings import HuggingFaceEmbeddings
5
- #from transformers import AutoTokenizer, AutoModel
6
 
7
  from ctp_slack_bot.core.config import Settings
8
- #from ctp_slack_bot.db.connection import get_database
9
- #from ctp_slack_bot.services.vectorization_service import VectorizationService
 
 
 
 
 
 
 
 
10
 
11
  class Container(DeclarativeContainer):
12
- # Core dependencies
13
  settings = Singleton(Settings)
14
 
15
- # db_client = Singleton(
16
- # MongoClient,
17
- # host=config.MONGODB_URI.get_secret_value
18
- # )
19
-
20
- # db = Singleton(
21
- # get_database,
22
- # client=db_client,
23
- # db_name=config.MONGODB_NAME
24
- # )
25
-
26
- # Machine-learning models
27
- # tokenizer = Singleton(
28
- # AutoTokenizer.from_pretrained,
29
- # config.EMBEDDING_MODEL
30
- # )
31
-
32
- # model = Singleton(
33
- # AutoModel.from_pretrained,
34
- # config.EMBEDDING_MODEL
35
- # )
36
 
37
- # embeddings = Singleton(
38
- # HuggingFaceEmbeddings,
39
- # model_name=config.EMBEDDING_MODEL
40
- # )
41
 
42
  # Repositories
43
  # transcript_repository = Factory(
@@ -45,15 +27,18 @@ class Container(DeclarativeContainer):
45
  # db=db
46
  # )
47
 
48
- # Services
49
- # vectorization_service = Factory(
50
- # VectorizationService,
51
- # embeddings=embeddings,
52
- # chunk_size=config.CHUNK_SIZE,
53
- # chunk_overlap=config.CHUNK_OVERLAP,
54
- # vector_dimension=config.VECTOR_DIMENSION
55
- # )
 
 
 
 
 
56
 
57
- # Add other services here
58
- # transcript_service = providers.Factory(...)
59
- # slack_service = providers.Factory(...)
 
1
  from dependency_injector.containers import DeclarativeContainer
2
+ from dependency_injector.providers import Factory, Singleton
3
+ from openai import OpenAI
 
 
4
 
5
  from ctp_slack_bot.core.config import Settings
6
+ from ctp_slack_bot.db.mongo_db import MongoDB
7
+ from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
8
+ from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
9
+ from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
10
+ from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
11
+ from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
12
+ from ctp_slack_bot.services.slack_service import SlackService
13
+ from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
14
+ from ctp_slack_bot.services.vectorization_service import VectorizationService
15
+
16
 
17
  class Container(DeclarativeContainer):
 
18
  settings = Singleton(Settings)
19
 
20
+ event_brokerage_service = Singleton(EventBrokerageService)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
+ mongo_db = Singleton(MongoDB, settings=settings)
 
 
 
23
 
24
  # Repositories
25
  # transcript_repository = Factory(
 
27
  # db=db
28
  # )
29
 
30
+ open_ai_client = Factory(OpenAI, api_key=settings.provided.OPENAI_API_KEY) # TODO: poor practice to do it this way; create a LanguageModelService that creates an OpenAI client.
31
+
32
+ vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db)
33
+
34
+ vectorization_service = Singleton(VectorizationService, settings=settings, client=open_ai_client)
35
+
36
+ content_ingestion_service = Singleton(ContentIngestionService, settings=settings, event_brokerage_service=event_brokerage_service, vector_database_service=vector_database_service, vectorization_service=vectorization_service)
37
+
38
+ context_retrieval_service = Singleton(ContextRetrievalService, settings=settings, vectorization_service=vectorization_service, vector_database_service=vector_database_service)
39
+
40
+ answer_retrieval_service = Singleton(AnswerRetrievalService, settings=settings, event_brokerage_service=event_brokerage_service, client=open_ai_client)
41
+
42
+ question_dispatch_service = Singleton(QuestionDispatchService, settings=settings, event_brokerage_service=event_brokerage_service, content_ingestion_service=content_ingestion_service, context_retrieval_service=context_retrieval_service, answer_retrieval_service=answer_retrieval_service)
43
 
44
+ slack_service = Singleton(SlackService, settings=settings, event_brokerage_service=event_brokerage_service)
 
 
src/ctp_slack_bot/core/__init__.py CHANGED
@@ -0,0 +1,2 @@
 
 
 
1
+ from ctp_slack_bot.core.config import Settings
2
+ from ctp_slack_bot.core.logging import logger, setup_logging
src/ctp_slack_bot/core/logging.py CHANGED
@@ -3,9 +3,6 @@ from loguru import logger
3
  from sys import stderr
4
  from typing import Dict, Union
5
 
6
- from ctp_slack_bot.containers import Container
7
-
8
-
9
  class InterceptHandler(Handler):
10
  """
11
  Intercept standard logging messages toward Loguru.
@@ -32,7 +29,7 @@ class InterceptHandler(Handler):
32
  )
33
 
34
 
35
- def setup_logging(container: Container) -> None:
36
  """
37
  Configure logging with Loguru.
38
 
@@ -40,6 +37,7 @@ def setup_logging(container: Container) -> None:
40
  configures the log format based on settings, and intercepts
41
  standard logging messages.
42
  """
 
43
  settings = container.settings() if container else Provide[Container.settings]
44
 
45
  # Remove default loguru handler
 
3
  from sys import stderr
4
  from typing import Dict, Union
5
 
 
 
 
6
  class InterceptHandler(Handler):
7
  """
8
  Intercept standard logging messages toward Loguru.
 
29
  )
30
 
31
 
32
+ def setup_logging(container: "Container") -> None:
33
  """
34
  Configure logging with Loguru.
35
 
 
37
  configures the log format based on settings, and intercepts
38
  standard logging messages.
39
  """
40
+ from ctp_slack_bot import Container
41
  settings = container.settings() if container else Provide[Container.settings]
42
 
43
  # Remove default loguru handler
src/ctp_slack_bot/db/MongoDB.py DELETED
@@ -1,122 +0,0 @@
1
- from motor.motor_asyncio import AsyncIOMotorClient
2
- from pymongo import IndexModel, ASCENDING
3
- import logging
4
- from typing import Optional
5
-
6
- from ctp_slack_bot.core.config import settings
7
-
8
- logger = logging.getLogger(__name__)
9
-
10
- class MongoDB:
11
- """
12
- MongoDB connection and initialization class.
13
- Handles connection to MongoDB, database selection, and index creation.
14
- """
15
- def __init__(self):
16
- self.client: Optional[AsyncIOMotorClient] = None
17
- self.db = None
18
- self.vector_collection = None
19
- self.initialized = False
20
-
21
- async def connect(self):
22
- """
23
- Connect to MongoDB using connection string from settings.
24
- """
25
- if self.client is not None:
26
- return
27
-
28
- if not settings.MONGODB_URI:
29
- raise ValueError("MONGODB_URI is not set in environment variables")
30
-
31
- try:
32
- # Create MongoDB connection
33
- self.client = AsyncIOMotorClient(settings.MONGODB_URI.get_secret_value())
34
- self.db = self.client[settings.MONGODB_DB_NAME]
35
- self.vector_collection = self.db["vector_store"]
36
- logger.info(f"Connected to MongoDB: {settings.MONGODB_DB_NAME}")
37
- except Exception as e:
38
- logger.error(f"Error connecting to MongoDB: {str(e)}")
39
- raise
40
-
41
- async def initialize(self):
42
- """
43
- Initialize MongoDB with required collections and indexes.
44
- """
45
- if self.initialized:
46
- return
47
-
48
- if not self.client:
49
- await self.connect()
50
-
51
- try:
52
- # Create vector index for similarity search
53
- await self.create_vector_index()
54
- self.initialized = True
55
- logger.info("MongoDB initialized successfully")
56
- except Exception as e:
57
- logger.error(f"Error initializing MongoDB: {str(e)}")
58
- raise
59
-
60
- async def create_vector_index(self):
61
- """
62
- Create vector index for similarity search using MongoDB Atlas Vector Search.
63
- """
64
- try:
65
- # Check if index already exists
66
- existing_indexes = await self.vector_collection.list_indexes().to_list(length=None)
67
- index_names = [index.get('name') for index in existing_indexes]
68
-
69
- if "vector_index" not in index_names:
70
- # Create vector search index
71
- index_definition = {
72
- "mappings": {
73
- "dynamic": True,
74
- "fields": {
75
- "embedding": {
76
- "dimensions": settings.VECTOR_DIMENSION,
77
- "similarity": "cosine",
78
- "type": "knnVector"
79
- }
80
- }
81
- }
82
- }
83
-
84
- # Create the index
85
- await self.db.command({
86
- "createIndexes": self.vector_collection.name,
87
- "indexes": [
88
- {
89
- "name": "vector_index",
90
- "key": {"embedding": "vector"},
91
- "weights": {"embedding": 1},
92
- "vectorSearchOptions": index_definition
93
- }
94
- ]
95
- })
96
-
97
- # Create additional metadata indexes for filtering
98
- await self.vector_collection.create_index([("metadata.source", ASCENDING)])
99
- await self.vector_collection.create_index([("metadata.timestamp", ASCENDING)])
100
-
101
- logger.info("Vector search index created")
102
- else:
103
- logger.info("Vector search index already exists")
104
-
105
- except Exception as e:
106
- logger.error(f"Error creating vector index: {str(e)}")
107
- raise
108
-
109
- async def close(self):
110
- """
111
- Close MongoDB connection.
112
- """
113
- if self.client:
114
- self.client.close()
115
- self.client = None
116
- self.db = None
117
- self.vector_collection = None
118
- self.initialized = False
119
- logger.info("MongoDB connection closed")
120
-
121
- # Create a singleton instance
122
- mongodb = MongoDB()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/ctp_slack_bot/db/__init__.py CHANGED
@@ -0,0 +1 @@
 
 
1
+ from ctp_slack_bot.db.mongo_db import MongoDB
src/ctp_slack_bot/db/mongo_db.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #from motor.motor_asyncio import AsyncIOMotorClient
2
+ from loguru import logger
3
+ from pydantic import BaseModel, model_validator
4
+ #from pymongo import IndexModel, ASCENDING
5
+ from typing import Optional, Self
6
+
7
+ from ctp_slack_bot.core.config import Settings
8
+
9
+ class MongoDB(BaseModel):
10
+ """
11
+ MongoDB connection and initialization class.
12
+ Handles connection to MongoDB, database selection, and index creation.
13
+ """
14
+
15
+ settings: Settings
16
+
17
+ @model_validator(mode='after')
18
+ def post_init(self: Self) -> Self:
19
+ logger.debug("Created {}", self.__class__.__name__)
20
+ return self
21
+
22
+ def __init__(self: Self, settings: Settings) -> Self:
23
+ #self.client: Optional[AsyncIOMotorClient] = None
24
+ #self.db = None
25
+ #self.vector_collection = None
26
+ #self.initialized = False
27
+ pass # The above initialization needs to be done some other way.
28
+
29
+ # async def connect(self):
30
+ # """
31
+ # Connect to MongoDB using connection string from settings.
32
+ # """
33
+ # if self.client is not None:
34
+ # return
35
+
36
+ # if not settings.MONGODB_URI:
37
+ # raise ValueError("MONGODB_URI is not set in environment variables")
38
+
39
+ # try:
40
+ # # Create MongoDB connection
41
+ # self.client = AsyncIOMotorClient(settings.MONGODB_URI.get_secret_value())
42
+ # self.db = self.client[settings.MONGODB_DB_NAME]
43
+ # self.vector_collection = self.db["vector_store"]
44
+ # logger.info(f"Connected to MongoDB: {settings.MONGODB_DB_NAME}")
45
+ # except Exception as e:
46
+ # logger.error(f"Error connecting to MongoDB: {str(e)}")
47
+ # raise
48
+
49
+ # async def initialize(self):
50
+ # """
51
+ # Initialize MongoDB with required collections and indexes.
52
+ # """
53
+ # if self.initialized:
54
+ # return
55
+
56
+ # if not self.client:
57
+ # await self.connect()
58
+
59
+ # try:
60
+ # # Create vector index for similarity search
61
+ # await self.create_vector_index()
62
+ # self.initialized = True
63
+ # logger.info("MongoDB initialized successfully")
64
+ # except Exception as e:
65
+ # logger.error(f"Error initializing MongoDB: {str(e)}")
66
+ # raise
67
+
68
+ # async def create_vector_index(self):
69
+ # """
70
+ # Create vector index for similarity search using MongoDB Atlas Vector Search.
71
+ # """
72
+ # try:
73
+ # # Check if index already exists
74
+ # existing_indexes = await self.vector_collection.list_indexes().to_list(length=None)
75
+ # index_names = [index.get('name') for index in existing_indexes]
76
+
77
+ # if "vector_index" not in index_names:
78
+ # # Create vector search index
79
+ # index_definition = {
80
+ # "mappings": {
81
+ # "dynamic": True,
82
+ # "fields": {
83
+ # "embedding": {
84
+ # "dimensions": settings.VECTOR_DIMENSION,
85
+ # "similarity": "cosine",
86
+ # "type": "knnVector"
87
+ # }
88
+ # }
89
+ # }
90
+ # }
91
+
92
+ # # Create the index
93
+ # await self.db.command({
94
+ # "createIndexes": self.vector_collection.name,
95
+ # "indexes": [
96
+ # {
97
+ # "name": "vector_index",
98
+ # "key": {"embedding": "vector"},
99
+ # "weights": {"embedding": 1},
100
+ # "vectorSearchOptions": index_definition
101
+ # }
102
+ # ]
103
+ # })
104
+
105
+ # # Create additional metadata indexes for filtering
106
+ # await self.vector_collection.create_index([("metadata.source", ASCENDING)])
107
+ # await self.vector_collection.create_index([("metadata.timestamp", ASCENDING)])
108
+
109
+ # logger.info("Vector search index created")
110
+ # else:
111
+ # logger.info("Vector search index already exists")
112
+
113
+ # except Exception as e:
114
+ # logger.error(f"Error creating vector index: {str(e)}")
115
+ # raise
116
+
117
+ # async def close(self):
118
+ # """
119
+ # Close MongoDB connection.
120
+ # """
121
+ # if self.client:
122
+ # self.client.close()
123
+ # self.client = None
124
+ # self.db = None
125
+ # self.vector_collection = None
126
+ # self.initialized = False
127
+ # logger.info("MongoDB connection closed")
src/ctp_slack_bot/models/__init__.py CHANGED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from ctp_slack_bot.models.base import Content, Ingestible, Metadata
2
+ from ctp_slack_bot.models.content import RetreivedContext
3
+ from ctp_slack_bot.models.slack import SlackMessage
4
+ from ctp_slack_bot.models.vector_query import VectorQuery
src/ctp_slack_bot/models/{VectorQuery.py β†’ vector_query.py} RENAMED
@@ -1,6 +1,5 @@
1
  from pydantic import BaseModel, Field, validator
2
  from typing import Optional, List, Dict, Any
3
- from ctp_slack_bot.core.config import settings
4
 
5
  class VectorQuery(BaseModel):
6
  """Model for vector database similarity search queries.
@@ -12,6 +11,6 @@ class VectorQuery(BaseModel):
12
  filter_metadata: Optional filters for metadata fields
13
  """
14
  query_text: str
15
- k: int = Field(default=settings.TOP_K_MATCHES)
16
  score_threshold: float = Field(default=0.7)
17
- filter_metadata: Optional[Dict[str, Any]] = None
 
1
  from pydantic import BaseModel, Field, validator
2
  from typing import Optional, List, Dict, Any
 
3
 
4
  class VectorQuery(BaseModel):
5
  """Model for vector database similarity search queries.
 
11
  filter_metadata: Optional filters for metadata fields
12
  """
13
  query_text: str
14
+ k: int
15
  score_threshold: float = Field(default=0.7)
16
+ filter_metadata: Optional[Dict[str, Any]] = None
src/ctp_slack_bot/services/__init__.py CHANGED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
2
+ from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
3
+ from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
4
+ from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
5
+ from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
6
+ from ctp_slack_bot.services.slack_service import SlackService
7
+ from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
8
+ from ctp_slack_bot.services.vectorization_service import VectorizationService
src/ctp_slack_bot/services/{AnswerQuestionService.py β†’ answer_retrieval_service.py} RENAMED
@@ -1,17 +1,30 @@
1
- from pydantic import BaseModel, validator
2
- from typing import List, Optional, Tuple
3
- from ctp_slack_bot.core.config import settings
4
- import numpy as np
5
  from openai import OpenAI
6
- from ctp_slack_bot.models.slack import SlackMessage
7
- from ctp_slack_bot.models.content import RetreivedContext
8
 
9
- class GenerateAnswer():
 
 
 
 
 
10
  """
11
  Service for language model operations.
12
  """
13
- def __init__(self):
14
- self.client = OpenAI(api_key=settings.OPENAI_API_KEY)
 
 
 
 
 
 
 
 
 
 
15
 
16
  def generate_answer(self, question: SlackMessage, context: List[RetreivedContext]) -> str:
17
  """Generate a response using OpenAI's API with retrieved context.
@@ -50,11 +63,3 @@ class GenerateAnswer():
50
  )
51
 
52
  return response.choices[0].message.content
53
-
54
-
55
-
56
- ### REMOVE BELOW, PUT SOMEWHERE IN TESTS BUT IDK WHERE YET
57
- # sm = SlackMessage(text="What is the capital of France?", channel_id="123", user_id="456", timestamp="789")
58
- # context = [RetreivedContext(contextual_text="The capital of France is Paris", metadata_source="class materials", similarity_score=0.95)]
59
- # a = GenerateAnswer()
60
- # a.generate_answer(sm, context)
 
1
+ # from asyncio import create_task
2
+ from loguru import logger
 
 
3
  from openai import OpenAI
4
+ from pydantic import BaseModel, model_validator
5
+ from typing import List, Optional, Self, Tuple
6
 
7
+ from ctp_slack_bot.core import Settings
8
+ from ctp_slack_bot.models import RetreivedContext, SlackMessage
9
+ from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
10
+
11
+
12
+ class AnswerRetrievalService(BaseModel): # TODO: this should separate the OpenAI backend out into its own service.
13
  """
14
  Service for language model operations.
15
  """
16
+
17
+ settings: Settings
18
+ event_brokerage_service: EventBrokerageService
19
+ client: OpenAI # TODO: this should separate the OpenAI backend out into its own service, one that is agnostic.
20
+
21
+ class Config:
22
+ arbitrary_types_allowed = True
23
+
24
+ @model_validator(mode='after')
25
+ def post_init(self: Self) -> Self:
26
+ logger.debug("Created {}", self.__class__.__name__)
27
+ return self
28
 
29
  def generate_answer(self, question: SlackMessage, context: List[RetreivedContext]) -> str:
30
  """Generate a response using OpenAI's API with retrieved context.
 
63
  )
64
 
65
  return response.choices[0].message.content
 
 
 
 
 
 
 
 
src/ctp_slack_bot/services/content_ingestion_service.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from loguru import logger
2
+ from pydantic import BaseModel, model_validator
3
+ from typing import Self
4
+
5
+ from ctp_slack_bot.core import Settings
6
+ from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
7
+ from ctp_slack_bot.services.vectorization_service import VectorizationService
8
+
9
+ class ContentIngestionService(BaseModel):
10
+ """
11
+ Service for ingesting content.
12
+ """
13
+
14
+ settings: Settings
15
+ vector_database_service: VectorDatabaseService
16
+ vectorization_service: VectorizationService
17
+
18
+ @model_validator(mode='after')
19
+ def post_init(self: Self) -> Self:
20
+ logger.debug("Created {}", self.__class__.__name__)
21
+ return self
src/ctp_slack_bot/services/{ContextRetrievalService.py β†’ context_retrieval_service.py} RENAMED
@@ -1,29 +1,31 @@
1
- import logging
2
- from typing import List, Dict, Any, Optional
 
3
 
4
- from ctp_slack_bot.models.slack import SlackMessage
5
- from ctp_slack_bot.models.content import RetreivedContext
6
- from ctp_slack_bot.models.VectorQuery import VectorQuery
7
- from ctp_slack_bot.services.VectorizationService import VectorizationService
8
- from ctp_slack_bot.services.VectorDatabaseService import VectorDatabaseService
9
- from ctp_slack_bot.core.config import settings
10
 
11
- logger = logging.getLogger(__name__)
12
-
13
- class ContextRetrievalService:
14
  """
15
  Service for retrieving relevant context from the vector database based on user questions.
16
  """
17
-
18
- def __init__(self):
19
- self.vectorization_service = VectorizationService()
20
- self.vector_db_service = VectorDatabaseService()
 
 
 
 
 
21
 
22
  async def initialize(self):
23
  """
24
  Initialize the required services.
25
  """
26
- await self.vector_db_service.initialize()
27
 
28
  async def get_context(self, message: SlackMessage) -> List[RetreivedContext]:
29
  """
@@ -62,7 +64,7 @@ class ContextRetrievalService:
62
  )
63
 
64
  # Search for similar content in vector database
65
- context_results = await self.vector_db_service.search_by_similarity(
66
  query=vector_query,
67
  query_embedding=query_embedding
68
  )
 
1
+ from loguru import logger
2
+ from pydantic import BaseModel, model_validator
3
+ from typing import Any, Dict, List, Optional, Self
4
 
5
+ from ctp_slack_bot.core.config import Settings
6
+ from ctp_slack_bot.models import RetreivedContext, SlackMessage, VectorQuery
7
+ from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
8
+ from ctp_slack_bot.services.vectorization_service import VectorizationService
 
 
9
 
10
+ class ContextRetrievalService(BaseModel):
 
 
11
  """
12
  Service for retrieving relevant context from the vector database based on user questions.
13
  """
14
+
15
+ settings: Settings
16
+ vectorization_service: VectorizationService
17
+ vector_database_service: VectorDatabaseService
18
+
19
+ @model_validator(mode='after')
20
+ def post_init(self: Self) -> Self:
21
+ logger.debug("Created {}", self.__class__.__name__)
22
+ return self
23
 
24
  async def initialize(self):
25
  """
26
  Initialize the required services.
27
  """
28
+ await self.vector_database_service.initialize()
29
 
30
  async def get_context(self, message: SlackMessage) -> List[RetreivedContext]:
31
  """
 
64
  )
65
 
66
  # Search for similar content in vector database
67
+ context_results = await self.vector_database_service.search_by_similarity(
68
  query=vector_query,
69
  query_embedding=query_embedding
70
  )
src/ctp_slack_bot/services/event_brokerage_service.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from asyncio import create_task
2
+ from loguru import logger
3
+ from openai import OpenAI
4
+ from pydantic import BaseModel, model_validator
5
+ from typing import Any, Callable, Dict, List, Self
6
+
7
+ from ctp_slack_bot.core import Settings
8
+ from ctp_slack_bot.models import RetreivedContext, SlackMessage
9
+ from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
10
+ from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
11
+
12
+
13
+ class EventBrokerageService(BaseModel):
14
+ """
15
+ Service for brokering events between services.
16
+ """
17
+
18
+ subscribers: Dict[str, List[Callable]] = {}
19
+
20
+ class Config:
21
+ arbitrary_types_allowed = True
22
+
23
+ @model_validator(mode='after')
24
+ def post_init(self: Self) -> Self:
25
+ logger.debug("Created {}", self.__class__.__name__)
26
+ return self
27
+
28
+ def subscribe(self: Self, event_type: str, callback: Callable) -> None:
29
+ """Subscribe to an event type with a callback function."""
30
+ if event_type not in self.subscribers:
31
+ self.subscribers[event_type] = []
32
+ self.subscribers[event_type].append(callback)
33
+
34
+ def publish(self: Self, event_type: str, data: Any = None) -> None:
35
+ """Publish an event with optional data to all subscribers."""
36
+ if event_type in self.subscribers:
37
+ for callback in self.subscribers[event_type]:
38
+ callback(data)
src/ctp_slack_bot/services/question_dispatch_service.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from asyncio import create_task
2
+ from loguru import logger
3
+ from openai import OpenAI
4
+ from pydantic import BaseModel, model_validator
5
+ from typing import List, Optional, Self, Tuple
6
+
7
+ from ctp_slack_bot.core import Settings
8
+ from ctp_slack_bot.models import RetreivedContext, SlackMessage
9
+ from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
10
+ from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
11
+ from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
12
+
13
+
14
+ class QuestionDispatchService(BaseModel):
15
+ """
16
+ Service for determining whether a Slack message constitutes a question.
17
+ """
18
+
19
+ settings: Settings
20
+ event_brokerage_service: EventBrokerageService
21
+ context_retrieval_service: ContextRetrievalService
22
+ answer_retrieval_service: AnswerRetrievalService
23
+
24
+ @model_validator(mode='after')
25
+ def post_init(self: Self) -> Self:
26
+ logger.debug("Created {}", self.__class__.__name__)
27
+ return self
28
+
29
+ def push(self: Self, message: SlackMessage) -> None:
30
+ context = self.context_retrieval_service.get_context(message)
31
+ self.answer_retrieval_service.generate_answer(message, context)
src/ctp_slack_bot/services/slack_service.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from asyncio import create_task
2
+ from loguru import logger
3
+ from openai import OpenAI
4
+ from pydantic import BaseModel, model_validator
5
+ from typing import List, Optional, Self, Tuple
6
+
7
+ from ctp_slack_bot.core import Settings
8
+ from ctp_slack_bot.models import RetreivedContext, SlackMessage
9
+ from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
10
+
11
+
12
+ class SlackService(BaseModel):
13
+ """
14
+ Service for interfacing with Slack.
15
+ """
16
+
17
+ settings: Settings
18
+ event_brokerage_service: EventBrokerageService
19
+
20
+ @model_validator(mode='after')
21
+ def post_init(self: Self) -> Self:
22
+ logger.debug("Created {}", self.__class__.__name__)
23
+ return self
src/ctp_slack_bot/services/{VectorDatabaseService.py β†’ vector_database_service.py} RENAMED
@@ -1,18 +1,24 @@
1
- import logging
2
- from typing import List, Dict, Any, Optional
3
- # import numpy as np
4
 
5
- from ctp_slack_bot.db.MongoDB import mongodb
6
- from ctp_slack_bot.models.VectorQuery import VectorQuery
7
- from ctp_slack_bot.models.content import RetreivedContext
8
 
9
- logger = logging.getLogger(__name__)
10
-
11
- class VectorDatabaseService:
12
  """
13
  Service for storing and retrieving vector embeddings from MongoDB.
14
  """
15
-
 
 
 
 
 
 
 
 
16
  async def initialize(self):
17
  """
18
  Initialize the database connection.
 
1
+ from loguru import logger
2
+ from pydantic import BaseModel, model_validator
3
+ from typing import Any, Dict, List, Optional, Self
4
 
5
+ from ctp_slack_bot.core import Settings
6
+ from ctp_slack_bot.db import MongoDB
7
+ from ctp_slack_bot.models import VectorQuery, RetreivedContext
8
 
9
+ class VectorDatabaseService(BaseModel): # TODO: this should not rely specifically on MongoDB.
 
 
10
  """
11
  Service for storing and retrieving vector embeddings from MongoDB.
12
  """
13
+
14
+ settings: Settings
15
+ mongo_db: MongoDB
16
+
17
+ @model_validator(mode='after')
18
+ def post_init(self: Self) -> Self:
19
+ logger.debug("Created {}", self.__class__.__name__)
20
+ return self
21
+
22
  async def initialize(self):
23
  """
24
  Initialize the database connection.
src/ctp_slack_bot/services/{VectorizationService.py β†’ vectorization_service.py} RENAMED
@@ -1,17 +1,26 @@
1
- from pydantic import BaseModel, validator
2
- from typing import List, Optional
3
- from ctp_slack_bot.core.config import settings
4
  import numpy as np
5
  from openai import OpenAI
 
 
6
 
 
7
 
8
- class VectorizationService():
9
  """
10
  Service for vectorizing chunks of text data.
11
  """
12
- def __init__(self):
13
- self.client = OpenAI(api_key=settings.OPENAI_API_KEY)
14
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def get_embeddings(self, texts: List[str]) -> np.ndarray:
17
  """Get embeddings for a list of texts using OpenAI's API.
@@ -28,7 +37,7 @@ class VectorizationService():
28
  try:
29
  # Use the initialized client instead of the global openai module
30
  response = self.client.embeddings.create(
31
- model=settings.EMBEDDING_MODEL,
32
  input=texts,
33
  encoding_format="float" # Ensure we get raw float values
34
  )
@@ -36,9 +45,9 @@ class VectorizationService():
36
  # Extract embeddings and verify dimensions
37
  embeddings = np.array([data.embedding for data in response.data])
38
 
39
- if embeddings.shape[1] != settings.VECTOR_DIMENSION:
40
  raise ValueError(
41
- f"Embedding dimension mismatch. Expected {settings.VECTOR_DIMENSION}, "
42
  f"but got {embeddings.shape[1]}. Please update VECTOR_DIMENSION "
43
  f"in config.py to match the model's output."
44
  )
 
1
+ from loguru import logger
 
 
2
  import numpy as np
3
  from openai import OpenAI
4
+ from pydantic import BaseModel, model_validator
5
+ from typing import List, Optional, Self
6
 
7
+ from ctp_slack_bot.core import Settings
8
 
9
+ class VectorizationService(BaseModel):
10
  """
11
  Service for vectorizing chunks of text data.
12
  """
 
 
13
 
14
+ settings: Settings
15
+ client: OpenAI # TODO: this should separate the OpenAI backend out into its own service, one that is agnostic.
16
+
17
+ class Config:
18
+ arbitrary_types_allowed = True
19
+
20
+ @model_validator(mode='after')
21
+ def post_init(self: Self) -> Self:
22
+ logger.debug("Created {}", self.__class__.__name__)
23
+ return self
24
 
25
  def get_embeddings(self, texts: List[str]) -> np.ndarray:
26
  """Get embeddings for a list of texts using OpenAI's API.
 
37
  try:
38
  # Use the initialized client instead of the global openai module
39
  response = self.client.embeddings.create(
40
+ model=self.settings.EMBEDDING_MODEL,
41
  input=texts,
42
  encoding_format="float" # Ensure we get raw float values
43
  )
 
45
  # Extract embeddings and verify dimensions
46
  embeddings = np.array([data.embedding for data in response.data])
47
 
48
+ if embeddings.shape[1] != self.settings.VECTOR_DIMENSION:
49
  raise ValueError(
50
+ f"Embedding dimension mismatch. Expected {self.settings.VECTOR_DIMENSION}, "
51
  f"but got {embeddings.shape[1]}. Please update VECTOR_DIMENSION "
52
  f"in config.py to match the model's output."
53
  )
src/ctp_slack_bot/tasks/__init__.py CHANGED
@@ -0,0 +1 @@
 
 
1
+ from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
src/ctp_slack_bot/tasks/scheduler.py CHANGED
@@ -6,9 +6,7 @@ from loguru import logger
6
  from pytz import timezone
7
  from typing import Optional
8
 
9
- from ctp_slack_bot.containers import Container
10
- #from ctp_slack_bot.tasks.error_report import send_error_report
11
- #from ctp_slack_bot.tasks.transcript_cleanup import cleanup_old_transcripts
12
 
13
  @inject
14
  def start_scheduler(container: Container) -> AsyncIOScheduler:
 
6
  from pytz import timezone
7
  from typing import Optional
8
 
9
+ from ctp_slack_bot import Container
 
 
10
 
11
  @inject
12
  def start_scheduler(container: Container) -> AsyncIOScheduler: