Spaces:
Runtime error
Runtime error
Merge pull request #2 from CUNYTechPrep/dependency-injection
Browse files- README.MD +23 -2
- pyproject.toml +25 -22
- src/ctp_slack_bot/__init__.py +1 -0
- src/ctp_slack_bot/api/__init__.py +1 -0
- src/ctp_slack_bot/api/main.py +32 -30
- src/ctp_slack_bot/api/routes.py +10 -4
- src/ctp_slack_bot/containers.py +44 -0
- src/ctp_slack_bot/core/__init__.py +2 -0
- src/ctp_slack_bot/core/config.py +1 -15
- src/ctp_slack_bot/core/logging.py +21 -29
- src/ctp_slack_bot/db/MongoDB.py +0 -122
- src/ctp_slack_bot/db/__init__.py +1 -0
- src/ctp_slack_bot/db/mongo_db.py +127 -0
- src/ctp_slack_bot/models/__init__.py +4 -0
- src/ctp_slack_bot/models/{VectorQuery.py → vector_query.py} +2 -3
- src/ctp_slack_bot/services/__init__.py +8 -0
- src/ctp_slack_bot/services/{AnswerQuestionService.py → answer_retrieval_service.py} +22 -17
- src/ctp_slack_bot/services/content_ingestion_service.py +21 -0
- src/ctp_slack_bot/services/{ContextRetrievalService.py → context_retrieval_service.py} +19 -17
- src/ctp_slack_bot/services/event_brokerage_service.py +38 -0
- src/ctp_slack_bot/services/question_dispatch_service.py +31 -0
- src/ctp_slack_bot/services/slack_service.py +23 -0
- src/ctp_slack_bot/services/{VectorDatabaseService.py → vector_database_service.py} +16 -10
- src/ctp_slack_bot/services/{VectorizationService.py → vectorization_service.py} +18 -9
- src/ctp_slack_bot/tasks/__init__.py +1 -0
- src/ctp_slack_bot/tasks/scheduler.py +17 -21
README.MD
CHANGED
@@ -1,5 +1,10 @@
|
|
1 |
# CTP Slack Bot
|
2 |
|
|
|
|
|
|
|
|
|
|
|
3 |
## Tech Stack
|
4 |
|
5 |
* Hugging Face Spaces for hosting and serverless API
|
@@ -7,7 +12,10 @@
|
|
7 |
* MongoDB for data persistence
|
8 |
* Docker for containerization
|
9 |
* Python
|
10 |
-
*
|
|
|
|
|
|
|
11 |
|
12 |
## General Project Structure
|
13 |
|
@@ -26,7 +34,8 @@
|
|
26 |
* `scripts/`: utility scripts for development, deployment, etc.
|
27 |
* `run-dev.sh`: script to run the application locally
|
28 |
* `notebooks/`: Jupyter notebooks for exploration and model development
|
29 |
-
* `.env`: local environment variables for development purposes
|
|
|
30 |
|
31 |
## How to Run the Application
|
32 |
|
@@ -34,6 +43,18 @@
|
|
34 |
|
35 |
Just run the Docker image. 😉
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
### For Development
|
38 |
|
39 |
Development usually requires rapid iteration. That means a change in the code ought to be reflected as soon as possible in the behavior of the application.
|
|
|
1 |
# CTP Slack Bot
|
2 |
|
3 |
+
## _Modus Operandi_ in a Nutshell
|
4 |
+
|
5 |
+
* Intelligently responds to Slack messages based on a repository of data.
|
6 |
+
* Periodically checks for new content to add to its repository.
|
7 |
+
|
8 |
## Tech Stack
|
9 |
|
10 |
* Hugging Face Spaces for hosting and serverless API
|
|
|
12 |
* MongoDB for data persistence
|
13 |
* Docker for containerization
|
14 |
* Python
|
15 |
+
* FastAPI for serving HTTP requests
|
16 |
+
* httpx for making HTTP requests
|
17 |
+
* APScheduler for running periodic tasks in the background
|
18 |
+
* See `pyproject.toml` for additional Python packages.
|
19 |
|
20 |
## General Project Structure
|
21 |
|
|
|
34 |
* `scripts/`: utility scripts for development, deployment, etc.
|
35 |
* `run-dev.sh`: script to run the application locally
|
36 |
* `notebooks/`: Jupyter notebooks for exploration and model development
|
37 |
+
* `.env`: local environment variables for development purposes (to be created for local use only from `.env.template`)
|
38 |
+
* `Dockerfile`: Docker container build definition
|
39 |
|
40 |
## How to Run the Application
|
41 |
|
|
|
43 |
|
44 |
Just run the Docker image. 😉
|
45 |
|
46 |
+
Build it with:
|
47 |
+
|
48 |
+
```sh
|
49 |
+
docker build . -t ctp-slack-bot
|
50 |
+
```
|
51 |
+
|
52 |
+
Run it with:
|
53 |
+
|
54 |
+
```sh
|
55 |
+
docker run --env-file=.env -p 8000:8000 --name my-ctp-slack-bot-instance ctp-slack-bot
|
56 |
+
```
|
57 |
+
|
58 |
### For Development
|
59 |
|
60 |
Development usually requires rapid iteration. That means a change in the code ought to be reflected as soon as possible in the behavior of the application.
|
pyproject.toml
CHANGED
@@ -19,34 +19,37 @@ classifiers = [
|
|
19 |
"Operating System :: OS Independent",
|
20 |
]
|
21 |
dependencies = [
|
22 |
-
"
|
23 |
-
"pydantic
|
24 |
-
"
|
25 |
-
"
|
26 |
-
"
|
27 |
-
"
|
28 |
-
"
|
29 |
-
"
|
30 |
-
"
|
|
|
31 |
"pytz>=2025.2",
|
32 |
-
"apscheduler>=3.
|
33 |
-
"slack-sdk>=3.
|
34 |
-
"pymongo>=4.
|
35 |
-
"
|
36 |
-
"
|
37 |
-
"
|
38 |
-
"
|
|
|
|
|
39 |
]
|
40 |
|
41 |
[project.optional-dependencies]
|
42 |
dev = [
|
43 |
-
"pytest>=
|
44 |
-
"pytest-cov>=
|
45 |
-
"mypy>=1.
|
46 |
"types-pytz>=2025.2",
|
47 |
-
"black>=
|
48 |
-
"isort>=
|
49 |
-
"ruff>=0.
|
50 |
]
|
51 |
|
52 |
[project.urls]
|
|
|
19 |
"Operating System :: OS Independent",
|
20 |
]
|
21 |
dependencies = [
|
22 |
+
"dependency-injector>=4.46.0",
|
23 |
+
"pydantic>=2.11.2",
|
24 |
+
"pydantic-settings>=2.8.1",
|
25 |
+
"fastapi>=0.115.12",
|
26 |
+
"uvicorn>=0.34.0",
|
27 |
+
"loguru>=0.7.3",
|
28 |
+
"python-dotenv>=1.1.0",
|
29 |
+
"httpx>=0.28.1",
|
30 |
+
"tenacity>=9.1.2",
|
31 |
+
"pybreaker>=1.3.0",
|
32 |
"pytz>=2025.2",
|
33 |
+
"apscheduler>=3.11.0",
|
34 |
+
"slack-sdk>=3.35.0",
|
35 |
+
"pymongo>=4.11.3 ",
|
36 |
+
"numpy>=2.2.4",
|
37 |
+
"webvtt-py>=0.5.1",
|
38 |
+
"openai>=1.70.0",
|
39 |
+
# "langchain>=0.3.23",
|
40 |
+
# "transformers>=4.51.0",
|
41 |
+
# "torch>=2.6.0",
|
42 |
]
|
43 |
|
44 |
[project.optional-dependencies]
|
45 |
dev = [
|
46 |
+
"pytest>=8.3.5",
|
47 |
+
"pytest-cov>=6.1.1",
|
48 |
+
"mypy>=1.15.0",
|
49 |
"types-pytz>=2025.2",
|
50 |
+
"black>=25.1.0",
|
51 |
+
"isort>=6.0.1",
|
52 |
+
"ruff>=0.11.4",
|
53 |
]
|
54 |
|
55 |
[project.urls]
|
src/ctp_slack_bot/__init__.py
CHANGED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from ctp_slack_bot.containers import Container
|
src/ctp_slack_bot/api/__init__.py
CHANGED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from ctp_slack_bot.api.main import app, run
|
src/ctp_slack_bot/api/main.py
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
from contextlib import asynccontextmanager
|
2 |
-
from fastapi import FastAPI, HTTPException
|
3 |
from loguru import logger
|
4 |
-
from typing import AsyncGenerator
|
|
|
5 |
|
|
|
6 |
from ctp_slack_bot.api.routes import router
|
7 |
-
from ctp_slack_bot.core
|
8 |
-
from ctp_slack_bot.core.logging import setup_logging
|
9 |
from ctp_slack_bot.core.response_rendering import PrettyJSONResponse
|
10 |
-
from ctp_slack_bot.tasks
|
11 |
-
|
12 |
|
13 |
@asynccontextmanager
|
14 |
async def lifespan(app: FastAPI) -> AsyncGenerator:
|
@@ -16,20 +16,25 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
|
|
16 |
Lifespan context manager for FastAPI application.
|
17 |
Handles startup and shutdown events.
|
18 |
"""
|
19 |
-
#
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
21 |
logger.info("Starting application")
|
22 |
-
|
23 |
-
# Start scheduler
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
yield
|
28 |
-
|
29 |
-
# Shutdown
|
30 |
logger.info("Shutting down application")
|
31 |
-
|
32 |
-
|
33 |
|
34 |
|
35 |
app = FastAPI(
|
@@ -39,30 +44,27 @@ app = FastAPI(
|
|
39 |
lifespan=lifespan,
|
40 |
)
|
41 |
|
42 |
-
# Include routers
|
43 |
app.include_router(router)
|
44 |
|
|
|
45 |
@app.get("/health")
|
46 |
-
async def
|
47 |
"""Health check"""
|
48 |
return {
|
49 |
"status": "healthy"
|
50 |
}
|
51 |
|
52 |
-
|
53 |
-
|
54 |
-
"""Server-internal environment variables"""
|
55 |
-
if not settings.DEBUG:
|
56 |
-
raise HTTPException(status_code=404)
|
57 |
-
return settings
|
58 |
-
|
59 |
-
|
60 |
-
if __name__ == "__main__":
|
61 |
import uvicorn
|
62 |
-
|
63 |
uvicorn.run(
|
64 |
"main:app",
|
65 |
host=settings.API_HOST,
|
66 |
port=settings.API_PORT,
|
67 |
reload=settings.DEBUG
|
68 |
)
|
|
|
|
|
|
|
|
1 |
from contextlib import asynccontextmanager
|
2 |
+
from fastapi import FastAPI, HTTPException, Depends
|
3 |
from loguru import logger
|
4 |
+
from typing import AsyncGenerator
|
5 |
+
from dependency_injector.wiring import inject, Provide
|
6 |
|
7 |
+
from ctp_slack_bot import Container
|
8 |
from ctp_slack_bot.api.routes import router
|
9 |
+
from ctp_slack_bot.core import Settings, setup_logging
|
|
|
10 |
from ctp_slack_bot.core.response_rendering import PrettyJSONResponse
|
11 |
+
from ctp_slack_bot.tasks import start_scheduler, stop_scheduler
|
|
|
12 |
|
13 |
@asynccontextmanager
|
14 |
async def lifespan(app: FastAPI) -> AsyncGenerator:
|
|
|
16 |
Lifespan context manager for FastAPI application.
|
17 |
Handles startup and shutdown events.
|
18 |
"""
|
19 |
+
# Initialize container and wire the container to modules that need dependency injection.
|
20 |
+
container = Container()
|
21 |
+
container.wire(packages=['ctp_slack_bot'])
|
22 |
+
app.container = container
|
23 |
+
|
24 |
+
# Setup logging.
|
25 |
+
setup_logging(container)
|
26 |
logger.info("Starting application")
|
27 |
+
|
28 |
+
# Start the scheduler.
|
29 |
+
scheduler = start_scheduler(container)
|
30 |
+
logger.info("Started scheduler")
|
31 |
+
|
32 |
+
yield # control to FastAPI until shutdown.
|
33 |
+
|
34 |
+
# Shutdown.
|
35 |
logger.info("Shutting down application")
|
36 |
+
stop_scheduler(scheduler)
|
37 |
+
logger.info("Stopped scheduler")
|
38 |
|
39 |
|
40 |
app = FastAPI(
|
|
|
44 |
lifespan=lifespan,
|
45 |
)
|
46 |
|
47 |
+
# Include routers.
|
48 |
app.include_router(router)
|
49 |
|
50 |
+
# Provide a minimalist health check endpoint for clients to detect availability.
|
51 |
@app.get("/health")
|
52 |
+
async def get_health() -> dict[str, str]:
|
53 |
"""Health check"""
|
54 |
return {
|
55 |
"status": "healthy"
|
56 |
}
|
57 |
|
58 |
+
# Alternate starting path for development
|
59 |
+
def run() -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
import uvicorn
|
61 |
+
settings = Settings() # type: ignore
|
62 |
uvicorn.run(
|
63 |
"main:app",
|
64 |
host=settings.API_HOST,
|
65 |
port=settings.API_PORT,
|
66 |
reload=settings.DEBUG
|
67 |
)
|
68 |
+
|
69 |
+
if __name__ == "__main__":
|
70 |
+
run()
|
src/ctp_slack_bot/api/routes.py
CHANGED
@@ -1,13 +1,19 @@
|
|
1 |
from fastapi import APIRouter, Depends, HTTPException, status
|
|
|
2 |
from loguru import logger
|
3 |
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
#from ctp_slack_bot.services.transcript_service import TranscriptService
|
8 |
|
9 |
router = APIRouter(prefix="/api/v1")
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
# @router.post("/transcripts/analyze", response_model=TranscriptResponse)
|
13 |
# async def analyze_transcript(
|
|
|
1 |
from fastapi import APIRouter, Depends, HTTPException, status
|
2 |
+
from dependency_injector.wiring import inject, Provide
|
3 |
from loguru import logger
|
4 |
|
5 |
+
from ctp_slack_bot import Container
|
6 |
+
from ctp_slack_bot.core import Settings
|
7 |
+
from ctp_slack_bot.services import SlackService
|
|
|
8 |
|
9 |
router = APIRouter(prefix="/api/v1")
|
10 |
|
11 |
+
@router.get("/env", response_model=Settings)
|
12 |
+
@inject
|
13 |
+
async def get_env(settings: Settings = Depends(Provide[Container.settings])) -> Settings:
|
14 |
+
if not settings.DEBUG:
|
15 |
+
raise HTTPException(status_code=404)
|
16 |
+
return settings
|
17 |
|
18 |
# @router.post("/transcripts/analyze", response_model=TranscriptResponse)
|
19 |
# async def analyze_transcript(
|
src/ctp_slack_bot/containers.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dependency_injector.containers import DeclarativeContainer
|
2 |
+
from dependency_injector.providers import Factory, Singleton
|
3 |
+
from openai import OpenAI
|
4 |
+
|
5 |
+
from ctp_slack_bot.core.config import Settings
|
6 |
+
from ctp_slack_bot.db.mongo_db import MongoDB
|
7 |
+
from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
|
8 |
+
from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
|
9 |
+
from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
|
10 |
+
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
11 |
+
from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
|
12 |
+
from ctp_slack_bot.services.slack_service import SlackService
|
13 |
+
from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
|
14 |
+
from ctp_slack_bot.services.vectorization_service import VectorizationService
|
15 |
+
|
16 |
+
|
17 |
+
class Container(DeclarativeContainer):
|
18 |
+
settings = Singleton(Settings)
|
19 |
+
|
20 |
+
event_brokerage_service = Singleton(EventBrokerageService)
|
21 |
+
|
22 |
+
mongo_db = Singleton(MongoDB, settings=settings)
|
23 |
+
|
24 |
+
# Repositories
|
25 |
+
# transcript_repository = Factory(
|
26 |
+
# # Your transcript repository class
|
27 |
+
# db=db
|
28 |
+
# )
|
29 |
+
|
30 |
+
open_ai_client = Factory(OpenAI, api_key=settings.provided.OPENAI_API_KEY) # TODO: poor practice to do it this way; create a LanguageModelService that creates an OpenAI client.
|
31 |
+
|
32 |
+
vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db)
|
33 |
+
|
34 |
+
vectorization_service = Singleton(VectorizationService, settings=settings, client=open_ai_client)
|
35 |
+
|
36 |
+
content_ingestion_service = Singleton(ContentIngestionService, settings=settings, event_brokerage_service=event_brokerage_service, vector_database_service=vector_database_service, vectorization_service=vectorization_service)
|
37 |
+
|
38 |
+
context_retrieval_service = Singleton(ContextRetrievalService, settings=settings, vectorization_service=vectorization_service, vector_database_service=vector_database_service)
|
39 |
+
|
40 |
+
answer_retrieval_service = Singleton(AnswerRetrievalService, settings=settings, event_brokerage_service=event_brokerage_service, client=open_ai_client)
|
41 |
+
|
42 |
+
question_dispatch_service = Singleton(QuestionDispatchService, settings=settings, event_brokerage_service=event_brokerage_service, content_ingestion_service=content_ingestion_service, context_retrieval_service=context_retrieval_service, answer_retrieval_service=answer_retrieval_service)
|
43 |
+
|
44 |
+
slack_service = Singleton(SlackService, settings=settings, event_brokerage_service=event_brokerage_service)
|
src/ctp_slack_bot/core/__init__.py
CHANGED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
from ctp_slack_bot.core.config import Settings
|
2 |
+
from ctp_slack_bot.core.logging import logger, setup_logging
|
src/ctp_slack_bot/core/config.py
CHANGED
@@ -1,9 +1,6 @@
|
|
1 |
-
from functools import lru_cache
|
2 |
-
from typing import Literal, Optional
|
3 |
-
|
4 |
from pydantic import Field, MongoDsn, NonNegativeFloat, NonNegativeInt, PositiveInt, SecretStr
|
5 |
from pydantic_settings import BaseSettings, SettingsConfigDict
|
6 |
-
|
7 |
|
8 |
class Settings(BaseSettings): # TODO: Strong guarantees of validity, because garbage in = garbage out, and settings flow into all the nooks and crannies
|
9 |
"""
|
@@ -54,14 +51,3 @@ class Settings(BaseSettings): # TODO: Strong guarantees of validity, because gar
|
|
54 |
env_file_encoding="utf-8",
|
55 |
case_sensitive=True,
|
56 |
)
|
57 |
-
|
58 |
-
|
59 |
-
@lru_cache
|
60 |
-
def get_settings() -> Settings:
|
61 |
-
"""
|
62 |
-
Get cached settings instance.
|
63 |
-
"""
|
64 |
-
return Settings() # type: ignore
|
65 |
-
|
66 |
-
|
67 |
-
settings = get_settings()
|
|
|
|
|
|
|
|
|
1 |
from pydantic import Field, MongoDsn, NonNegativeFloat, NonNegativeInt, PositiveInt, SecretStr
|
2 |
from pydantic_settings import BaseSettings, SettingsConfigDict
|
3 |
+
from typing import Literal, Optional
|
4 |
|
5 |
class Settings(BaseSettings): # TODO: Strong guarantees of validity, because garbage in = garbage out, and settings flow into all the nooks and crannies
|
6 |
"""
|
|
|
51 |
env_file_encoding="utf-8",
|
52 |
case_sensitive=True,
|
53 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ctp_slack_bot/core/logging.py
CHANGED
@@ -1,13 +1,9 @@
|
|
1 |
-
import
|
2 |
-
import sys
|
3 |
-
from typing import Dict, Union
|
4 |
-
|
5 |
from loguru import logger
|
|
|
|
|
6 |
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
class InterceptHandler(logging.Handler):
|
11 |
"""
|
12 |
Intercept standard logging messages toward Loguru.
|
13 |
|
@@ -15,7 +11,7 @@ class InterceptHandler(logging.Handler):
|
|
15 |
to Loguru, allowing unified logging across the application.
|
16 |
"""
|
17 |
|
18 |
-
def emit(self, record:
|
19 |
# Get corresponding Loguru level if it exists
|
20 |
try:
|
21 |
level = logger.level(record.levelname).name
|
@@ -23,8 +19,8 @@ class InterceptHandler(logging.Handler):
|
|
23 |
level = record.levelno
|
24 |
|
25 |
# Find caller from where the logged message originated
|
26 |
-
frame, depth =
|
27 |
-
while frame and frame.f_code.co_filename ==
|
28 |
frame = frame.f_back
|
29 |
depth += 1
|
30 |
|
@@ -33,7 +29,7 @@ class InterceptHandler(logging.Handler):
|
|
33 |
)
|
34 |
|
35 |
|
36 |
-
def setup_logging() -> None:
|
37 |
"""
|
38 |
Configure logging with Loguru.
|
39 |
|
@@ -41,9 +37,12 @@ def setup_logging() -> None:
|
|
41 |
configures the log format based on settings, and intercepts
|
42 |
standard logging messages.
|
43 |
"""
|
|
|
|
|
|
|
44 |
# Remove default loguru handler
|
45 |
logger.remove()
|
46 |
-
|
47 |
# Determine log format
|
48 |
if settings.LOG_FORMAT == "json":
|
49 |
log_format = {
|
@@ -62,17 +61,17 @@ def setup_logging() -> None:
|
|
62 |
"<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
|
63 |
"<level>{message}</level>"
|
64 |
)
|
65 |
-
|
66 |
# Add console handler
|
67 |
logger.add(
|
68 |
-
|
69 |
format=format_string,
|
70 |
level=settings.LOG_LEVEL,
|
71 |
serialize=(settings.LOG_FORMAT == "json"),
|
72 |
backtrace=True,
|
73 |
diagnose=True,
|
74 |
)
|
75 |
-
|
76 |
# Add file handler for non-DEBUG environments
|
77 |
if settings.LOG_LEVEL != "DEBUG":
|
78 |
logger.add(
|
@@ -84,19 +83,12 @@ def setup_logging() -> None:
|
|
84 |
level=settings.LOG_LEVEL,
|
85 |
serialize=(settings.LOG_FORMAT == "json"),
|
86 |
)
|
87 |
-
|
88 |
# Intercept standard logging messages
|
89 |
-
|
90 |
-
|
91 |
# Update logging levels for some noisy libraries
|
92 |
-
for logger_name in
|
93 |
-
|
94 |
-
|
95 |
-
"fastapi",
|
96 |
-
"httpx",
|
97 |
-
"apscheduler",
|
98 |
-
"pymongo",
|
99 |
-
]:
|
100 |
-
logging.getLogger(logger_name).setLevel(logging.INFO)
|
101 |
-
|
102 |
logger.info(f"Logging configured with level {settings.LOG_LEVEL}")
|
|
|
1 |
+
from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord
|
|
|
|
|
|
|
2 |
from loguru import logger
|
3 |
+
from sys import stderr
|
4 |
+
from typing import Dict, Union
|
5 |
|
6 |
+
class InterceptHandler(Handler):
|
|
|
|
|
|
|
7 |
"""
|
8 |
Intercept standard logging messages toward Loguru.
|
9 |
|
|
|
11 |
to Loguru, allowing unified logging across the application.
|
12 |
"""
|
13 |
|
14 |
+
def emit(self, record: LogRecord) -> None:
|
15 |
# Get corresponding Loguru level if it exists
|
16 |
try:
|
17 |
level = logger.level(record.levelname).name
|
|
|
19 |
level = record.levelno
|
20 |
|
21 |
# Find caller from where the logged message originated
|
22 |
+
frame, depth = currentframe(), 2
|
23 |
+
while frame and frame.f_code.co_filename == logging_file:
|
24 |
frame = frame.f_back
|
25 |
depth += 1
|
26 |
|
|
|
29 |
)
|
30 |
|
31 |
|
32 |
+
def setup_logging(container: "Container") -> None:
|
33 |
"""
|
34 |
Configure logging with Loguru.
|
35 |
|
|
|
37 |
configures the log format based on settings, and intercepts
|
38 |
standard logging messages.
|
39 |
"""
|
40 |
+
from ctp_slack_bot import Container
|
41 |
+
settings = container.settings() if container else Provide[Container.settings]
|
42 |
+
|
43 |
# Remove default loguru handler
|
44 |
logger.remove()
|
45 |
+
|
46 |
# Determine log format
|
47 |
if settings.LOG_FORMAT == "json":
|
48 |
log_format = {
|
|
|
61 |
"<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
|
62 |
"<level>{message}</level>"
|
63 |
)
|
64 |
+
|
65 |
# Add console handler
|
66 |
logger.add(
|
67 |
+
stderr,
|
68 |
format=format_string,
|
69 |
level=settings.LOG_LEVEL,
|
70 |
serialize=(settings.LOG_FORMAT == "json"),
|
71 |
backtrace=True,
|
72 |
diagnose=True,
|
73 |
)
|
74 |
+
|
75 |
# Add file handler for non-DEBUG environments
|
76 |
if settings.LOG_LEVEL != "DEBUG":
|
77 |
logger.add(
|
|
|
83 |
level=settings.LOG_LEVEL,
|
84 |
serialize=(settings.LOG_FORMAT == "json"),
|
85 |
)
|
86 |
+
|
87 |
# Intercept standard logging messages
|
88 |
+
basicConfig(handlers=[InterceptHandler()], level=0, force=True)
|
89 |
+
|
90 |
# Update logging levels for some noisy libraries
|
91 |
+
for logger_name in ("uvicorn", "uvicorn.error", "fastapi", "httpx", "apscheduler", "pymongo"):
|
92 |
+
getLogger(logger_name).setLevel(INFO)
|
93 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
logger.info(f"Logging configured with level {settings.LOG_LEVEL}")
|
src/ctp_slack_bot/db/MongoDB.py
DELETED
@@ -1,122 +0,0 @@
|
|
1 |
-
from motor.motor_asyncio import AsyncIOMotorClient
|
2 |
-
from pymongo import IndexModel, ASCENDING
|
3 |
-
import logging
|
4 |
-
from typing import Optional
|
5 |
-
|
6 |
-
from ctp_slack_bot.core.config import settings
|
7 |
-
|
8 |
-
logger = logging.getLogger(__name__)
|
9 |
-
|
10 |
-
class MongoDB:
|
11 |
-
"""
|
12 |
-
MongoDB connection and initialization class.
|
13 |
-
Handles connection to MongoDB, database selection, and index creation.
|
14 |
-
"""
|
15 |
-
def __init__(self):
|
16 |
-
self.client: Optional[AsyncIOMotorClient] = None
|
17 |
-
self.db = None
|
18 |
-
self.vector_collection = None
|
19 |
-
self.initialized = False
|
20 |
-
|
21 |
-
async def connect(self):
|
22 |
-
"""
|
23 |
-
Connect to MongoDB using connection string from settings.
|
24 |
-
"""
|
25 |
-
if self.client is not None:
|
26 |
-
return
|
27 |
-
|
28 |
-
if not settings.MONGODB_URI:
|
29 |
-
raise ValueError("MONGODB_URI is not set in environment variables")
|
30 |
-
|
31 |
-
try:
|
32 |
-
# Create MongoDB connection
|
33 |
-
self.client = AsyncIOMotorClient(settings.MONGODB_URI.get_secret_value())
|
34 |
-
self.db = self.client[settings.MONGODB_DB_NAME]
|
35 |
-
self.vector_collection = self.db["vector_store"]
|
36 |
-
logger.info(f"Connected to MongoDB: {settings.MONGODB_DB_NAME}")
|
37 |
-
except Exception as e:
|
38 |
-
logger.error(f"Error connecting to MongoDB: {str(e)}")
|
39 |
-
raise
|
40 |
-
|
41 |
-
async def initialize(self):
|
42 |
-
"""
|
43 |
-
Initialize MongoDB with required collections and indexes.
|
44 |
-
"""
|
45 |
-
if self.initialized:
|
46 |
-
return
|
47 |
-
|
48 |
-
if not self.client:
|
49 |
-
await self.connect()
|
50 |
-
|
51 |
-
try:
|
52 |
-
# Create vector index for similarity search
|
53 |
-
await self.create_vector_index()
|
54 |
-
self.initialized = True
|
55 |
-
logger.info("MongoDB initialized successfully")
|
56 |
-
except Exception as e:
|
57 |
-
logger.error(f"Error initializing MongoDB: {str(e)}")
|
58 |
-
raise
|
59 |
-
|
60 |
-
async def create_vector_index(self):
|
61 |
-
"""
|
62 |
-
Create vector index for similarity search using MongoDB Atlas Vector Search.
|
63 |
-
"""
|
64 |
-
try:
|
65 |
-
# Check if index already exists
|
66 |
-
existing_indexes = await self.vector_collection.list_indexes().to_list(length=None)
|
67 |
-
index_names = [index.get('name') for index in existing_indexes]
|
68 |
-
|
69 |
-
if "vector_index" not in index_names:
|
70 |
-
# Create vector search index
|
71 |
-
index_definition = {
|
72 |
-
"mappings": {
|
73 |
-
"dynamic": True,
|
74 |
-
"fields": {
|
75 |
-
"embedding": {
|
76 |
-
"dimensions": settings.VECTOR_DIMENSION,
|
77 |
-
"similarity": "cosine",
|
78 |
-
"type": "knnVector"
|
79 |
-
}
|
80 |
-
}
|
81 |
-
}
|
82 |
-
}
|
83 |
-
|
84 |
-
# Create the index
|
85 |
-
await self.db.command({
|
86 |
-
"createIndexes": self.vector_collection.name,
|
87 |
-
"indexes": [
|
88 |
-
{
|
89 |
-
"name": "vector_index",
|
90 |
-
"key": {"embedding": "vector"},
|
91 |
-
"weights": {"embedding": 1},
|
92 |
-
"vectorSearchOptions": index_definition
|
93 |
-
}
|
94 |
-
]
|
95 |
-
})
|
96 |
-
|
97 |
-
# Create additional metadata indexes for filtering
|
98 |
-
await self.vector_collection.create_index([("metadata.source", ASCENDING)])
|
99 |
-
await self.vector_collection.create_index([("metadata.timestamp", ASCENDING)])
|
100 |
-
|
101 |
-
logger.info("Vector search index created")
|
102 |
-
else:
|
103 |
-
logger.info("Vector search index already exists")
|
104 |
-
|
105 |
-
except Exception as e:
|
106 |
-
logger.error(f"Error creating vector index: {str(e)}")
|
107 |
-
raise
|
108 |
-
|
109 |
-
async def close(self):
|
110 |
-
"""
|
111 |
-
Close MongoDB connection.
|
112 |
-
"""
|
113 |
-
if self.client:
|
114 |
-
self.client.close()
|
115 |
-
self.client = None
|
116 |
-
self.db = None
|
117 |
-
self.vector_collection = None
|
118 |
-
self.initialized = False
|
119 |
-
logger.info("MongoDB connection closed")
|
120 |
-
|
121 |
-
# Create a singleton instance
|
122 |
-
mongodb = MongoDB()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ctp_slack_bot/db/__init__.py
CHANGED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from ctp_slack_bot.db.mongo_db import MongoDB
|
src/ctp_slack_bot/db/mongo_db.py
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#from motor.motor_asyncio import AsyncIOMotorClient
|
2 |
+
from loguru import logger
|
3 |
+
from pydantic import BaseModel, model_validator
|
4 |
+
#from pymongo import IndexModel, ASCENDING
|
5 |
+
from typing import Optional, Self
|
6 |
+
|
7 |
+
from ctp_slack_bot.core.config import Settings
|
8 |
+
|
9 |
+
class MongoDB(BaseModel):
|
10 |
+
"""
|
11 |
+
MongoDB connection and initialization class.
|
12 |
+
Handles connection to MongoDB, database selection, and index creation.
|
13 |
+
"""
|
14 |
+
|
15 |
+
settings: Settings
|
16 |
+
|
17 |
+
@model_validator(mode='after')
|
18 |
+
def post_init(self: Self) -> Self:
|
19 |
+
logger.debug("Created {}", self.__class__.__name__)
|
20 |
+
return self
|
21 |
+
|
22 |
+
def __init__(self: Self, settings: Settings) -> Self:
|
23 |
+
#self.client: Optional[AsyncIOMotorClient] = None
|
24 |
+
#self.db = None
|
25 |
+
#self.vector_collection = None
|
26 |
+
#self.initialized = False
|
27 |
+
pass # The above initialization needs to be done some other way.
|
28 |
+
|
29 |
+
# async def connect(self):
|
30 |
+
# """
|
31 |
+
# Connect to MongoDB using connection string from settings.
|
32 |
+
# """
|
33 |
+
# if self.client is not None:
|
34 |
+
# return
|
35 |
+
|
36 |
+
# if not settings.MONGODB_URI:
|
37 |
+
# raise ValueError("MONGODB_URI is not set in environment variables")
|
38 |
+
|
39 |
+
# try:
|
40 |
+
# # Create MongoDB connection
|
41 |
+
# self.client = AsyncIOMotorClient(settings.MONGODB_URI.get_secret_value())
|
42 |
+
# self.db = self.client[settings.MONGODB_DB_NAME]
|
43 |
+
# self.vector_collection = self.db["vector_store"]
|
44 |
+
# logger.info(f"Connected to MongoDB: {settings.MONGODB_DB_NAME}")
|
45 |
+
# except Exception as e:
|
46 |
+
# logger.error(f"Error connecting to MongoDB: {str(e)}")
|
47 |
+
# raise
|
48 |
+
|
49 |
+
# async def initialize(self):
|
50 |
+
# """
|
51 |
+
# Initialize MongoDB with required collections and indexes.
|
52 |
+
# """
|
53 |
+
# if self.initialized:
|
54 |
+
# return
|
55 |
+
|
56 |
+
# if not self.client:
|
57 |
+
# await self.connect()
|
58 |
+
|
59 |
+
# try:
|
60 |
+
# # Create vector index for similarity search
|
61 |
+
# await self.create_vector_index()
|
62 |
+
# self.initialized = True
|
63 |
+
# logger.info("MongoDB initialized successfully")
|
64 |
+
# except Exception as e:
|
65 |
+
# logger.error(f"Error initializing MongoDB: {str(e)}")
|
66 |
+
# raise
|
67 |
+
|
68 |
+
# async def create_vector_index(self):
|
69 |
+
# """
|
70 |
+
# Create vector index for similarity search using MongoDB Atlas Vector Search.
|
71 |
+
# """
|
72 |
+
# try:
|
73 |
+
# # Check if index already exists
|
74 |
+
# existing_indexes = await self.vector_collection.list_indexes().to_list(length=None)
|
75 |
+
# index_names = [index.get('name') for index in existing_indexes]
|
76 |
+
|
77 |
+
# if "vector_index" not in index_names:
|
78 |
+
# # Create vector search index
|
79 |
+
# index_definition = {
|
80 |
+
# "mappings": {
|
81 |
+
# "dynamic": True,
|
82 |
+
# "fields": {
|
83 |
+
# "embedding": {
|
84 |
+
# "dimensions": settings.VECTOR_DIMENSION,
|
85 |
+
# "similarity": "cosine",
|
86 |
+
# "type": "knnVector"
|
87 |
+
# }
|
88 |
+
# }
|
89 |
+
# }
|
90 |
+
# }
|
91 |
+
|
92 |
+
# # Create the index
|
93 |
+
# await self.db.command({
|
94 |
+
# "createIndexes": self.vector_collection.name,
|
95 |
+
# "indexes": [
|
96 |
+
# {
|
97 |
+
# "name": "vector_index",
|
98 |
+
# "key": {"embedding": "vector"},
|
99 |
+
# "weights": {"embedding": 1},
|
100 |
+
# "vectorSearchOptions": index_definition
|
101 |
+
# }
|
102 |
+
# ]
|
103 |
+
# })
|
104 |
+
|
105 |
+
# # Create additional metadata indexes for filtering
|
106 |
+
# await self.vector_collection.create_index([("metadata.source", ASCENDING)])
|
107 |
+
# await self.vector_collection.create_index([("metadata.timestamp", ASCENDING)])
|
108 |
+
|
109 |
+
# logger.info("Vector search index created")
|
110 |
+
# else:
|
111 |
+
# logger.info("Vector search index already exists")
|
112 |
+
|
113 |
+
# except Exception as e:
|
114 |
+
# logger.error(f"Error creating vector index: {str(e)}")
|
115 |
+
# raise
|
116 |
+
|
117 |
+
# async def close(self):
|
118 |
+
# """
|
119 |
+
# Close MongoDB connection.
|
120 |
+
# """
|
121 |
+
# if self.client:
|
122 |
+
# self.client.close()
|
123 |
+
# self.client = None
|
124 |
+
# self.db = None
|
125 |
+
# self.vector_collection = None
|
126 |
+
# self.initialized = False
|
127 |
+
# logger.info("MongoDB connection closed")
|
src/ctp_slack_bot/models/__init__.py
CHANGED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ctp_slack_bot.models.base import Content, Ingestible, Metadata
|
2 |
+
from ctp_slack_bot.models.content import RetreivedContext
|
3 |
+
from ctp_slack_bot.models.slack import SlackMessage
|
4 |
+
from ctp_slack_bot.models.vector_query import VectorQuery
|
src/ctp_slack_bot/models/{VectorQuery.py → vector_query.py}
RENAMED
@@ -1,6 +1,5 @@
|
|
1 |
from pydantic import BaseModel, Field, validator
|
2 |
from typing import Optional, List, Dict, Any
|
3 |
-
from ctp_slack_bot.core.config import settings
|
4 |
|
5 |
class VectorQuery(BaseModel):
|
6 |
"""Model for vector database similarity search queries.
|
@@ -12,6 +11,6 @@ class VectorQuery(BaseModel):
|
|
12 |
filter_metadata: Optional filters for metadata fields
|
13 |
"""
|
14 |
query_text: str
|
15 |
-
k: int
|
16 |
score_threshold: float = Field(default=0.7)
|
17 |
-
filter_metadata: Optional[Dict[str, Any]] = None
|
|
|
1 |
from pydantic import BaseModel, Field, validator
|
2 |
from typing import Optional, List, Dict, Any
|
|
|
3 |
|
4 |
class VectorQuery(BaseModel):
|
5 |
"""Model for vector database similarity search queries.
|
|
|
11 |
filter_metadata: Optional filters for metadata fields
|
12 |
"""
|
13 |
query_text: str
|
14 |
+
k: int
|
15 |
score_threshold: float = Field(default=0.7)
|
16 |
+
filter_metadata: Optional[Dict[str, Any]] = None
|
src/ctp_slack_bot/services/__init__.py
CHANGED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
|
2 |
+
from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
|
3 |
+
from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
|
4 |
+
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
5 |
+
from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
|
6 |
+
from ctp_slack_bot.services.slack_service import SlackService
|
7 |
+
from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
|
8 |
+
from ctp_slack_bot.services.vectorization_service import VectorizationService
|
src/ctp_slack_bot/services/{AnswerQuestionService.py → answer_retrieval_service.py}
RENAMED
@@ -1,17 +1,30 @@
|
|
1 |
-
from
|
2 |
-
from
|
3 |
-
from ctp_slack_bot.core.config import settings
|
4 |
-
import numpy as np
|
5 |
from openai import OpenAI
|
6 |
-
from
|
7 |
-
from
|
8 |
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
10 |
"""
|
11 |
Service for language model operations.
|
12 |
"""
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
def generate_answer(self, question: SlackMessage, context: List[RetreivedContext]) -> str:
|
17 |
"""Generate a response using OpenAI's API with retrieved context.
|
@@ -50,11 +63,3 @@ class GenerateAnswer():
|
|
50 |
)
|
51 |
|
52 |
return response.choices[0].message.content
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
### REMOVE BELOW, PUT SOMEWHERE IN TESTS BUT IDK WHERE YET
|
57 |
-
# sm = SlackMessage(text="What is the capital of France?", channel_id="123", user_id="456", timestamp="789")
|
58 |
-
# context = [RetreivedContext(contextual_text="The capital of France is Paris", metadata_source="class materials", similarity_score=0.95)]
|
59 |
-
# a = GenerateAnswer()
|
60 |
-
# a.generate_answer(sm, context)
|
|
|
1 |
+
# from asyncio import create_task
|
2 |
+
from loguru import logger
|
|
|
|
|
3 |
from openai import OpenAI
|
4 |
+
from pydantic import BaseModel, model_validator
|
5 |
+
from typing import List, Optional, Self, Tuple
|
6 |
|
7 |
+
from ctp_slack_bot.core import Settings
|
8 |
+
from ctp_slack_bot.models import RetreivedContext, SlackMessage
|
9 |
+
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
10 |
+
|
11 |
+
|
12 |
+
class AnswerRetrievalService(BaseModel): # TODO: this should separate the OpenAI backend out into its own service.
|
13 |
"""
|
14 |
Service for language model operations.
|
15 |
"""
|
16 |
+
|
17 |
+
settings: Settings
|
18 |
+
event_brokerage_service: EventBrokerageService
|
19 |
+
client: OpenAI # TODO: this should separate the OpenAI backend out into its own service, one that is agnostic.
|
20 |
+
|
21 |
+
class Config:
|
22 |
+
arbitrary_types_allowed = True
|
23 |
+
|
24 |
+
@model_validator(mode='after')
|
25 |
+
def post_init(self: Self) -> Self:
|
26 |
+
logger.debug("Created {}", self.__class__.__name__)
|
27 |
+
return self
|
28 |
|
29 |
def generate_answer(self, question: SlackMessage, context: List[RetreivedContext]) -> str:
|
30 |
"""Generate a response using OpenAI's API with retrieved context.
|
|
|
63 |
)
|
64 |
|
65 |
return response.choices[0].message.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ctp_slack_bot/services/content_ingestion_service.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from loguru import logger
|
2 |
+
from pydantic import BaseModel, model_validator
|
3 |
+
from typing import Self
|
4 |
+
|
5 |
+
from ctp_slack_bot.core import Settings
|
6 |
+
from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
|
7 |
+
from ctp_slack_bot.services.vectorization_service import VectorizationService
|
8 |
+
|
9 |
+
class ContentIngestionService(BaseModel):
|
10 |
+
"""
|
11 |
+
Service for ingesting content.
|
12 |
+
"""
|
13 |
+
|
14 |
+
settings: Settings
|
15 |
+
vector_database_service: VectorDatabaseService
|
16 |
+
vectorization_service: VectorizationService
|
17 |
+
|
18 |
+
@model_validator(mode='after')
|
19 |
+
def post_init(self: Self) -> Self:
|
20 |
+
logger.debug("Created {}", self.__class__.__name__)
|
21 |
+
return self
|
src/ctp_slack_bot/services/{ContextRetrievalService.py → context_retrieval_service.py}
RENAMED
@@ -1,29 +1,31 @@
|
|
1 |
-
import
|
2 |
-
from
|
|
|
3 |
|
4 |
-
from ctp_slack_bot.
|
5 |
-
from ctp_slack_bot.models
|
6 |
-
from ctp_slack_bot.
|
7 |
-
from ctp_slack_bot.services.
|
8 |
-
from ctp_slack_bot.services.VectorDatabaseService import VectorDatabaseService
|
9 |
-
from ctp_slack_bot.core.config import settings
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
class ContextRetrievalService:
|
14 |
"""
|
15 |
Service for retrieving relevant context from the vector database based on user questions.
|
16 |
"""
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
async def initialize(self):
|
23 |
"""
|
24 |
Initialize the required services.
|
25 |
"""
|
26 |
-
await self.
|
27 |
|
28 |
async def get_context(self, message: SlackMessage) -> List[RetreivedContext]:
|
29 |
"""
|
@@ -62,7 +64,7 @@ class ContextRetrievalService:
|
|
62 |
)
|
63 |
|
64 |
# Search for similar content in vector database
|
65 |
-
context_results = await self.
|
66 |
query=vector_query,
|
67 |
query_embedding=query_embedding
|
68 |
)
|
|
|
1 |
+
from loguru import logger
|
2 |
+
from pydantic import BaseModel, model_validator
|
3 |
+
from typing import Any, Dict, List, Optional, Self
|
4 |
|
5 |
+
from ctp_slack_bot.core.config import Settings
|
6 |
+
from ctp_slack_bot.models import RetreivedContext, SlackMessage, VectorQuery
|
7 |
+
from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
|
8 |
+
from ctp_slack_bot.services.vectorization_service import VectorizationService
|
|
|
|
|
9 |
|
10 |
+
class ContextRetrievalService(BaseModel):
|
|
|
|
|
11 |
"""
|
12 |
Service for retrieving relevant context from the vector database based on user questions.
|
13 |
"""
|
14 |
+
|
15 |
+
settings: Settings
|
16 |
+
vectorization_service: VectorizationService
|
17 |
+
vector_database_service: VectorDatabaseService
|
18 |
+
|
19 |
+
@model_validator(mode='after')
|
20 |
+
def post_init(self: Self) -> Self:
|
21 |
+
logger.debug("Created {}", self.__class__.__name__)
|
22 |
+
return self
|
23 |
|
24 |
async def initialize(self):
|
25 |
"""
|
26 |
Initialize the required services.
|
27 |
"""
|
28 |
+
await self.vector_database_service.initialize()
|
29 |
|
30 |
async def get_context(self, message: SlackMessage) -> List[RetreivedContext]:
|
31 |
"""
|
|
|
64 |
)
|
65 |
|
66 |
# Search for similar content in vector database
|
67 |
+
context_results = await self.vector_database_service.search_by_similarity(
|
68 |
query=vector_query,
|
69 |
query_embedding=query_embedding
|
70 |
)
|
src/ctp_slack_bot/services/event_brokerage_service.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from asyncio import create_task
|
2 |
+
from loguru import logger
|
3 |
+
from openai import OpenAI
|
4 |
+
from pydantic import BaseModel, model_validator
|
5 |
+
from typing import Any, Callable, Dict, List, Self
|
6 |
+
|
7 |
+
from ctp_slack_bot.core import Settings
|
8 |
+
from ctp_slack_bot.models import RetreivedContext, SlackMessage
|
9 |
+
from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
|
10 |
+
from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
|
11 |
+
|
12 |
+
|
13 |
+
class EventBrokerageService(BaseModel):
|
14 |
+
"""
|
15 |
+
Service for brokering events between services.
|
16 |
+
"""
|
17 |
+
|
18 |
+
subscribers: Dict[str, List[Callable]] = {}
|
19 |
+
|
20 |
+
class Config:
|
21 |
+
arbitrary_types_allowed = True
|
22 |
+
|
23 |
+
@model_validator(mode='after')
|
24 |
+
def post_init(self: Self) -> Self:
|
25 |
+
logger.debug("Created {}", self.__class__.__name__)
|
26 |
+
return self
|
27 |
+
|
28 |
+
def subscribe(self: Self, event_type: str, callback: Callable) -> None:
|
29 |
+
"""Subscribe to an event type with a callback function."""
|
30 |
+
if event_type not in self.subscribers:
|
31 |
+
self.subscribers[event_type] = []
|
32 |
+
self.subscribers[event_type].append(callback)
|
33 |
+
|
34 |
+
def publish(self: Self, event_type: str, data: Any = None) -> None:
|
35 |
+
"""Publish an event with optional data to all subscribers."""
|
36 |
+
if event_type in self.subscribers:
|
37 |
+
for callback in self.subscribers[event_type]:
|
38 |
+
callback(data)
|
src/ctp_slack_bot/services/question_dispatch_service.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from asyncio import create_task
|
2 |
+
from loguru import logger
|
3 |
+
from openai import OpenAI
|
4 |
+
from pydantic import BaseModel, model_validator
|
5 |
+
from typing import List, Optional, Self, Tuple
|
6 |
+
|
7 |
+
from ctp_slack_bot.core import Settings
|
8 |
+
from ctp_slack_bot.models import RetreivedContext, SlackMessage
|
9 |
+
from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
|
10 |
+
from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
|
11 |
+
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
12 |
+
|
13 |
+
|
14 |
+
class QuestionDispatchService(BaseModel):
|
15 |
+
"""
|
16 |
+
Service for determining whether a Slack message constitutes a question.
|
17 |
+
"""
|
18 |
+
|
19 |
+
settings: Settings
|
20 |
+
event_brokerage_service: EventBrokerageService
|
21 |
+
context_retrieval_service: ContextRetrievalService
|
22 |
+
answer_retrieval_service: AnswerRetrievalService
|
23 |
+
|
24 |
+
@model_validator(mode='after')
|
25 |
+
def post_init(self: Self) -> Self:
|
26 |
+
logger.debug("Created {}", self.__class__.__name__)
|
27 |
+
return self
|
28 |
+
|
29 |
+
def push(self: Self, message: SlackMessage) -> None:
|
30 |
+
context = self.context_retrieval_service.get_context(message)
|
31 |
+
self.answer_retrieval_service.generate_answer(message, context)
|
src/ctp_slack_bot/services/slack_service.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from asyncio import create_task
|
2 |
+
from loguru import logger
|
3 |
+
from openai import OpenAI
|
4 |
+
from pydantic import BaseModel, model_validator
|
5 |
+
from typing import List, Optional, Self, Tuple
|
6 |
+
|
7 |
+
from ctp_slack_bot.core import Settings
|
8 |
+
from ctp_slack_bot.models import RetreivedContext, SlackMessage
|
9 |
+
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
10 |
+
|
11 |
+
|
12 |
+
class SlackService(BaseModel):
|
13 |
+
"""
|
14 |
+
Service for interfacing with Slack.
|
15 |
+
"""
|
16 |
+
|
17 |
+
settings: Settings
|
18 |
+
event_brokerage_service: EventBrokerageService
|
19 |
+
|
20 |
+
@model_validator(mode='after')
|
21 |
+
def post_init(self: Self) -> Self:
|
22 |
+
logger.debug("Created {}", self.__class__.__name__)
|
23 |
+
return self
|
src/ctp_slack_bot/services/{VectorDatabaseService.py → vector_database_service.py}
RENAMED
@@ -1,18 +1,24 @@
|
|
1 |
-
import
|
2 |
-
from
|
3 |
-
|
4 |
|
5 |
-
from ctp_slack_bot.
|
6 |
-
from ctp_slack_bot.
|
7 |
-
from ctp_slack_bot.models
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
class VectorDatabaseService:
|
12 |
"""
|
13 |
Service for storing and retrieving vector embeddings from MongoDB.
|
14 |
"""
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
async def initialize(self):
|
17 |
"""
|
18 |
Initialize the database connection.
|
|
|
1 |
+
from loguru import logger
|
2 |
+
from pydantic import BaseModel, model_validator
|
3 |
+
from typing import Any, Dict, List, Optional, Self
|
4 |
|
5 |
+
from ctp_slack_bot.core import Settings
|
6 |
+
from ctp_slack_bot.db import MongoDB
|
7 |
+
from ctp_slack_bot.models import VectorQuery, RetreivedContext
|
8 |
|
9 |
+
class VectorDatabaseService(BaseModel): # TODO: this should not rely specifically on MongoDB.
|
|
|
|
|
10 |
"""
|
11 |
Service for storing and retrieving vector embeddings from MongoDB.
|
12 |
"""
|
13 |
+
|
14 |
+
settings: Settings
|
15 |
+
mongo_db: MongoDB
|
16 |
+
|
17 |
+
@model_validator(mode='after')
|
18 |
+
def post_init(self: Self) -> Self:
|
19 |
+
logger.debug("Created {}", self.__class__.__name__)
|
20 |
+
return self
|
21 |
+
|
22 |
async def initialize(self):
|
23 |
"""
|
24 |
Initialize the database connection.
|
src/ctp_slack_bot/services/{VectorizationService.py → vectorization_service.py}
RENAMED
@@ -1,17 +1,26 @@
|
|
1 |
-
from
|
2 |
-
from typing import List, Optional
|
3 |
-
from ctp_slack_bot.core.config import settings
|
4 |
import numpy as np
|
5 |
from openai import OpenAI
|
|
|
|
|
6 |
|
|
|
7 |
|
8 |
-
class VectorizationService():
|
9 |
"""
|
10 |
Service for vectorizing chunks of text data.
|
11 |
"""
|
12 |
-
def __init__(self):
|
13 |
-
self.client = OpenAI(api_key=settings.OPENAI_API_KEY)
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
def get_embeddings(self, texts: List[str]) -> np.ndarray:
|
17 |
"""Get embeddings for a list of texts using OpenAI's API.
|
@@ -28,7 +37,7 @@ class VectorizationService():
|
|
28 |
try:
|
29 |
# Use the initialized client instead of the global openai module
|
30 |
response = self.client.embeddings.create(
|
31 |
-
model=settings.EMBEDDING_MODEL,
|
32 |
input=texts,
|
33 |
encoding_format="float" # Ensure we get raw float values
|
34 |
)
|
@@ -36,9 +45,9 @@ class VectorizationService():
|
|
36 |
# Extract embeddings and verify dimensions
|
37 |
embeddings = np.array([data.embedding for data in response.data])
|
38 |
|
39 |
-
if embeddings.shape[1] != settings.VECTOR_DIMENSION:
|
40 |
raise ValueError(
|
41 |
-
f"Embedding dimension mismatch. Expected {settings.VECTOR_DIMENSION}, "
|
42 |
f"but got {embeddings.shape[1]}. Please update VECTOR_DIMENSION "
|
43 |
f"in config.py to match the model's output."
|
44 |
)
|
|
|
1 |
+
from loguru import logger
|
|
|
|
|
2 |
import numpy as np
|
3 |
from openai import OpenAI
|
4 |
+
from pydantic import BaseModel, model_validator
|
5 |
+
from typing import List, Optional, Self
|
6 |
|
7 |
+
from ctp_slack_bot.core import Settings
|
8 |
|
9 |
+
class VectorizationService(BaseModel):
|
10 |
"""
|
11 |
Service for vectorizing chunks of text data.
|
12 |
"""
|
|
|
|
|
13 |
|
14 |
+
settings: Settings
|
15 |
+
client: OpenAI # TODO: this should separate the OpenAI backend out into its own service, one that is agnostic.
|
16 |
+
|
17 |
+
class Config:
|
18 |
+
arbitrary_types_allowed = True
|
19 |
+
|
20 |
+
@model_validator(mode='after')
|
21 |
+
def post_init(self: Self) -> Self:
|
22 |
+
logger.debug("Created {}", self.__class__.__name__)
|
23 |
+
return self
|
24 |
|
25 |
def get_embeddings(self, texts: List[str]) -> np.ndarray:
|
26 |
"""Get embeddings for a list of texts using OpenAI's API.
|
|
|
37 |
try:
|
38 |
# Use the initialized client instead of the global openai module
|
39 |
response = self.client.embeddings.create(
|
40 |
+
model=self.settings.EMBEDDING_MODEL,
|
41 |
input=texts,
|
42 |
encoding_format="float" # Ensure we get raw float values
|
43 |
)
|
|
|
45 |
# Extract embeddings and verify dimensions
|
46 |
embeddings = np.array([data.embedding for data in response.data])
|
47 |
|
48 |
+
if embeddings.shape[1] != self.settings.VECTOR_DIMENSION:
|
49 |
raise ValueError(
|
50 |
+
f"Embedding dimension mismatch. Expected {self.settings.VECTOR_DIMENSION}, "
|
51 |
f"but got {embeddings.shape[1]}. Please update VECTOR_DIMENSION "
|
52 |
f"in config.py to match the model's output."
|
53 |
)
|
src/ctp_slack_bot/tasks/__init__.py
CHANGED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
|
src/ctp_slack_bot/tasks/scheduler.py
CHANGED
@@ -1,28 +1,26 @@
|
|
1 |
-
from datetime import datetime
|
2 |
-
from typing import Optional
|
3 |
-
|
4 |
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
5 |
from apscheduler.triggers.cron import CronTrigger
|
|
|
|
|
6 |
from loguru import logger
|
7 |
from pytz import timezone
|
|
|
8 |
|
9 |
-
from ctp_slack_bot
|
10 |
-
#from ctp_slack_bot.tasks.error_report import send_error_report
|
11 |
-
#from ctp_slack_bot.tasks.transcript_cleanup import cleanup_old_transcripts
|
12 |
-
|
13 |
|
14 |
-
|
|
|
15 |
"""
|
16 |
Start and configure the APScheduler instance.
|
17 |
|
18 |
Returns:
|
19 |
AsyncIOScheduler: Configured scheduler instance
|
20 |
"""
|
21 |
-
|
|
|
|
|
22 |
|
23 |
-
# Add jobs to the scheduler
|
24 |
-
|
25 |
-
# Daily error report at 7 AM
|
26 |
# scheduler.add_job(
|
27 |
# send_error_report,
|
28 |
# CronTrigger(hour=7, minute=0),
|
@@ -30,8 +28,6 @@ def start_scheduler() -> AsyncIOScheduler:
|
|
30 |
# name="Daily Error Report",
|
31 |
# replace_existing=True,
|
32 |
# )
|
33 |
-
|
34 |
-
# Weekly transcript cleanup on Sundays at 1 AM
|
35 |
# scheduler.add_job(
|
36 |
# cleanup_old_transcripts,
|
37 |
# CronTrigger(day_of_week="sun", hour=1, minute=0),
|
@@ -40,25 +36,25 @@ def start_scheduler() -> AsyncIOScheduler:
|
|
40 |
# replace_existing=True,
|
41 |
# )
|
42 |
|
43 |
-
# Start the scheduler
|
44 |
scheduler.start()
|
45 |
logger.info("Scheduler started with timezone: {}", settings.SCHEDULER_TIMEZONE)
|
46 |
-
logger.info("Next run for error report: {}",
|
47 |
-
|
48 |
-
logger.info("Next run for transcript cleanup: {}",
|
49 |
-
|
50 |
|
51 |
return scheduler
|
52 |
|
53 |
|
54 |
-
def stop_scheduler(scheduler:
|
55 |
"""
|
56 |
Shutdown the scheduler gracefully.
|
57 |
|
58 |
Args:
|
59 |
scheduler: The scheduler instance to shut down
|
60 |
"""
|
61 |
-
if scheduler
|
62 |
logger.info("Shutting down scheduler")
|
63 |
scheduler.shutdown(wait=False)
|
64 |
logger.info("Scheduler shutdown complete")
|
|
|
|
|
|
|
|
|
1 |
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
2 |
from apscheduler.triggers.cron import CronTrigger
|
3 |
+
from datetime import datetime
|
4 |
+
from dependency_injector.wiring import inject, Provide
|
5 |
from loguru import logger
|
6 |
from pytz import timezone
|
7 |
+
from typing import Optional
|
8 |
|
9 |
+
from ctp_slack_bot import Container
|
|
|
|
|
|
|
10 |
|
11 |
+
@inject
|
12 |
+
def start_scheduler(container: Container) -> AsyncIOScheduler:
|
13 |
"""
|
14 |
Start and configure the APScheduler instance.
|
15 |
|
16 |
Returns:
|
17 |
AsyncIOScheduler: Configured scheduler instance
|
18 |
"""
|
19 |
+
settings = container.settings() if container else Provide[Container.settings]
|
20 |
+
zone = settings.SCHEDULER_TIMEZONE
|
21 |
+
scheduler = AsyncIOScheduler(timezone=timezone(zone))
|
22 |
|
23 |
+
# Add jobs to the scheduler.
|
|
|
|
|
24 |
# scheduler.add_job(
|
25 |
# send_error_report,
|
26 |
# CronTrigger(hour=7, minute=0),
|
|
|
28 |
# name="Daily Error Report",
|
29 |
# replace_existing=True,
|
30 |
# )
|
|
|
|
|
31 |
# scheduler.add_job(
|
32 |
# cleanup_old_transcripts,
|
33 |
# CronTrigger(day_of_week="sun", hour=1, minute=0),
|
|
|
36 |
# replace_existing=True,
|
37 |
# )
|
38 |
|
39 |
+
# Start the scheduler.
|
40 |
scheduler.start()
|
41 |
logger.info("Scheduler started with timezone: {}", settings.SCHEDULER_TIMEZONE)
|
42 |
+
# logger.info("Next run for error report: {}",
|
43 |
+
# scheduler.get_job("daily_error_report").next_run_time)
|
44 |
+
# logger.info("Next run for transcript cleanup: {}",
|
45 |
+
# scheduler.get_job("weekly_transcript_cleanup").next_run_time)
|
46 |
|
47 |
return scheduler
|
48 |
|
49 |
|
50 |
+
def stop_scheduler(scheduler: AsyncIOScheduler) -> None:
|
51 |
"""
|
52 |
Shutdown the scheduler gracefully.
|
53 |
|
54 |
Args:
|
55 |
scheduler: The scheduler instance to shut down
|
56 |
"""
|
57 |
+
if scheduler.running:
|
58 |
logger.info("Shutting down scheduler")
|
59 |
scheduler.shutdown(wait=False)
|
60 |
logger.info("Scheduler shutdown complete")
|