Spaces:
Runtime error
Runtime error
Make all code participate in dependency injection
Browse files- README.MD +11 -2
- src/ctp_slack_bot/__init__.py +1 -0
- src/ctp_slack_bot/api/__init__.py +1 -0
- src/ctp_slack_bot/api/main.py +4 -11
- src/ctp_slack_bot/api/routes.py +5 -6
- src/ctp_slack_bot/containers.py +28 -43
- src/ctp_slack_bot/core/__init__.py +2 -0
- src/ctp_slack_bot/core/logging.py +2 -4
- src/ctp_slack_bot/db/MongoDB.py +0 -122
- src/ctp_slack_bot/db/__init__.py +1 -0
- src/ctp_slack_bot/db/mongo_db.py +127 -0
- src/ctp_slack_bot/models/__init__.py +4 -0
- src/ctp_slack_bot/models/{VectorQuery.py β vector_query.py} +2 -3
- src/ctp_slack_bot/services/__init__.py +8 -0
- src/ctp_slack_bot/services/{AnswerQuestionService.py β answer_retrieval_service.py} +22 -17
- src/ctp_slack_bot/services/content_ingestion_service.py +21 -0
- src/ctp_slack_bot/services/{ContextRetrievalService.py β context_retrieval_service.py} +19 -17
- src/ctp_slack_bot/services/event_brokerage_service.py +38 -0
- src/ctp_slack_bot/services/question_dispatch_service.py +31 -0
- src/ctp_slack_bot/services/slack_service.py +23 -0
- src/ctp_slack_bot/services/{VectorDatabaseService.py β vector_database_service.py} +16 -10
- src/ctp_slack_bot/services/{VectorizationService.py β vectorization_service.py} +18 -9
- src/ctp_slack_bot/tasks/__init__.py +1 -0
- src/ctp_slack_bot/tasks/scheduler.py +1 -3
README.MD
CHANGED
@@ -1,5 +1,10 @@
|
|
1 |
# CTP Slack Bot
|
2 |
|
|
|
|
|
|
|
|
|
|
|
3 |
## Tech Stack
|
4 |
|
5 |
* Hugging Face Spaces for hosting and serverless API
|
@@ -7,7 +12,10 @@
|
|
7 |
* MongoDB for data persistence
|
8 |
* Docker for containerization
|
9 |
* Python
|
10 |
-
*
|
|
|
|
|
|
|
11 |
|
12 |
## General Project Structure
|
13 |
|
@@ -26,7 +34,8 @@
|
|
26 |
* `scripts/`: utility scripts for development, deployment, etc.
|
27 |
* `run-dev.sh`: script to run the application locally
|
28 |
* `notebooks/`: Jupyter notebooks for exploration and model development
|
29 |
-
* `.env`: local environment variables for development purposes
|
|
|
30 |
|
31 |
## How to Run the Application
|
32 |
|
|
|
1 |
# CTP Slack Bot
|
2 |
|
3 |
+
## _Modus Operandi_ in a Nutshell
|
4 |
+
|
5 |
+
* Intelligently responds to Slack messages based on a repository of data.
|
6 |
+
* Periodically checks for new content to add to its repository.
|
7 |
+
|
8 |
## Tech Stack
|
9 |
|
10 |
* Hugging Face Spaces for hosting and serverless API
|
|
|
12 |
* MongoDB for data persistence
|
13 |
* Docker for containerization
|
14 |
* Python
|
15 |
+
* FastAPI for serving HTTP requests
|
16 |
+
* httpx for making HTTP requests
|
17 |
+
* APScheduler for running periodic tasks in the background
|
18 |
+
* See `pyproject.toml` for additional Python packages.
|
19 |
|
20 |
## General Project Structure
|
21 |
|
|
|
34 |
* `scripts/`: utility scripts for development, deployment, etc.
|
35 |
* `run-dev.sh`: script to run the application locally
|
36 |
* `notebooks/`: Jupyter notebooks for exploration and model development
|
37 |
+
* `.env`: local environment variables for development purposes (to be created for local use only from `.env.template`)
|
38 |
+
* `Dockerfile`: Docker container build definition
|
39 |
|
40 |
## How to Run the Application
|
41 |
|
src/ctp_slack_bot/__init__.py
CHANGED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from ctp_slack_bot.containers import Container
|
src/ctp_slack_bot/api/__init__.py
CHANGED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from ctp_slack_bot.api.main import app, run
|
src/ctp_slack_bot/api/main.py
CHANGED
@@ -4,12 +4,11 @@ from loguru import logger
|
|
4 |
from typing import AsyncGenerator
|
5 |
from dependency_injector.wiring import inject, Provide
|
6 |
|
|
|
7 |
from ctp_slack_bot.api.routes import router
|
8 |
-
from ctp_slack_bot.core
|
9 |
-
from ctp_slack_bot.core.logging import setup_logging
|
10 |
from ctp_slack_bot.core.response_rendering import PrettyJSONResponse
|
11 |
-
from ctp_slack_bot.tasks
|
12 |
-
from ctp_slack_bot.containers import Container
|
13 |
|
14 |
@asynccontextmanager
|
15 |
async def lifespan(app: FastAPI) -> AsyncGenerator:
|
@@ -19,14 +18,8 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
|
|
19 |
"""
|
20 |
# Initialize container and wire the container to modules that need dependency injection.
|
21 |
container = Container()
|
|
|
22 |
app.container = container
|
23 |
-
container.wire(
|
24 |
-
modules=[
|
25 |
-
"ctp_slack_bot.api.routes",
|
26 |
-
"ctp_slack_bot.services",
|
27 |
-
"ctp_slack_bot.tasks"
|
28 |
-
]
|
29 |
-
)
|
30 |
|
31 |
# Setup logging.
|
32 |
setup_logging(container)
|
|
|
4 |
from typing import AsyncGenerator
|
5 |
from dependency_injector.wiring import inject, Provide
|
6 |
|
7 |
+
from ctp_slack_bot import Container
|
8 |
from ctp_slack_bot.api.routes import router
|
9 |
+
from ctp_slack_bot.core import Settings, setup_logging
|
|
|
10 |
from ctp_slack_bot.core.response_rendering import PrettyJSONResponse
|
11 |
+
from ctp_slack_bot.tasks import start_scheduler, stop_scheduler
|
|
|
12 |
|
13 |
@asynccontextmanager
|
14 |
async def lifespan(app: FastAPI) -> AsyncGenerator:
|
|
|
18 |
"""
|
19 |
# Initialize container and wire the container to modules that need dependency injection.
|
20 |
container = Container()
|
21 |
+
container.wire(packages=['ctp_slack_bot'])
|
22 |
app.container = container
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
# Setup logging.
|
25 |
setup_logging(container)
|
src/ctp_slack_bot/api/routes.py
CHANGED
@@ -2,18 +2,17 @@ from fastapi import APIRouter, Depends, HTTPException, status
|
|
2 |
from dependency_injector.wiring import inject, Provide
|
3 |
from loguru import logger
|
4 |
|
5 |
-
from ctp_slack_bot
|
6 |
-
|
7 |
-
from ctp_slack_bot.
|
8 |
-
#from ctp_slack_bot.models.transcript import TranscriptRequest, TranscriptResponse
|
9 |
-
#from ctp_slack_bot.services.slack_service import SlackService
|
10 |
-
#from ctp_slack_bot.services.transcript_service import TranscriptService
|
11 |
|
12 |
router = APIRouter(prefix="/api/v1")
|
13 |
|
14 |
@router.get("/env", response_model=Settings)
|
15 |
@inject
|
16 |
async def get_env(settings: Settings = Depends(Provide[Container.settings])) -> Settings:
|
|
|
|
|
17 |
return settings
|
18 |
|
19 |
# @router.post("/transcripts/analyze", response_model=TranscriptResponse)
|
|
|
2 |
from dependency_injector.wiring import inject, Provide
|
3 |
from loguru import logger
|
4 |
|
5 |
+
from ctp_slack_bot import Container
|
6 |
+
from ctp_slack_bot.core import Settings
|
7 |
+
from ctp_slack_bot.services import SlackService
|
|
|
|
|
|
|
8 |
|
9 |
router = APIRouter(prefix="/api/v1")
|
10 |
|
11 |
@router.get("/env", response_model=Settings)
|
12 |
@inject
|
13 |
async def get_env(settings: Settings = Depends(Provide[Container.settings])) -> Settings:
|
14 |
+
if not settings.DEBUG:
|
15 |
+
raise HTTPException(status_code=404)
|
16 |
return settings
|
17 |
|
18 |
# @router.post("/transcripts/analyze", response_model=TranscriptResponse)
|
src/ctp_slack_bot/containers.py
CHANGED
@@ -1,43 +1,25 @@
|
|
1 |
from dependency_injector.containers import DeclarativeContainer
|
2 |
-
from dependency_injector.providers import
|
3 |
-
|
4 |
-
#from langchain.embeddings import HuggingFaceEmbeddings
|
5 |
-
#from transformers import AutoTokenizer, AutoModel
|
6 |
|
7 |
from ctp_slack_bot.core.config import Settings
|
8 |
-
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
class Container(DeclarativeContainer):
|
12 |
-
# Core dependencies
|
13 |
settings = Singleton(Settings)
|
14 |
|
15 |
-
|
16 |
-
# MongoClient,
|
17 |
-
# host=config.MONGODB_URI.get_secret_value
|
18 |
-
# )
|
19 |
-
|
20 |
-
# db = Singleton(
|
21 |
-
# get_database,
|
22 |
-
# client=db_client,
|
23 |
-
# db_name=config.MONGODB_NAME
|
24 |
-
# )
|
25 |
-
|
26 |
-
# Machine-learning models
|
27 |
-
# tokenizer = Singleton(
|
28 |
-
# AutoTokenizer.from_pretrained,
|
29 |
-
# config.EMBEDDING_MODEL
|
30 |
-
# )
|
31 |
-
|
32 |
-
# model = Singleton(
|
33 |
-
# AutoModel.from_pretrained,
|
34 |
-
# config.EMBEDDING_MODEL
|
35 |
-
# )
|
36 |
|
37 |
-
|
38 |
-
# HuggingFaceEmbeddings,
|
39 |
-
# model_name=config.EMBEDDING_MODEL
|
40 |
-
# )
|
41 |
|
42 |
# Repositories
|
43 |
# transcript_repository = Factory(
|
@@ -45,15 +27,18 @@ class Container(DeclarativeContainer):
|
|
45 |
# db=db
|
46 |
# )
|
47 |
|
48 |
-
#
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
-
|
58 |
-
# transcript_service = providers.Factory(...)
|
59 |
-
# slack_service = providers.Factory(...)
|
|
|
1 |
from dependency_injector.containers import DeclarativeContainer
|
2 |
+
from dependency_injector.providers import Factory, Singleton
|
3 |
+
from openai import OpenAI
|
|
|
|
|
4 |
|
5 |
from ctp_slack_bot.core.config import Settings
|
6 |
+
from ctp_slack_bot.db.mongo_db import MongoDB
|
7 |
+
from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
|
8 |
+
from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
|
9 |
+
from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
|
10 |
+
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
11 |
+
from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
|
12 |
+
from ctp_slack_bot.services.slack_service import SlackService
|
13 |
+
from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
|
14 |
+
from ctp_slack_bot.services.vectorization_service import VectorizationService
|
15 |
+
|
16 |
|
17 |
class Container(DeclarativeContainer):
|
|
|
18 |
settings = Singleton(Settings)
|
19 |
|
20 |
+
event_brokerage_service = Singleton(EventBrokerageService)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
+
mongo_db = Singleton(MongoDB, settings=settings)
|
|
|
|
|
|
|
23 |
|
24 |
# Repositories
|
25 |
# transcript_repository = Factory(
|
|
|
27 |
# db=db
|
28 |
# )
|
29 |
|
30 |
+
open_ai_client = Factory(OpenAI, api_key=settings.provided.OPENAI_API_KEY) # TODO: poor practice to do it this way; create a LanguageModelService that creates an OpenAI client.
|
31 |
+
|
32 |
+
vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db)
|
33 |
+
|
34 |
+
vectorization_service = Singleton(VectorizationService, settings=settings, client=open_ai_client)
|
35 |
+
|
36 |
+
content_ingestion_service = Singleton(ContentIngestionService, settings=settings, event_brokerage_service=event_brokerage_service, vector_database_service=vector_database_service, vectorization_service=vectorization_service)
|
37 |
+
|
38 |
+
context_retrieval_service = Singleton(ContextRetrievalService, settings=settings, vectorization_service=vectorization_service, vector_database_service=vector_database_service)
|
39 |
+
|
40 |
+
answer_retrieval_service = Singleton(AnswerRetrievalService, settings=settings, event_brokerage_service=event_brokerage_service, client=open_ai_client)
|
41 |
+
|
42 |
+
question_dispatch_service = Singleton(QuestionDispatchService, settings=settings, event_brokerage_service=event_brokerage_service, content_ingestion_service=content_ingestion_service, context_retrieval_service=context_retrieval_service, answer_retrieval_service=answer_retrieval_service)
|
43 |
|
44 |
+
slack_service = Singleton(SlackService, settings=settings, event_brokerage_service=event_brokerage_service)
|
|
|
|
src/ctp_slack_bot/core/__init__.py
CHANGED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
from ctp_slack_bot.core.config import Settings
|
2 |
+
from ctp_slack_bot.core.logging import logger, setup_logging
|
src/ctp_slack_bot/core/logging.py
CHANGED
@@ -3,9 +3,6 @@ from loguru import logger
|
|
3 |
from sys import stderr
|
4 |
from typing import Dict, Union
|
5 |
|
6 |
-
from ctp_slack_bot.containers import Container
|
7 |
-
|
8 |
-
|
9 |
class InterceptHandler(Handler):
|
10 |
"""
|
11 |
Intercept standard logging messages toward Loguru.
|
@@ -32,7 +29,7 @@ class InterceptHandler(Handler):
|
|
32 |
)
|
33 |
|
34 |
|
35 |
-
def setup_logging(container: Container) -> None:
|
36 |
"""
|
37 |
Configure logging with Loguru.
|
38 |
|
@@ -40,6 +37,7 @@ def setup_logging(container: Container) -> None:
|
|
40 |
configures the log format based on settings, and intercepts
|
41 |
standard logging messages.
|
42 |
"""
|
|
|
43 |
settings = container.settings() if container else Provide[Container.settings]
|
44 |
|
45 |
# Remove default loguru handler
|
|
|
3 |
from sys import stderr
|
4 |
from typing import Dict, Union
|
5 |
|
|
|
|
|
|
|
6 |
class InterceptHandler(Handler):
|
7 |
"""
|
8 |
Intercept standard logging messages toward Loguru.
|
|
|
29 |
)
|
30 |
|
31 |
|
32 |
+
def setup_logging(container: "Container") -> None:
|
33 |
"""
|
34 |
Configure logging with Loguru.
|
35 |
|
|
|
37 |
configures the log format based on settings, and intercepts
|
38 |
standard logging messages.
|
39 |
"""
|
40 |
+
from ctp_slack_bot import Container
|
41 |
settings = container.settings() if container else Provide[Container.settings]
|
42 |
|
43 |
# Remove default loguru handler
|
src/ctp_slack_bot/db/MongoDB.py
DELETED
@@ -1,122 +0,0 @@
|
|
1 |
-
from motor.motor_asyncio import AsyncIOMotorClient
|
2 |
-
from pymongo import IndexModel, ASCENDING
|
3 |
-
import logging
|
4 |
-
from typing import Optional
|
5 |
-
|
6 |
-
from ctp_slack_bot.core.config import settings
|
7 |
-
|
8 |
-
logger = logging.getLogger(__name__)
|
9 |
-
|
10 |
-
class MongoDB:
|
11 |
-
"""
|
12 |
-
MongoDB connection and initialization class.
|
13 |
-
Handles connection to MongoDB, database selection, and index creation.
|
14 |
-
"""
|
15 |
-
def __init__(self):
|
16 |
-
self.client: Optional[AsyncIOMotorClient] = None
|
17 |
-
self.db = None
|
18 |
-
self.vector_collection = None
|
19 |
-
self.initialized = False
|
20 |
-
|
21 |
-
async def connect(self):
|
22 |
-
"""
|
23 |
-
Connect to MongoDB using connection string from settings.
|
24 |
-
"""
|
25 |
-
if self.client is not None:
|
26 |
-
return
|
27 |
-
|
28 |
-
if not settings.MONGODB_URI:
|
29 |
-
raise ValueError("MONGODB_URI is not set in environment variables")
|
30 |
-
|
31 |
-
try:
|
32 |
-
# Create MongoDB connection
|
33 |
-
self.client = AsyncIOMotorClient(settings.MONGODB_URI.get_secret_value())
|
34 |
-
self.db = self.client[settings.MONGODB_DB_NAME]
|
35 |
-
self.vector_collection = self.db["vector_store"]
|
36 |
-
logger.info(f"Connected to MongoDB: {settings.MONGODB_DB_NAME}")
|
37 |
-
except Exception as e:
|
38 |
-
logger.error(f"Error connecting to MongoDB: {str(e)}")
|
39 |
-
raise
|
40 |
-
|
41 |
-
async def initialize(self):
|
42 |
-
"""
|
43 |
-
Initialize MongoDB with required collections and indexes.
|
44 |
-
"""
|
45 |
-
if self.initialized:
|
46 |
-
return
|
47 |
-
|
48 |
-
if not self.client:
|
49 |
-
await self.connect()
|
50 |
-
|
51 |
-
try:
|
52 |
-
# Create vector index for similarity search
|
53 |
-
await self.create_vector_index()
|
54 |
-
self.initialized = True
|
55 |
-
logger.info("MongoDB initialized successfully")
|
56 |
-
except Exception as e:
|
57 |
-
logger.error(f"Error initializing MongoDB: {str(e)}")
|
58 |
-
raise
|
59 |
-
|
60 |
-
async def create_vector_index(self):
|
61 |
-
"""
|
62 |
-
Create vector index for similarity search using MongoDB Atlas Vector Search.
|
63 |
-
"""
|
64 |
-
try:
|
65 |
-
# Check if index already exists
|
66 |
-
existing_indexes = await self.vector_collection.list_indexes().to_list(length=None)
|
67 |
-
index_names = [index.get('name') for index in existing_indexes]
|
68 |
-
|
69 |
-
if "vector_index" not in index_names:
|
70 |
-
# Create vector search index
|
71 |
-
index_definition = {
|
72 |
-
"mappings": {
|
73 |
-
"dynamic": True,
|
74 |
-
"fields": {
|
75 |
-
"embedding": {
|
76 |
-
"dimensions": settings.VECTOR_DIMENSION,
|
77 |
-
"similarity": "cosine",
|
78 |
-
"type": "knnVector"
|
79 |
-
}
|
80 |
-
}
|
81 |
-
}
|
82 |
-
}
|
83 |
-
|
84 |
-
# Create the index
|
85 |
-
await self.db.command({
|
86 |
-
"createIndexes": self.vector_collection.name,
|
87 |
-
"indexes": [
|
88 |
-
{
|
89 |
-
"name": "vector_index",
|
90 |
-
"key": {"embedding": "vector"},
|
91 |
-
"weights": {"embedding": 1},
|
92 |
-
"vectorSearchOptions": index_definition
|
93 |
-
}
|
94 |
-
]
|
95 |
-
})
|
96 |
-
|
97 |
-
# Create additional metadata indexes for filtering
|
98 |
-
await self.vector_collection.create_index([("metadata.source", ASCENDING)])
|
99 |
-
await self.vector_collection.create_index([("metadata.timestamp", ASCENDING)])
|
100 |
-
|
101 |
-
logger.info("Vector search index created")
|
102 |
-
else:
|
103 |
-
logger.info("Vector search index already exists")
|
104 |
-
|
105 |
-
except Exception as e:
|
106 |
-
logger.error(f"Error creating vector index: {str(e)}")
|
107 |
-
raise
|
108 |
-
|
109 |
-
async def close(self):
|
110 |
-
"""
|
111 |
-
Close MongoDB connection.
|
112 |
-
"""
|
113 |
-
if self.client:
|
114 |
-
self.client.close()
|
115 |
-
self.client = None
|
116 |
-
self.db = None
|
117 |
-
self.vector_collection = None
|
118 |
-
self.initialized = False
|
119 |
-
logger.info("MongoDB connection closed")
|
120 |
-
|
121 |
-
# Create a singleton instance
|
122 |
-
mongodb = MongoDB()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ctp_slack_bot/db/__init__.py
CHANGED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from ctp_slack_bot.db.mongo_db import MongoDB
|
src/ctp_slack_bot/db/mongo_db.py
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#from motor.motor_asyncio import AsyncIOMotorClient
|
2 |
+
from loguru import logger
|
3 |
+
from pydantic import BaseModel, model_validator
|
4 |
+
#from pymongo import IndexModel, ASCENDING
|
5 |
+
from typing import Optional, Self
|
6 |
+
|
7 |
+
from ctp_slack_bot.core.config import Settings
|
8 |
+
|
9 |
+
class MongoDB(BaseModel):
|
10 |
+
"""
|
11 |
+
MongoDB connection and initialization class.
|
12 |
+
Handles connection to MongoDB, database selection, and index creation.
|
13 |
+
"""
|
14 |
+
|
15 |
+
settings: Settings
|
16 |
+
|
17 |
+
@model_validator(mode='after')
|
18 |
+
def post_init(self: Self) -> Self:
|
19 |
+
logger.debug("Created {}", self.__class__.__name__)
|
20 |
+
return self
|
21 |
+
|
22 |
+
def __init__(self: Self, settings: Settings) -> Self:
|
23 |
+
#self.client: Optional[AsyncIOMotorClient] = None
|
24 |
+
#self.db = None
|
25 |
+
#self.vector_collection = None
|
26 |
+
#self.initialized = False
|
27 |
+
pass # The above initialization needs to be done some other way.
|
28 |
+
|
29 |
+
# async def connect(self):
|
30 |
+
# """
|
31 |
+
# Connect to MongoDB using connection string from settings.
|
32 |
+
# """
|
33 |
+
# if self.client is not None:
|
34 |
+
# return
|
35 |
+
|
36 |
+
# if not settings.MONGODB_URI:
|
37 |
+
# raise ValueError("MONGODB_URI is not set in environment variables")
|
38 |
+
|
39 |
+
# try:
|
40 |
+
# # Create MongoDB connection
|
41 |
+
# self.client = AsyncIOMotorClient(settings.MONGODB_URI.get_secret_value())
|
42 |
+
# self.db = self.client[settings.MONGODB_DB_NAME]
|
43 |
+
# self.vector_collection = self.db["vector_store"]
|
44 |
+
# logger.info(f"Connected to MongoDB: {settings.MONGODB_DB_NAME}")
|
45 |
+
# except Exception as e:
|
46 |
+
# logger.error(f"Error connecting to MongoDB: {str(e)}")
|
47 |
+
# raise
|
48 |
+
|
49 |
+
# async def initialize(self):
|
50 |
+
# """
|
51 |
+
# Initialize MongoDB with required collections and indexes.
|
52 |
+
# """
|
53 |
+
# if self.initialized:
|
54 |
+
# return
|
55 |
+
|
56 |
+
# if not self.client:
|
57 |
+
# await self.connect()
|
58 |
+
|
59 |
+
# try:
|
60 |
+
# # Create vector index for similarity search
|
61 |
+
# await self.create_vector_index()
|
62 |
+
# self.initialized = True
|
63 |
+
# logger.info("MongoDB initialized successfully")
|
64 |
+
# except Exception as e:
|
65 |
+
# logger.error(f"Error initializing MongoDB: {str(e)}")
|
66 |
+
# raise
|
67 |
+
|
68 |
+
# async def create_vector_index(self):
|
69 |
+
# """
|
70 |
+
# Create vector index for similarity search using MongoDB Atlas Vector Search.
|
71 |
+
# """
|
72 |
+
# try:
|
73 |
+
# # Check if index already exists
|
74 |
+
# existing_indexes = await self.vector_collection.list_indexes().to_list(length=None)
|
75 |
+
# index_names = [index.get('name') for index in existing_indexes]
|
76 |
+
|
77 |
+
# if "vector_index" not in index_names:
|
78 |
+
# # Create vector search index
|
79 |
+
# index_definition = {
|
80 |
+
# "mappings": {
|
81 |
+
# "dynamic": True,
|
82 |
+
# "fields": {
|
83 |
+
# "embedding": {
|
84 |
+
# "dimensions": settings.VECTOR_DIMENSION,
|
85 |
+
# "similarity": "cosine",
|
86 |
+
# "type": "knnVector"
|
87 |
+
# }
|
88 |
+
# }
|
89 |
+
# }
|
90 |
+
# }
|
91 |
+
|
92 |
+
# # Create the index
|
93 |
+
# await self.db.command({
|
94 |
+
# "createIndexes": self.vector_collection.name,
|
95 |
+
# "indexes": [
|
96 |
+
# {
|
97 |
+
# "name": "vector_index",
|
98 |
+
# "key": {"embedding": "vector"},
|
99 |
+
# "weights": {"embedding": 1},
|
100 |
+
# "vectorSearchOptions": index_definition
|
101 |
+
# }
|
102 |
+
# ]
|
103 |
+
# })
|
104 |
+
|
105 |
+
# # Create additional metadata indexes for filtering
|
106 |
+
# await self.vector_collection.create_index([("metadata.source", ASCENDING)])
|
107 |
+
# await self.vector_collection.create_index([("metadata.timestamp", ASCENDING)])
|
108 |
+
|
109 |
+
# logger.info("Vector search index created")
|
110 |
+
# else:
|
111 |
+
# logger.info("Vector search index already exists")
|
112 |
+
|
113 |
+
# except Exception as e:
|
114 |
+
# logger.error(f"Error creating vector index: {str(e)}")
|
115 |
+
# raise
|
116 |
+
|
117 |
+
# async def close(self):
|
118 |
+
# """
|
119 |
+
# Close MongoDB connection.
|
120 |
+
# """
|
121 |
+
# if self.client:
|
122 |
+
# self.client.close()
|
123 |
+
# self.client = None
|
124 |
+
# self.db = None
|
125 |
+
# self.vector_collection = None
|
126 |
+
# self.initialized = False
|
127 |
+
# logger.info("MongoDB connection closed")
|
src/ctp_slack_bot/models/__init__.py
CHANGED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ctp_slack_bot.models.base import Content, Ingestible, Metadata
|
2 |
+
from ctp_slack_bot.models.content import RetreivedContext
|
3 |
+
from ctp_slack_bot.models.slack import SlackMessage
|
4 |
+
from ctp_slack_bot.models.vector_query import VectorQuery
|
src/ctp_slack_bot/models/{VectorQuery.py β vector_query.py}
RENAMED
@@ -1,6 +1,5 @@
|
|
1 |
from pydantic import BaseModel, Field, validator
|
2 |
from typing import Optional, List, Dict, Any
|
3 |
-
from ctp_slack_bot.core.config import settings
|
4 |
|
5 |
class VectorQuery(BaseModel):
|
6 |
"""Model for vector database similarity search queries.
|
@@ -12,6 +11,6 @@ class VectorQuery(BaseModel):
|
|
12 |
filter_metadata: Optional filters for metadata fields
|
13 |
"""
|
14 |
query_text: str
|
15 |
-
k: int
|
16 |
score_threshold: float = Field(default=0.7)
|
17 |
-
filter_metadata: Optional[Dict[str, Any]] = None
|
|
|
1 |
from pydantic import BaseModel, Field, validator
|
2 |
from typing import Optional, List, Dict, Any
|
|
|
3 |
|
4 |
class VectorQuery(BaseModel):
|
5 |
"""Model for vector database similarity search queries.
|
|
|
11 |
filter_metadata: Optional filters for metadata fields
|
12 |
"""
|
13 |
query_text: str
|
14 |
+
k: int
|
15 |
score_threshold: float = Field(default=0.7)
|
16 |
+
filter_metadata: Optional[Dict[str, Any]] = None
|
src/ctp_slack_bot/services/__init__.py
CHANGED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
|
2 |
+
from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
|
3 |
+
from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
|
4 |
+
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
5 |
+
from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
|
6 |
+
from ctp_slack_bot.services.slack_service import SlackService
|
7 |
+
from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
|
8 |
+
from ctp_slack_bot.services.vectorization_service import VectorizationService
|
src/ctp_slack_bot/services/{AnswerQuestionService.py β answer_retrieval_service.py}
RENAMED
@@ -1,17 +1,30 @@
|
|
1 |
-
from
|
2 |
-
from
|
3 |
-
from ctp_slack_bot.core.config import settings
|
4 |
-
import numpy as np
|
5 |
from openai import OpenAI
|
6 |
-
from
|
7 |
-
from
|
8 |
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
10 |
"""
|
11 |
Service for language model operations.
|
12 |
"""
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
def generate_answer(self, question: SlackMessage, context: List[RetreivedContext]) -> str:
|
17 |
"""Generate a response using OpenAI's API with retrieved context.
|
@@ -50,11 +63,3 @@ class GenerateAnswer():
|
|
50 |
)
|
51 |
|
52 |
return response.choices[0].message.content
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
### REMOVE BELOW, PUT SOMEWHERE IN TESTS BUT IDK WHERE YET
|
57 |
-
# sm = SlackMessage(text="What is the capital of France?", channel_id="123", user_id="456", timestamp="789")
|
58 |
-
# context = [RetreivedContext(contextual_text="The capital of France is Paris", metadata_source="class materials", similarity_score=0.95)]
|
59 |
-
# a = GenerateAnswer()
|
60 |
-
# a.generate_answer(sm, context)
|
|
|
1 |
+
# from asyncio import create_task
|
2 |
+
from loguru import logger
|
|
|
|
|
3 |
from openai import OpenAI
|
4 |
+
from pydantic import BaseModel, model_validator
|
5 |
+
from typing import List, Optional, Self, Tuple
|
6 |
|
7 |
+
from ctp_slack_bot.core import Settings
|
8 |
+
from ctp_slack_bot.models import RetreivedContext, SlackMessage
|
9 |
+
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
10 |
+
|
11 |
+
|
12 |
+
class AnswerRetrievalService(BaseModel): # TODO: this should separate the OpenAI backend out into its own service.
|
13 |
"""
|
14 |
Service for language model operations.
|
15 |
"""
|
16 |
+
|
17 |
+
settings: Settings
|
18 |
+
event_brokerage_service: EventBrokerageService
|
19 |
+
client: OpenAI # TODO: this should separate the OpenAI backend out into its own service, one that is agnostic.
|
20 |
+
|
21 |
+
class Config:
|
22 |
+
arbitrary_types_allowed = True
|
23 |
+
|
24 |
+
@model_validator(mode='after')
|
25 |
+
def post_init(self: Self) -> Self:
|
26 |
+
logger.debug("Created {}", self.__class__.__name__)
|
27 |
+
return self
|
28 |
|
29 |
def generate_answer(self, question: SlackMessage, context: List[RetreivedContext]) -> str:
|
30 |
"""Generate a response using OpenAI's API with retrieved context.
|
|
|
63 |
)
|
64 |
|
65 |
return response.choices[0].message.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ctp_slack_bot/services/content_ingestion_service.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from loguru import logger
|
2 |
+
from pydantic import BaseModel, model_validator
|
3 |
+
from typing import Self
|
4 |
+
|
5 |
+
from ctp_slack_bot.core import Settings
|
6 |
+
from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
|
7 |
+
from ctp_slack_bot.services.vectorization_service import VectorizationService
|
8 |
+
|
9 |
+
class ContentIngestionService(BaseModel):
|
10 |
+
"""
|
11 |
+
Service for ingesting content.
|
12 |
+
"""
|
13 |
+
|
14 |
+
settings: Settings
|
15 |
+
vector_database_service: VectorDatabaseService
|
16 |
+
vectorization_service: VectorizationService
|
17 |
+
|
18 |
+
@model_validator(mode='after')
|
19 |
+
def post_init(self: Self) -> Self:
|
20 |
+
logger.debug("Created {}", self.__class__.__name__)
|
21 |
+
return self
|
src/ctp_slack_bot/services/{ContextRetrievalService.py β context_retrieval_service.py}
RENAMED
@@ -1,29 +1,31 @@
|
|
1 |
-
import
|
2 |
-
from
|
|
|
3 |
|
4 |
-
from ctp_slack_bot.
|
5 |
-
from ctp_slack_bot.models
|
6 |
-
from ctp_slack_bot.
|
7 |
-
from ctp_slack_bot.services.
|
8 |
-
from ctp_slack_bot.services.VectorDatabaseService import VectorDatabaseService
|
9 |
-
from ctp_slack_bot.core.config import settings
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
class ContextRetrievalService:
|
14 |
"""
|
15 |
Service for retrieving relevant context from the vector database based on user questions.
|
16 |
"""
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
async def initialize(self):
|
23 |
"""
|
24 |
Initialize the required services.
|
25 |
"""
|
26 |
-
await self.
|
27 |
|
28 |
async def get_context(self, message: SlackMessage) -> List[RetreivedContext]:
|
29 |
"""
|
@@ -62,7 +64,7 @@ class ContextRetrievalService:
|
|
62 |
)
|
63 |
|
64 |
# Search for similar content in vector database
|
65 |
-
context_results = await self.
|
66 |
query=vector_query,
|
67 |
query_embedding=query_embedding
|
68 |
)
|
|
|
1 |
+
from loguru import logger
|
2 |
+
from pydantic import BaseModel, model_validator
|
3 |
+
from typing import Any, Dict, List, Optional, Self
|
4 |
|
5 |
+
from ctp_slack_bot.core.config import Settings
|
6 |
+
from ctp_slack_bot.models import RetreivedContext, SlackMessage, VectorQuery
|
7 |
+
from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
|
8 |
+
from ctp_slack_bot.services.vectorization_service import VectorizationService
|
|
|
|
|
9 |
|
10 |
+
class ContextRetrievalService(BaseModel):
|
|
|
|
|
11 |
"""
|
12 |
Service for retrieving relevant context from the vector database based on user questions.
|
13 |
"""
|
14 |
+
|
15 |
+
settings: Settings
|
16 |
+
vectorization_service: VectorizationService
|
17 |
+
vector_database_service: VectorDatabaseService
|
18 |
+
|
19 |
+
@model_validator(mode='after')
|
20 |
+
def post_init(self: Self) -> Self:
|
21 |
+
logger.debug("Created {}", self.__class__.__name__)
|
22 |
+
return self
|
23 |
|
24 |
async def initialize(self):
|
25 |
"""
|
26 |
Initialize the required services.
|
27 |
"""
|
28 |
+
await self.vector_database_service.initialize()
|
29 |
|
30 |
async def get_context(self, message: SlackMessage) -> List[RetreivedContext]:
|
31 |
"""
|
|
|
64 |
)
|
65 |
|
66 |
# Search for similar content in vector database
|
67 |
+
context_results = await self.vector_database_service.search_by_similarity(
|
68 |
query=vector_query,
|
69 |
query_embedding=query_embedding
|
70 |
)
|
src/ctp_slack_bot/services/event_brokerage_service.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from asyncio import create_task
|
2 |
+
from loguru import logger
|
3 |
+
from openai import OpenAI
|
4 |
+
from pydantic import BaseModel, model_validator
|
5 |
+
from typing import Any, Callable, Dict, List, Self
|
6 |
+
|
7 |
+
from ctp_slack_bot.core import Settings
|
8 |
+
from ctp_slack_bot.models import RetreivedContext, SlackMessage
|
9 |
+
from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
|
10 |
+
from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
|
11 |
+
|
12 |
+
|
13 |
+
class EventBrokerageService(BaseModel):
|
14 |
+
"""
|
15 |
+
Service for brokering events between services.
|
16 |
+
"""
|
17 |
+
|
18 |
+
subscribers: Dict[str, List[Callable]] = {}
|
19 |
+
|
20 |
+
class Config:
|
21 |
+
arbitrary_types_allowed = True
|
22 |
+
|
23 |
+
@model_validator(mode='after')
|
24 |
+
def post_init(self: Self) -> Self:
|
25 |
+
logger.debug("Created {}", self.__class__.__name__)
|
26 |
+
return self
|
27 |
+
|
28 |
+
def subscribe(self: Self, event_type: str, callback: Callable) -> None:
|
29 |
+
"""Subscribe to an event type with a callback function."""
|
30 |
+
if event_type not in self.subscribers:
|
31 |
+
self.subscribers[event_type] = []
|
32 |
+
self.subscribers[event_type].append(callback)
|
33 |
+
|
34 |
+
def publish(self: Self, event_type: str, data: Any = None) -> None:
|
35 |
+
"""Publish an event with optional data to all subscribers."""
|
36 |
+
if event_type in self.subscribers:
|
37 |
+
for callback in self.subscribers[event_type]:
|
38 |
+
callback(data)
|
src/ctp_slack_bot/services/question_dispatch_service.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from asyncio import create_task
|
2 |
+
from loguru import logger
|
3 |
+
from openai import OpenAI
|
4 |
+
from pydantic import BaseModel, model_validator
|
5 |
+
from typing import List, Optional, Self, Tuple
|
6 |
+
|
7 |
+
from ctp_slack_bot.core import Settings
|
8 |
+
from ctp_slack_bot.models import RetreivedContext, SlackMessage
|
9 |
+
from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
|
10 |
+
from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
|
11 |
+
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
12 |
+
|
13 |
+
|
14 |
+
class QuestionDispatchService(BaseModel):
|
15 |
+
"""
|
16 |
+
Service for determining whether a Slack message constitutes a question.
|
17 |
+
"""
|
18 |
+
|
19 |
+
settings: Settings
|
20 |
+
event_brokerage_service: EventBrokerageService
|
21 |
+
context_retrieval_service: ContextRetrievalService
|
22 |
+
answer_retrieval_service: AnswerRetrievalService
|
23 |
+
|
24 |
+
@model_validator(mode='after')
|
25 |
+
def post_init(self: Self) -> Self:
|
26 |
+
logger.debug("Created {}", self.__class__.__name__)
|
27 |
+
return self
|
28 |
+
|
29 |
+
def push(self: Self, message: SlackMessage) -> None:
|
30 |
+
context = self.context_retrieval_service.get_context(message)
|
31 |
+
self.answer_retrieval_service.generate_answer(message, context)
|
src/ctp_slack_bot/services/slack_service.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from asyncio import create_task
|
2 |
+
from loguru import logger
|
3 |
+
from openai import OpenAI
|
4 |
+
from pydantic import BaseModel, model_validator
|
5 |
+
from typing import List, Optional, Self, Tuple
|
6 |
+
|
7 |
+
from ctp_slack_bot.core import Settings
|
8 |
+
from ctp_slack_bot.models import RetreivedContext, SlackMessage
|
9 |
+
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
10 |
+
|
11 |
+
|
12 |
+
class SlackService(BaseModel):
|
13 |
+
"""
|
14 |
+
Service for interfacing with Slack.
|
15 |
+
"""
|
16 |
+
|
17 |
+
settings: Settings
|
18 |
+
event_brokerage_service: EventBrokerageService
|
19 |
+
|
20 |
+
@model_validator(mode='after')
|
21 |
+
def post_init(self: Self) -> Self:
|
22 |
+
logger.debug("Created {}", self.__class__.__name__)
|
23 |
+
return self
|
src/ctp_slack_bot/services/{VectorDatabaseService.py β vector_database_service.py}
RENAMED
@@ -1,18 +1,24 @@
|
|
1 |
-
import
|
2 |
-
from
|
3 |
-
|
4 |
|
5 |
-
from ctp_slack_bot.
|
6 |
-
from ctp_slack_bot.
|
7 |
-
from ctp_slack_bot.models
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
class VectorDatabaseService:
|
12 |
"""
|
13 |
Service for storing and retrieving vector embeddings from MongoDB.
|
14 |
"""
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
async def initialize(self):
|
17 |
"""
|
18 |
Initialize the database connection.
|
|
|
1 |
+
from loguru import logger
|
2 |
+
from pydantic import BaseModel, model_validator
|
3 |
+
from typing import Any, Dict, List, Optional, Self
|
4 |
|
5 |
+
from ctp_slack_bot.core import Settings
|
6 |
+
from ctp_slack_bot.db import MongoDB
|
7 |
+
from ctp_slack_bot.models import VectorQuery, RetreivedContext
|
8 |
|
9 |
+
class VectorDatabaseService(BaseModel): # TODO: this should not rely specifically on MongoDB.
|
|
|
|
|
10 |
"""
|
11 |
Service for storing and retrieving vector embeddings from MongoDB.
|
12 |
"""
|
13 |
+
|
14 |
+
settings: Settings
|
15 |
+
mongo_db: MongoDB
|
16 |
+
|
17 |
+
@model_validator(mode='after')
|
18 |
+
def post_init(self: Self) -> Self:
|
19 |
+
logger.debug("Created {}", self.__class__.__name__)
|
20 |
+
return self
|
21 |
+
|
22 |
async def initialize(self):
|
23 |
"""
|
24 |
Initialize the database connection.
|
src/ctp_slack_bot/services/{VectorizationService.py β vectorization_service.py}
RENAMED
@@ -1,17 +1,26 @@
|
|
1 |
-
from
|
2 |
-
from typing import List, Optional
|
3 |
-
from ctp_slack_bot.core.config import settings
|
4 |
import numpy as np
|
5 |
from openai import OpenAI
|
|
|
|
|
6 |
|
|
|
7 |
|
8 |
-
class VectorizationService():
|
9 |
"""
|
10 |
Service for vectorizing chunks of text data.
|
11 |
"""
|
12 |
-
def __init__(self):
|
13 |
-
self.client = OpenAI(api_key=settings.OPENAI_API_KEY)
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
def get_embeddings(self, texts: List[str]) -> np.ndarray:
|
17 |
"""Get embeddings for a list of texts using OpenAI's API.
|
@@ -28,7 +37,7 @@ class VectorizationService():
|
|
28 |
try:
|
29 |
# Use the initialized client instead of the global openai module
|
30 |
response = self.client.embeddings.create(
|
31 |
-
model=settings.EMBEDDING_MODEL,
|
32 |
input=texts,
|
33 |
encoding_format="float" # Ensure we get raw float values
|
34 |
)
|
@@ -36,9 +45,9 @@ class VectorizationService():
|
|
36 |
# Extract embeddings and verify dimensions
|
37 |
embeddings = np.array([data.embedding for data in response.data])
|
38 |
|
39 |
-
if embeddings.shape[1] != settings.VECTOR_DIMENSION:
|
40 |
raise ValueError(
|
41 |
-
f"Embedding dimension mismatch. Expected {settings.VECTOR_DIMENSION}, "
|
42 |
f"but got {embeddings.shape[1]}. Please update VECTOR_DIMENSION "
|
43 |
f"in config.py to match the model's output."
|
44 |
)
|
|
|
1 |
+
from loguru import logger
|
|
|
|
|
2 |
import numpy as np
|
3 |
from openai import OpenAI
|
4 |
+
from pydantic import BaseModel, model_validator
|
5 |
+
from typing import List, Optional, Self
|
6 |
|
7 |
+
from ctp_slack_bot.core import Settings
|
8 |
|
9 |
+
class VectorizationService(BaseModel):
|
10 |
"""
|
11 |
Service for vectorizing chunks of text data.
|
12 |
"""
|
|
|
|
|
13 |
|
14 |
+
settings: Settings
|
15 |
+
client: OpenAI # TODO: this should separate the OpenAI backend out into its own service, one that is agnostic.
|
16 |
+
|
17 |
+
class Config:
|
18 |
+
arbitrary_types_allowed = True
|
19 |
+
|
20 |
+
@model_validator(mode='after')
|
21 |
+
def post_init(self: Self) -> Self:
|
22 |
+
logger.debug("Created {}", self.__class__.__name__)
|
23 |
+
return self
|
24 |
|
25 |
def get_embeddings(self, texts: List[str]) -> np.ndarray:
|
26 |
"""Get embeddings for a list of texts using OpenAI's API.
|
|
|
37 |
try:
|
38 |
# Use the initialized client instead of the global openai module
|
39 |
response = self.client.embeddings.create(
|
40 |
+
model=self.settings.EMBEDDING_MODEL,
|
41 |
input=texts,
|
42 |
encoding_format="float" # Ensure we get raw float values
|
43 |
)
|
|
|
45 |
# Extract embeddings and verify dimensions
|
46 |
embeddings = np.array([data.embedding for data in response.data])
|
47 |
|
48 |
+
if embeddings.shape[1] != self.settings.VECTOR_DIMENSION:
|
49 |
raise ValueError(
|
50 |
+
f"Embedding dimension mismatch. Expected {self.settings.VECTOR_DIMENSION}, "
|
51 |
f"but got {embeddings.shape[1]}. Please update VECTOR_DIMENSION "
|
52 |
f"in config.py to match the model's output."
|
53 |
)
|
src/ctp_slack_bot/tasks/__init__.py
CHANGED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
|
src/ctp_slack_bot/tasks/scheduler.py
CHANGED
@@ -6,9 +6,7 @@ from loguru import logger
|
|
6 |
from pytz import timezone
|
7 |
from typing import Optional
|
8 |
|
9 |
-
from ctp_slack_bot
|
10 |
-
#from ctp_slack_bot.tasks.error_report import send_error_report
|
11 |
-
#from ctp_slack_bot.tasks.transcript_cleanup import cleanup_old_transcripts
|
12 |
|
13 |
@inject
|
14 |
def start_scheduler(container: Container) -> AsyncIOScheduler:
|
|
|
6 |
from pytz import timezone
|
7 |
from typing import Optional
|
8 |
|
9 |
+
from ctp_slack_bot import Container
|
|
|
|
|
10 |
|
11 |
@inject
|
12 |
def start_scheduler(container: Container) -> AsyncIOScheduler:
|