LiKenun committed on
Commit 9fd6e20 · 1 Parent(s): ef444a7
pyproject.toml CHANGED
@@ -33,7 +33,6 @@ dependencies = [
     "apscheduler>=3.11.0",
     "slack-sdk>=3.35.0",
     "pymongo>=4.11.3 ",
-    "numpy>=2.2.4",
     "webvtt-py>=0.5.1",
     "openai>=1.70.0",
     # "langchain>=0.3.23",
src/ctp_slack_bot/__init__.py CHANGED
@@ -1 +0,0 @@
-from ctp_slack_bot.containers import Container
src/ctp_slack_bot/api/main.py CHANGED
@@ -1,12 +1,19 @@
 from contextlib import asynccontextmanager
+from dependency_injector.wiring import inject, Provide
 from fastapi import FastAPI, HTTPException, Depends
 from loguru import logger
-from typing import AsyncGenerator
-from dependency_injector.wiring import inject, Provide
+from typing import Any, AsyncGenerator
+from slack_bolt import App
+from slack_bolt.adapter.socket_mode import SocketModeHandler
+from starlette.requests import Request
+from starlette.responses import Response
+from threading import Thread
+from typing import Any, Dict, Self

-from ctp_slack_bot import Container
+from ctp_slack_bot.containers import Container
 from ctp_slack_bot.api.routes import router
-from ctp_slack_bot.core import Settings, setup_logging
+from ctp_slack_bot.core.config import Settings
+from ctp_slack_bot.core.logging import setup_logging
 from ctp_slack_bot.core.response_rendering import PrettyJSONResponse
 from ctp_slack_bot.tasks import start_scheduler, stop_scheduler

@@ -16,6 +23,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
     Lifespan context manager for FastAPI application.
     Handles startup and shutdown events.
     """
+
     # Initialize container and wire the container to modules that need dependency injection.
     container = Container()
     container.wire(packages=['ctp_slack_bot'])
@@ -25,10 +33,38 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
     setup_logging(container)
     logger.info("Starting application")

+    # Log route being served.
+    logger.info("Serving {} route(s):{}", len(app.routes), ''.join(f"\n* {", ".join(route.methods)} {route.path}" for route in app.routes))
+
     # Start the scheduler.
     scheduler = start_scheduler(container)
     logger.info("Started scheduler")

+    # Initialize primordial dependencies in container.
+    container.primordial_services()
+
+    # Start Slack socket mode in a background thread and set up an event handler for the Bolt app.
+    bolt_app = container.slack_bolt_app()
+    slack_service = container.slack_service()
+    @bolt_app.event("message")
+    def handle_message(body: Dict[str, Any]) -> None:
+        logger.debug("Received Slack message event: {}", body)
+        slack_service.process_message(body)
+    @bolt_app.event("app_mention")
+    def handle_message(body: Dict[str, Any]) -> None:
+        logger.debug("Received Slack app mention event: {}", body)
+        slack_service.process_message(body)
+
+    # Start Socket Mode handler in a background thread
+    socket_mode_handler = SocketModeHandler(
+        app=bolt_app,
+        app_token=container.settings().SLACK_APP_TOKEN.get_secret_value()
+    )
+    socket_thread = Thread(target=socket_mode_handler.start)
+    socket_thread.daemon = True
+    socket_thread.start()
+    logger.info("Started Slack Socket Mode handler")
+
     yield # control to FastAPI until shutdown.

     # Shutdown.
@@ -36,7 +72,6 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
     stop_scheduler(scheduler)
     logger.info("Stopped scheduler")

-
 app = FastAPI(
     title="CTP Slack Bot",
     description="A Slack bot for processing and analyzing Zoom transcripts using AI",
src/ctp_slack_bot/api/routes.py CHANGED
@@ -2,9 +2,9 @@ from fastapi import APIRouter, Depends, HTTPException, status
 from dependency_injector.wiring import inject, Provide
 from loguru import logger

-from ctp_slack_bot import Container
-from ctp_slack_bot.core import Settings
-from ctp_slack_bot.services import SlackService
+from ctp_slack_bot.containers import Container
+from ctp_slack_bot.core.config import Settings
+from ctp_slack_bot.services import AnswerRetrievalService, ContentIngestionService, ContextRetrievalService, EmbeddingsModelService, LanguageModelService, QuestionDispatchService, SlackService, VectorDatabaseService, VectorizationService

 router = APIRouter(prefix="/api/v1")

@@ -15,6 +15,51 @@ async def get_env(settings: Settings = Depends(Provide[Container.settings])) ->
     raise HTTPException(status_code=404)
     return settings

+@router.get("/answer_retrieval_service")
+@inject
+async def test(answer_retrieval_service: AnswerRetrievalService = Depends(Provide[Container.answer_retrieval_service])) -> None:
+    pass
+
+@router.get("/content_ingestion_service")
+@inject
+async def test(content_ingestion_service: ContentIngestionService = Depends(Provide[Container.content_ingestion_service])) -> None:
+    pass
+
+@router.get("/context_retrieval_service")
+@inject
+async def test(context_retrieval_service: ContextRetrievalService = Depends(Provide[Container.context_retrieval_service])) -> None:
+    pass
+
+@router.get("/embeddings_model_service")
+@inject
+async def test(embeddings_model_service: EmbeddingsModelService = Depends(Provide[Container.embeddings_model_service])) -> None:
+    pass
+
+@router.get("/language_model_service")
+@inject
+async def test(language_model_service: LanguageModelService = Depends(Provide[Container.language_model_service])) -> None:
+    pass
+
+@router.get("/question_dispatch_service")
+@inject
+async def test(question_dispatch_service: QuestionDispatchService = Depends(Provide[Container.question_dispatch_service])) -> None:
+    pass
+
+@router.get("/slack_service")
+@inject
+async def test(slack_service: SlackService = Depends(Provide[Container.slack_service])) -> None:
+    pass
+
+@router.get("/vector_database_service")
+@inject
+async def test(vector_database_service: VectorDatabaseService = Depends(Provide[Container.vector_database_service])) -> None:
+    pass
+
+@router.get("/vectorization_service")
+@inject
+async def test(vectorization_service: VectorizationService = Depends(Provide[Container.vectorization_service])) -> None:
+    pass
+
 # @router.post("/transcripts/analyze", response_model=TranscriptResponse)
 # async def analyze_transcript(
 #     request: TranscriptRequest,
src/ctp_slack_bot/containers.py CHANGED
@@ -1,13 +1,16 @@
 from dependency_injector.containers import DeclarativeContainer
-from dependency_injector.providers import Factory, Singleton
-from openai import OpenAI
+from dependency_injector.providers import Factory, List, Singleton
+from slack_bolt import App
+from slack_bolt.adapter.socket_mode import SocketModeHandler

 from ctp_slack_bot.core.config import Settings
 from ctp_slack_bot.db.mongo_db import MongoDB
 from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
 from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
 from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
+from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
 from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
+from ctp_slack_bot.services.language_model_service import LanguageModelService
 from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
 from ctp_slack_bot.services.slack_service import SlackService
 from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
@@ -16,29 +19,21 @@ from ctp_slack_bot.services.vectorization_service import VectorizationService

 class Container(DeclarativeContainer):
     settings = Singleton(Settings)
-
     event_brokerage_service = Singleton(EventBrokerageService)
-
-    mongo_db = Singleton(MongoDB, settings=settings)
-
+    slack_bolt_app = Factory(App, token=settings.provided.SLACK_BOT_TOKEN().get_secret_value())
+    mongo_db = Singleton(MongoDB, settings=settings) # TODO: we could really use less commitment to MongoDB.
     # Repositories
     # transcript_repository = Factory(
     #     # Your transcript repository class
-    #     db=db
+    #     mongo_db=mongo_db
     # )
-
-    open_ai_client = Factory(OpenAI, api_key=settings.provided.OPENAI_API_KEY) # TODO: poor practice to do it this way; create a LanguageModelService that creates an OpenAI client.
-
     vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db)
-
-    vectorization_service = Singleton(VectorizationService, settings=settings, client=open_ai_client)
-
+    embeddings_model_service = Singleton(EmbeddingsModelService, settings=settings)
+    vectorization_service = Singleton(VectorizationService, settings=settings, embeddings_model_service=embeddings_model_service)
     content_ingestion_service = Singleton(ContentIngestionService, settings=settings, event_brokerage_service=event_brokerage_service, vector_database_service=vector_database_service, vectorization_service=vectorization_service)
-
     context_retrieval_service = Singleton(ContextRetrievalService, settings=settings, vectorization_service=vectorization_service, vector_database_service=vector_database_service)
-
-    answer_retrieval_service = Singleton(AnswerRetrievalService, settings=settings, event_brokerage_service=event_brokerage_service, client=open_ai_client)
-
+    language_model_service = Singleton(LanguageModelService, settings=settings)
+    answer_retrieval_service = Singleton(AnswerRetrievalService, settings=settings, event_brokerage_service=event_brokerage_service, language_model_service=language_model_service)
     question_dispatch_service = Singleton(QuestionDispatchService, settings=settings, event_brokerage_service=event_brokerage_service, content_ingestion_service=content_ingestion_service, context_retrieval_service=context_retrieval_service, answer_retrieval_service=answer_retrieval_service)
-
-    slack_service = Singleton(SlackService, settings=settings, event_brokerage_service=event_brokerage_service)
+    slack_service = Singleton(SlackService, event_brokerage_service=event_brokerage_service, slack_bolt_app=slack_bolt_app)
+    primordial_services = List(settings, event_brokerage_service, slack_bolt_app, slack_service, question_dispatch_service, content_ingestion_service)
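For orientation, the sketch below shows how the reworked providers are expected to resolve at runtime under dependency_injector's usual semantics; it is illustrative only and assumes the Slack and OpenAI settings required by Settings are present in the environment.

    # Hypothetical usage of the Container defined above; not part of this commit.
    from ctp_slack_bot.containers import Container

    container = Container()
    container.wire(packages=["ctp_slack_bot"])  # enables @inject / Provide in wired modules
    slack_service = container.slack_service()   # Singleton provider: same instance on every call
    bolt_app = container.slack_bolt_app()       # Factory provider: a new Bolt App per call
    container.primordial_services()             # List provider: resolves the eagerly-needed services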
src/ctp_slack_bot/core/__init__.py CHANGED
@@ -1,2 +1 @@
 from ctp_slack_bot.core.config import Settings
-from ctp_slack_bot.core.logging import logger, setup_logging
src/ctp_slack_bot/core/logging.py CHANGED
@@ -1,7 +1,10 @@
+from dependency_injector.wiring import Provide
 from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord
 from loguru import logger
 from sys import stderr
-from typing import Dict, Union
+from typing import Self
+
+from ctp_slack_bot.containers import Container

 class InterceptHandler(Handler):
     """
@@ -11,7 +14,7 @@ class InterceptHandler(Handler):
     to Loguru, allowing unified logging across the application.
     """

-    def emit(self, record: LogRecord) -> None:
+    def emit(self: Self, record: LogRecord) -> None:
        # Get corresponding Loguru level if it exists
        try:
            level = logger.level(record.levelname).name
@@ -29,7 +32,7 @@ class InterceptHandler(Handler):
        )


-def setup_logging(container: "Container") -> None:
+def setup_logging(container: "Container") -> None: # TODO: Perhaps get rid of the container dependence since we only need two settings.
    """
    Configure logging with Loguru.
@@ -37,7 +40,7 @@ def setup_logging(container: "Container") -> None:
    configures the log format based on settings, and intercepts
    standard logging messages.
    """
-    from ctp_slack_bot import Container
+    from ctp_slack_bot.containers import Container
    settings = container.settings() if container else Provide[Container.settings]

    # Remove default loguru handler
src/ctp_slack_bot/core/response_rendering.py CHANGED
@@ -10,4 +10,4 @@ class PrettyJSONResponse(JSONResponse):
             allow_nan=False,
             indent=4,
             separators=(", ", ": "),
-        ).encode("utf-8")
+        ).encode()
src/ctp_slack_bot/db/mongo_db.py CHANGED
@@ -1,6 +1,6 @@
 #from motor.motor_asyncio import AsyncIOMotorClient
 from loguru import logger
-from pydantic import BaseModel, model_validator
+from pydantic import BaseModel, model_validator, PrivateAttr
 #from pymongo import IndexModel, ASCENDING
 from typing import Optional, Self

@@ -13,19 +13,23 @@ class MongoDB(BaseModel):
     """

     settings: Settings
+    _client: PrivateAttr = PrivateAttr()
+    _db: PrivateAttr = PrivateAttr()
+    _vector_collection: PrivateAttr = PrivateAttr()
+    _initialized: PrivateAttr = PrivateAttr()
+
+    def __init__(self: Self, **data) -> None:
+        super().__init__(**data)
+        self._client = None
+        self._db = None
+        self._vector_collection = None
+        self._initialized = False

     @model_validator(mode='after')
     def post_init(self: Self) -> Self:
         logger.debug("Created {}", self.__class__.__name__)
         return self

-    def __init__(self: Self, settings: Settings) -> Self:
-        #self.client: Optional[AsyncIOMotorClient] = None
-        #self.db = None
-        #self.vector_collection = None
-        #self.initialized = False
-        pass # The above initialization needs to be done some other way.
-
     # async def connect(self):
     #     """
     #     Connect to MongoDB using connection string from settings.
src/ctp_slack_bot/enums.py ADDED
@@ -0,0 +1,6 @@
+from enum import auto, StrEnum
+
+class EventType(StrEnum):
+    INCOMING_CONTENT = auto()
+    INCOMING_SLACK_MESSAGE = auto()
+    OUTGOING_SLACK_RESPONSE = auto()
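Because EventType derives from StrEnum and uses auto(), each member's value is its lowercased name and compares equal to the plain string — standard StrEnum behaviour, shown here only for reference:

    # Behaviour of the new EventType members (not part of this commit).
    from ctp_slack_bot.enums import EventType

    assert EventType.INCOMING_SLACK_MESSAGE == "incoming_slack_message"
    assert EventType.OUTGOING_SLACK_RESPONSE.value == "outgoing_slack_response"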
src/ctp_slack_bot/models/__init__.py CHANGED
@@ -1,4 +1,3 @@
-from ctp_slack_bot.models.base import Content, Ingestible, Metadata
-from ctp_slack_bot.models.content import RetreivedContext
-from ctp_slack_bot.models.slack import SlackMessage
+from ctp_slack_bot.models.base import Chunk, Content, VectorizedChunk
+from ctp_slack_bot.models.slack import SlackEventPayload, SlackMessage, SlackReaction, SlackResponse, SlackUserTimestampPair
 from ctp_slack_bot.models.vector_query import VectorQuery
src/ctp_slack_bot/models/base.py CHANGED
@@ -1,61 +1,52 @@
 from abc import ABC, abstractmethod
-from datetime import datetime
-from pydantic import BaseModel, Field, validator
-from typing import Dict, List, Optional, Union, Any, ClassVar
-import hashlib
-import json
+from pydantic import BaseModel, ConfigDict
+from typing import Any, Dict, final, Self, Sequence


-class Metadata(BaseModel):
-    """A class representing metadata about content."""
+class Chunk(BaseModel):
+    """A class representing a chunk of content."""

-    id: str # The content’s identity consistent across modifications
-    modification_time: datetime # The content’s modification for detection of alterations
-    hash: str # The content’s hash for detection of alterations
+    text: str # The text representation
+    parent_id: str # The source content’s identity
+    chunk_id: str # This chunk’s identity—unique within the source content
+    metadata: Dict[str, Any]
+
+    model_config = ConfigDict(frozen=True)


-class Content(BaseModel):
-    """A class representing ingested content."""
+@final
+class VectorizedChunk(Chunk):
+    """A class representing a vectorized chunk of content."""

-    metadata: Metadata
+    embedding: Sequence[float] # The vector representation
+
+    model_config = ConfigDict(frozen=True)


-class Ingestible(ABC, BaseModel):
-    """An abstract base class for ingestible content."""
+class Content(ABC, BaseModel):
+    """An abstract base class for all types of content."""

-    metadata: Metadata
+    model_config = ConfigDict(frozen=True)
+
+    @abstractmethod
+    def get_chunks(self: Self) -> Sequence[Chunk]:
+        pass
+
+    @abstractmethod
+    def get_metadata(self: Self) -> Dict[str, Any]:
+        pass

     @property
     @abstractmethod
-    def content(self) -> Content:
-        """
-        Return content ready for vectorization.
-
-        This could be:
-        - A single string
-        - A list of strings (pre-chunked)
-        - A more complex structure that can be recursively processed
-        """
+    def get_text(self: Self) -> str:
         pass
-
-    def get_chunks(self) -> List[str]:
-        """
-        Split content into chunks suitable for vectorization.
-        Override this in subclasses for specialized chunking logic.
-        """
-        content = self.content
-        if isinstance(content, str):
-            # Simple chunking by character count
-            return [content[i:i+self.chunk_size]
-                    for i in range(0, len(content), self.chunk_size)]
-        elif isinstance(content, list):
-            # Content is already chunked
-            return content
-        else:
-            raise ValueError(f"Unsupported content type: {type(content)}")
-
+
     @property
-    def key(self) -> str:
-        """Convenience accessor for the metadata key."""
-        return self.metadata.key
+    @abstractmethod
+    def bytes(self: Self) -> bytes:
+        pass
+
+    @property
+    @abstractmethod
+    def id(self: Self) -> str:
+        pass
src/ctp_slack_bot/models/content.py DELETED
@@ -1,19 +0,0 @@
-from pydantic import BaseModel, Field
-from typing import Optional, List, Dict, Any
-from ctp_slack_bot.models.slack import SlackMessage
-
-class RetreivedContext(BaseModel):
-    """Represents a the context of a question from Slack returned from the Vector Store Database.
-
-    contextual_text: The text that is relevant to the question.
-    metadata_source: The source of the contextual text.
-    similarity_score: The similarity score of the contextual text to the question.
-
-    in_reation_to_question: OPTINAL: The question that the contextual text is related to.
-    """
-    contextual_text: str
-    metadata_source: str
-    similarity_score: float
-
-    said_by: str = Optional[None]
-    in_reation_to_question: str = Optional[None]
src/ctp_slack_bot/models/slack.py CHANGED
@@ -1,16 +1,96 @@
-from pydantic import BaseModel, Field
-from typing import Optional, List, Dict, Any
+from datetime import datetime
+from json import dumps
+from pydantic import BaseModel, ConfigDict, PositiveInt, PrivateAttr
+from typing import Any, Dict, Literal, Optional, Self, Sequence

-class SlackMessage(BaseModel):
+from ctp_slack_bot.models.base import Chunk, Content
+
+class SlackEventPayload(BaseModel):
+    """Represents a general event payload from Slack."""
+
+    type: str
+    event_ts: str
+
+    model_config = ConfigDict(extra='allow', frozen=True)
+
+class SlackEvent(BaseModel):
+    """Represents a general event from Slack."""
+
+    token: str
+    team_id: str
+    api_app_id: str
+    event: SlackEventPayload
+    type: str
+    event_id: str
+    event_time: int
+    authed_users: Sequence[str]
+
+    model_config = ConfigDict(frozen=True)
+
+class SlackUserTimestampPair(BaseModel):
+    """Represents a Slack user-timestamp pair."""
+
+    user: str
+    ts: str
+
+    model_config = ConfigDict(frozen=True)
+
+class SlackReaction(BaseModel):
+    """Represents a Slack reaction information."""
+
+    name: str
+    count: PositiveInt
+    users: Sequence[str]
+
+    model_config = ConfigDict(frozen=True)
+
+class SlackMessage(Content):
     """Represents a message from Slack after adaptation."""
-    channel_id: str
-    user_id: str
-    text: str
+
+    type: Literal["app_mention", "message"]
+    subtype: Optional[str] = None
+    channel: str
+    channel_type: Optional[str] = None
+    user: Optional[str] = None
+    bot_id: Optional[str] = None
     thread_ts: Optional[str] = None
-    timestamp: str
-    is_question: bool = False
-
+    text: str
+    ts: str
+    edited: Optional[SlackUserTimestampPair] = None
+    event_ts: str
+    deleted_ts: Optional[str] = None
+    hidden: bool = False
+    is_starred: Optional[bool] = None
+    pinned_to: Optional[Sequence[str]] = None
+    reactions: Optional[Sequence[SlackReaction]] = None
+    _canonical_json: PrivateAttr
+
+    def __init__(self: Self, **data: Dict[str, Any]) -> None:
+        super().__init__(**data)
+        self._canonical_json = PrivateAttr(default_factory=lambda: dumps(data, sort_keys=True).encode())
+
+    def get_chunks(self: Self) -> Sequence[Chunk]:
+        return (Chunk(text=self.text, parent_id=self.id, chunk_id="", metadata=self.metadata), )
+
+    def get_metadata(self: Self) -> Dict[str, Any]:
+        return {
+            "modificationTime": datetime.fromtimestamp(float(self.ts))
+        }
+
+    def get_text(self: Self) -> str:
+        return self.text
+
     @property
-    def key(self) -> str:
+    def bytes(self: Self) -> bytes:
+        return self._canonical_json
+
+    @property
+    def id(self: Self) -> str:
         """Unique identifier for this message."""
-        return f"slack:{self.channel_id}:{self.timestamp}"
+        return f"slack-message:{self.channel}:{self.ts}"
+
+class SlackResponse(BaseModel): # TODO: This should also be based on Content as it is a SlackMessage―just not one for which we know the identity yet.
+    """Represents a response message to be sent to Slack."""
+
+    text: str
+    channel: Optional[str]
+    thread_ts: Optional[str] = None
src/ctp_slack_bot/models/vector_query.py CHANGED
@@ -1,5 +1,5 @@
-from pydantic import BaseModel, Field, validator
-from typing import Optional, List, Dict, Any
+from pydantic import BaseModel, Field, model_validator
+from typing import Any, Dict, Optional, Sequence

 class VectorQuery(BaseModel):
     """Model for vector database similarity search queries.
@@ -10,7 +10,8 @@ class VectorQuery(BaseModel):
     score_threshold: Minimum similarity score threshold for inclusion in results
     filter_metadata: Optional filters for metadata fields
     """
-    query_text: str
+
+    query_embeddings: Sequence[float]
     k: int
     score_threshold: float = Field(default=0.7)
     filter_metadata: Optional[Dict[str, Any]] = None
src/ctp_slack_bot/services/__init__.py CHANGED
@@ -1,7 +1,9 @@
 from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
 from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
 from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
+from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
 from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
+from ctp_slack_bot.services.language_model_service import LanguageModelService
 from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
 from ctp_slack_bot.services.slack_service import SlackService
 from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
src/ctp_slack_bot/services/answer_retrieval_service.py CHANGED
@@ -1,65 +1,31 @@
-# from asyncio import create_task
 from loguru import logger
-from openai import OpenAI
 from pydantic import BaseModel, model_validator
-from typing import List, Optional, Self, Tuple
+from typing import Collection, Self

 from ctp_slack_bot.core import Settings
-from ctp_slack_bot.models import RetreivedContext, SlackMessage
+from ctp_slack_bot.enums import EventType
+from ctp_slack_bot.models import Chunk, SlackMessage, SlackResponse
 from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
+from ctp_slack_bot.services.language_model_service import LanguageModelService


-class AnswerRetrievalService(BaseModel): # TODO: this should separate the OpenAI backend out into its own service.
+class AnswerRetrievalService(BaseModel):
     """
-    Service for language model operations.
+    Service for context-based answer retrievel from a language model.
     """

     settings: Settings
     event_brokerage_service: EventBrokerageService
-    client: OpenAI # TODO: this should separate the OpenAI backend out into its own service, one that is agnostic.
-
-    class Config:
-        arbitrary_types_allowed = True
+    language_model_service: LanguageModelService

     @model_validator(mode='after')
     def post_init(self: Self) -> Self:
         logger.debug("Created {}", self.__class__.__name__)
         return self

-    def generate_answer(self, question: SlackMessage, context: List[RetreivedContext]) -> str:
-        """Generate a response using OpenAI's API with retrieved context.
-
-        Args:
-            question (str): The user's question
-            context (List[RetreivedContext]): List of RetreivedContext
-
-        Returns:
-            str: Generated answer
-        """
-        # Prepare context string from retrieved chunks
-        context_str = ""
-        for c in context:
-            context_str += f"{c.contextual_text}\n"
-
-        # logger.info(f"Generating response for question: {question}")
-        # logger.info(f"Using {len(context)} context chunks")
-
-        # Create messages for the chat completion
-        messages = [
-            {"role": "system", "content": settings.SYSTEM_PROMPT},
-            {"role": "user", "content":
-             f"""Student Auestion: {question.text}
-             Context from class materials and transcripts: {context_str}
-             Please answer the Student Auestion based on the Context from class materials and transcripts. If the context doesn't contain relevant information, acknowledge that and suggest asking the professor."""}
-        ]
-
-        # Generate response
-        response = self.client.chat.completions.create(
-            model=settings.CHAT_MODEL,
-            messages=messages,
-            max_tokens=settings.MAX_TOKENS,
-            temperature=settings.TEMPERATURE
-        )
-
-        return response.choices[0].message.content
+    def push(self: Self, question: SlackMessage, context: Collection[Chunk]) -> None:
+        channel_to_respond_to = question.channel
+        thread_to_respond_to = question.thread_ts if question.thread_ts else question.ts
+        answer = self.language_model_service.answer_question(question.text, context)
+        slack_response = SlackResponse(text=answer, channel=channel_to_respond_to, thread_ts=thread_to_respond_to)
+        self.event_brokerage_service.publish(EventType.OUTGOING_SLACK_RESPONSE, slack_response)
src/ctp_slack_bot/services/content_ingestion_service.py CHANGED
@@ -1,8 +1,11 @@
 from loguru import logger
 from pydantic import BaseModel, model_validator
-from typing import Self
+from typing import Self, Sequence

 from ctp_slack_bot.core import Settings
+from ctp_slack_bot.enums import EventType
+from ctp_slack_bot.models import Chunk, Content, SlackMessage
+from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
 from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
 from ctp_slack_bot.services.vectorization_service import VectorizationService

@@ -12,10 +15,33 @@ class ContentIngestionService(BaseModel):
     """

     settings: Settings
+    event_brokerage_service: EventBrokerageService
     vector_database_service: VectorDatabaseService
     vectorization_service: VectorizationService

     @model_validator(mode='after')
     def post_init(self: Self) -> Self:
         logger.debug("Created {}", self.__class__.__name__)
+        self.event_brokerage_service.subscribe(EventType.INCOMING_CONTENT, self.process_incoming_content)
+        self.event_brokerage_service.subscribe(EventType.INCOMING_SLACK_MESSAGE, self.process_incoming_slack_message)
         return self
+
+    def process_incoming_content(self: Self, content: Content) -> None:
+        logger.debug("Content ingestion service received content with metadata: {}", content.get_metadata())
+        # if self.vector_database_service.has_content(content.id) # TODO
+        #     logger.debug("Ignored content with ID {} because it already exists in the database.", content.id)
+        #     return
+        chunks = content.get_chunks()
+        self.__vectorize_and_store_chunks_in_database(chunks)
+        logger.debug("Stored {} vectorized chunk(s) in the database.", len(chunks))
+
+    def process_incoming_slack_message(self: Self, slack_message: SlackMessage) -> None:
+        logger.debug("Content ingestion service received a Slack message: {}", slack_message.text)
+        chunks = slack_message.get_chunks()
+        self.__vectorize_and_store_chunks_in_database(chunks)
+        logger.debug("Stored {} vectorized chunk(s) in the database.", len(chunks))
+
+    def __vectorize_and_store_chunks_in_database(self: Self, chunks: Sequence[Chunk]) -> None:
+        # vectorized_chunks = self.vectorization_service.vectorize(chunks) # TODO
+        # self.vector_database_service.store(vectorized_chunks) # TODO
+        pass
src/ctp_slack_bot/services/context_retrieval_service.py CHANGED
@@ -1,9 +1,9 @@
 from loguru import logger
 from pydantic import BaseModel, model_validator
-from typing import Any, Dict, List, Optional, Self
+from typing import Self, Sequence

 from ctp_slack_bot.core.config import Settings
-from ctp_slack_bot.models import RetreivedContext, SlackMessage, VectorQuery
+from ctp_slack_bot.models import Chunk, SlackMessage, VectorQuery, VectorizedChunk
 from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
 from ctp_slack_bot.services.vectorization_service import VectorizationService

@@ -20,14 +20,15 @@ class ContextRetrievalService(BaseModel):
     def post_init(self: Self) -> Self:
         logger.debug("Created {}", self.__class__.__name__)
         return self
-
-    async def initialize(self):
-        """
-        Initialize the required services.
-        """
-        await self.vector_database_service.initialize()
-
-    async def get_context(self, message: SlackMessage) -> List[RetreivedContext]:
+
+    # Should not allow initialization calls to bubble up all the way to the surface ― sequester in `post_init` or the class on which it depends.
+    # async def initialize(self):
+    #     """
+    #     Initialize the required services.
+    #     """
+    #     await self.vector_database_service.initialize()
+
+    def get_context(self, message: SlackMessage) -> Sequence[Chunk]:
         """
         Retrieve relevant context for a given Slack message.

@@ -43,36 +44,37 @@ class ContextRetrievalService(BaseModel):
         Returns:
             List[RetreivedContext]: List of retrieved context items with similarity scores
         """
-        if not message.is_question:
-            logger.debug(f"Message {message.key} is not a question, skipping context retrieval")
-            return []
+        # if not message.is_question:
+        #     logger.debug(f"Message {message.key} is not a question, skipping context retrieval")
+        #     return []

-        try:
-            # Vectorize the message text
-            embeddings = self.vectorization_service.get_embeddings([message.text])
-            if embeddings is None or len(embeddings) == 0:
-                logger.error(f"Failed to generate embedding for message: {message.key}")
-                return []
+        # try:
+        #     # Vectorize the message text
+        #     embeddings = self.vectorization_service.vectorize([message.text])
+        #     if embeddings is None or len(embeddings) == 0:
+        #         logger.error(f"Failed to generate embedding for message: {message.key}")
+        #         return []

-            query_embedding = embeddings[0].tolist()
+        #     query_embedding = embeddings[0].tolist()

-            # Create vector query
-            vector_query = VectorQuery(
-                query_text=message.text,
-                k=settings.TOP_K_MATCHES,
-                score_threshold=0.7 # Minimum similarity threshold
-            )
+        #     # Create vector query
+        #     vector_query = VectorQuery(
+        #         query_text=message.text,
+        #         k=self.settings.TOP_K_MATCHES,
+        #         score_threshold=0.7 # Minimum similarity threshold
+        #     )

-            # Search for similar content in vector database
-            context_results = await self.vector_database_service.search_by_similarity(
-                query=vector_query,
-                query_embedding=query_embedding
-            )
+        #     # Search for similar content chunks in vector database
+        #     context_results = await self.vector_database_service.search_by_similarity(
+        #         query=vector_query,
+        #         query_embedding=query_embedding
+        #     )

-            logger.info(f"Retrieved {len(context_results)} context items for message: {message.key}")
-            return context_results
+        #     logger.info(f"Retrieved {len(context_results)} context items for message: {message.key}")
+        #     return context_results

-        except Exception as e:
-            logger.error(f"Error retrieving context for message {message.key}: {str(e)}")
-            return []
-
+        # except Exception as e:
+        #     logger.error(f"Error retrieving context for message {message.key}: {str(e)}")
+        #     return []
+        return (VectorizedChunk(text="Mock context chunk", parent_id="lol", chunk_id="no", metadata={}),
+                VectorizedChunk(text="Moar mock context chunk", parent_id="lol", chunk_id="wut", metadata={}))
src/ctp_slack_bot/services/embeddings_model_service.py ADDED
@@ -0,0 +1,48 @@
+from loguru import logger
+from openai import OpenAI
+from pydantic import BaseModel, PrivateAttr, model_validator
+from typing import Any, Dict, Sequence, Self
+
+from ctp_slack_bot.core import Settings
+
+class EmbeddingsModelService(BaseModel):
+    """
+    Service for embeddings model operations.
+    """
+
+    settings: Settings
+    _open_ai_client: PrivateAttr = PrivateAttr()
+
+    def __init__(self: Self, **data: Dict[str, Any]) -> None:
+        super().__init__(**data)
+        self._open_ai_client = OpenAI(api_key=self.settings.OPENAI_API_KEY.get_secret_value())
+
+    @model_validator(mode='after')
+    def post_init(self: Self) -> Self:
+        logger.debug("Created {}", self.__class__.__name__)
+        return self
+
+    def get_embeddings(self: Self, texts: Sequence[str]) -> Sequence[Sequence[float]]:
+        """Get embeddings for a collection of texts using OpenAI’s API.
+
+        Args:
+            texts (Collection[str]): Collection of text chunks to embed
+
+        Returns:
+            NDArray: Array of embeddings with shape (n_texts, VECTOR_DIMENSION)
+
+        Raises:
+            ValueError: If the embedding dimensions don't match expected size
+        """
+        logger.debug("Creating embeddings for {} text string(s)…", len(texts))
+        response = self._open_ai_client.embeddings.create(
+            model=self.settings.EMBEDDING_MODEL,
+            input=texts,
+            encoding_format="float" # Ensure we get raw float values.
+        )
+        embeddings = tuple(tuple(data.embedding) for data in response.data)
+        match embeddings:
+            case (first, _) if len(first) != self.settings.VECTOR_DIMENSION:
+                logger.error("Embedding dimension mismatch and/or misconfiguration: expected configured dimension {}, but got {}.", self.settings.VECTOR_DIMENSION, len(first))
+                raise ValueError() # TODO: raise a more specific type.
+        return embeddings
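A rough usage sketch for the new service follows; it calls the live OpenAI embeddings endpoint, so treat it as illustrative only, and note that the Settings fields it relies on (OPENAI_API_KEY, EMBEDDING_MODEL, VECTOR_DIMENSION) are taken from the code above.

    # Hypothetical call site; not part of this commit.
    settings = Settings()  # pydantic settings read from the environment
    service = EmbeddingsModelService(settings=settings)
    vectors = service.get_embeddings(("first chunk of text", "second chunk of text"))
    assert len(vectors) == 2                              # one embedding per input string
    assert len(vectors[0]) == settings.VECTOR_DIMENSION   # mirrors the dimension guard above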
src/ctp_slack_bot/services/event_brokerage_service.py CHANGED
@@ -1,38 +1,46 @@
-# from asyncio import create_task
+
+from asyncio import create_task, iscoroutinefunction, to_thread
+from collections import defaultdict
 from loguru import logger
-from openai import OpenAI
-from pydantic import BaseModel, model_validator
+from pydantic import BaseModel, model_validator, PrivateAttr
 from typing import Any, Callable, Dict, List, Self

-from ctp_slack_bot.core import Settings
-from ctp_slack_bot.models import RetreivedContext, SlackMessage
-from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService
-from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
-
+from ctp_slack_bot.enums import EventType

 class EventBrokerageService(BaseModel):
     """
     Service for brokering events between services.
     """

-    subscribers: Dict[str, List[Callable]] = {}
-
-    class Config:
-        arbitrary_types_allowed = True
+    _subscribers: PrivateAttr = PrivateAttr(default_factory=lambda: defaultdict(list))

     @model_validator(mode='after')
     def post_init(self: Self) -> Self:
         logger.debug("Created {}", self.__class__.__name__)
         return self

-    def subscribe(self: Self, event_type: str, callback: Callable) -> None:
+    def subscribe(self: Self, type: EventType, callback: Callable) -> None:
         """Subscribe to an event type with a callback function."""
-        if event_type not in self.subscribers:
-            self.subscribers[event_type] = []
-        self.subscribers[event_type].append(callback)
-
-    def publish(self: Self, event_type: str, data: Any = None) -> None:
+        logger.debug("1 new subscriber is listening for {} events.", type)
+        subscribers = self._subscribers[type]
+        subscribers.append(callback)
+        logger.debug("Event type {} has {} subscriber(s).", type, len(subscribers))
+
+    def publish(self: Self, type: EventType, data: Any = None) -> None:
         """Publish an event with optional data to all subscribers."""
-        if event_type in self.subscribers:
-            for callback in self.subscribers[event_type]:
-                callback(data)
+        subscribers = self._subscribers[type]
+        if not subscribers:
+            logger.debug("No subscribers handle event {}: {}", type, data)
+            return
+        logger.debug("Broadcasting event {} to {} subscriber(s): {}", type, len(subscribers), data)
+        for callback in subscribers:
+            if iscoroutinefunction(callback):
+                task = create_task(callback(data))
+                task.add_done_callback(lambda done_task: logger.error("Error in asynchronous event callback handling event {}: {}", type, done_task.exception())
+                                       if done_task.exception()
+                                       else None)
+            else:
+                try:
+                    create_task(to_thread(callback, data))
+                except Exception as e:
+                    logger.error("Error scheduling synchronous callback to handle event {}: {}", type, e)
src/ctp_slack_bot/services/language_model_service.py ADDED
@@ -0,0 +1,56 @@
+from loguru import logger
+from openai import OpenAI
+from openai.types.chat import ChatCompletion
+from pydantic import BaseModel, PrivateAttr, model_validator
+from typing import Collection, Self
+
+from ctp_slack_bot.core import Settings
+from ctp_slack_bot.models import Chunk
+
+class LanguageModelService(BaseModel):
+    """
+    Service for language model operations.
+    """
+
+    settings: Settings
+    _open_ai_client: PrivateAttr = PrivateAttr()
+
+    def __init__(self: Self, **data) -> None:
+        super().__init__(**data)
+        self._open_ai_client = OpenAI(api_key=self.settings.OPENAI_API_KEY.get_secret_value())
+
+    @model_validator(mode='after')
+    def post_init(self: Self) -> Self:
+        logger.debug("Created {}", self.__class__.__name__)
+        return self
+
+    def answer_question(self, question: str, context: Collection[Chunk]) -> str:
+        """Generate a response using OpenAI’s API with retrieved context.
+
+        Args:
+            question (str): The user’s question
+            context (List[RetreivedContext]): The context retreived for answering the question
+
+        Returns:
+            str: Generated answer
+        """
+        logger.debug("Generating response for question “{}” using {} context chunks…", question, len(context))
+        messages = [
+            {"role": "system", "content": self.settings.SYSTEM_PROMPT},
+            {"role": "user", "content":
+             f"""Student Question: {question}
+
+             Context from class materials and transcripts:
+             {'\n'.join(chunk.text for chunk in context)}
+
+             Please answer the Student Question based on the Context from class materials and transcripts. If the context doesn’t contain relevant information, acknowledge that and suggest asking the professor."""}
+        ]
+        # response: ChatCompletion = self._open_ai_client.chat.completions.create(
+        #     model=self.settings.CHAT_MODEL,
+        #     messages=messages,
+        #     max_tokens=self.settings.MAX_TOKENS,
+        #     temperature=self.settings.TEMPERATURE
+        # )
+
+        # return response.choices[0].message.content
+        return f"A mock response to “{question}”"
src/ctp_slack_bot/services/question_dispatch_service.py CHANGED
@@ -1,11 +1,11 @@
 # from asyncio import create_task
 from loguru import logger
-from openai import OpenAI
 from pydantic import BaseModel, model_validator
-from typing import List, Optional, Self, Tuple
+from typing import Self

 from ctp_slack_bot.core import Settings
-from ctp_slack_bot.models import RetreivedContext, SlackMessage
+from ctp_slack_bot.enums import EventType
+from ctp_slack_bot.models import Chunk, SlackMessage
 from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService
 from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService
 from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
@@ -24,8 +24,11 @@ class QuestionDispatchService(BaseModel):
     @model_validator(mode='after')
     def post_init(self: Self) -> Self:
         logger.debug("Created {}", self.__class__.__name__)
+        self.event_brokerage_service.subscribe(EventType.INCOMING_SLACK_MESSAGE, self.__process_incoming_slack_message)
         return self

-    def push(self: Self, message: SlackMessage) -> None:
-        context = self.context_retrieval_service.get_context(message)
-        self.answer_retrieval_service.generate_answer(message, context)
+    def __process_incoming_slack_message(self: Self, message: SlackMessage) -> None:
+        if message.subtype != 'bot_message':
+            logger.debug("Question dispatch service received an answerable question: {}", message.text)
+            context = self.context_retrieval_service.get_context(message)
+            self.answer_retrieval_service.push(message, context)
src/ctp_slack_bot/services/slack_service.py CHANGED
@@ -1,11 +1,11 @@
-# from asyncio import create_task
 from loguru import logger
 from openai import OpenAI
 from pydantic import BaseModel, model_validator
-from typing import List, Optional, Self, Tuple
+from slack_bolt import App
+from typing import Any, Dict, Self

-from ctp_slack_bot.core import Settings
-from ctp_slack_bot.models import RetreivedContext, SlackMessage
+from ctp_slack_bot.enums import EventType
+from ctp_slack_bot.models import SlackMessage, SlackResponse
 from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService


@@ -14,10 +14,36 @@ class SlackService(BaseModel):
     Service for interfacing with Slack.
     """

-    settings: Settings
     event_brokerage_service: EventBrokerageService
+    slack_bolt_app: App
+
+    class Config:
+        arbitrary_types_allowed = True

     @model_validator(mode='after')
     def post_init(self: Self) -> Self:
+        self.event_brokerage_service.subscribe(EventType.OUTGOING_SLACK_RESPONSE, self.send_message)
         logger.debug("Created {}", self.__class__.__name__)
         return self
+
+    def adapt_event_payload(self: Self, event: Dict[str, Any]) -> SlackMessage:
+        return SlackMessage(
+            type=event.get("type"),
+            subtype=event.get("subtype"),
+            channel=event.get("channel"),
+            channel_type=event.get("channel_type"),
+            user=event.get("user"),
+            bot_id=event.get("bot_id"),
+            thread_ts=event.get("thread_ts"),
+            text=event.get("text", ""),
+            ts=event.get("ts"),
+            event_ts=event.get("event_ts")
+        )
+
+    def process_message(self: Self, event: Dict[str, Any]) -> None:
+        slack_message = self.adapt_event_payload(event.get("event", {}))
+        logger.debug("Received message from Slack: {}", slack_message)
+        self.event_brokerage_service.publish(EventType.INCOMING_SLACK_MESSAGE, slack_message)
+
+    def send_message(self: Self, message: SlackResponse) -> None:
+        self.slack_bolt_app.client.chat_postMessage(channel=message.channel, text=message.text, thread_ts=message.thread_ts)
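For reference, process_message expects the outer Bolt event body and unwraps its "event" key before adapting it to a SlackMessage. The payload below is a hypothetical minimal shape covering only the fields adapt_event_payload reads; real Slack events carry many more.

    # Hypothetical minimal payload; illustrative only.
    body = {
        "event": {
            "type": "message",
            "channel": "C0123456789",
            "user": "U0123456789",
            "text": "When is the homework due?",
            "ts": "1712345678.000100",
            "event_ts": "1712345678.000100",
        }
    }
    slack_service.process_message(body)  # publishes an INCOMING_SLACK_MESSAGE event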
src/ctp_slack_bot/services/vector_database_service.py CHANGED
@@ -1,10 +1,10 @@
1
  from loguru import logger
2
  from pydantic import BaseModel, model_validator
3
- from typing import Any, Dict, List, Optional, Self
4
 
5
  from ctp_slack_bot.core import Settings
6
  from ctp_slack_bot.db import MongoDB
7
- from ctp_slack_bot.models import VectorQuery, RetreivedContext
8
 
9
  class VectorDatabaseService(BaseModel): # TODO: this should not rely specifically on MongoDB.
10
  """
@@ -19,13 +19,15 @@ class VectorDatabaseService(BaseModel): # TODO: this should not rely specificall
19
  logger.debug("Created {}", self.__class__.__name__)
20
  return self
21
 
22
- async def initialize(self):
23
- """
24
- Initialize the database connection.
25
- """
26
- await mongodb.initialize()
 
27
 
28
- async def store(self, text: str, embedding: List[float], metadata: Dict[str, Any]) -> str:
 
29
  """
30
  Store text and its embedding vector in the database.
31
 
@@ -37,8 +39,8 @@ class VectorDatabaseService(BaseModel): # TODO: this should not rely specificall
37
  Returns:
38
  str: The ID of the stored document
39
  """
40
- if not mongodb.initialized:
41
- await mongodb.initialize()
42
 
43
  try:
44
  # Create document to store
@@ -49,7 +51,7 @@ class VectorDatabaseService(BaseModel): # TODO: this should not rely specificall
49
  }
50
 
51
  # Insert into collection
52
- result = await mongodb.vector_collection.insert_one(document)
53
  logger.debug(f"Stored document with ID: {result.inserted_id}")
54
 
55
  return str(result.inserted_id)
@@ -57,7 +59,7 @@ class VectorDatabaseService(BaseModel): # TODO: this should not rely specificall
57
  logger.error(f"Error storing embedding: {str(e)}")
58
  raise
59
 
60
- async def search_by_similarity(self, query: VectorQuery, query_embedding: List[float]) -> List[RetreivedContext]:
61
  """
62
  Query the vector database for similar documents.
63
 
@@ -68,8 +70,8 @@ class VectorDatabaseService(BaseModel): # TODO: this should not rely specificall
68
  Returns:
69
  List[RetreivedContext]: List of similar documents with similarity scores
70
  """
71
- if not mongodb.initialized:
72
- await mongodb.initialize()
73
 
74
  try:
75
  # Build aggregation pipeline for vector search
@@ -100,7 +102,7 @@ class VectorDatabaseService(BaseModel): # TODO: this should not rely specificall
100
  pipeline.insert(1, {"$match": metadata_filter})
101
 
102
  # Execute the pipeline
103
- results = await mongodb.vector_collection.aggregate(pipeline).to_list(length=query.k)
104
 
105
  # Convert to RetreivedContext objects directly
106
  context_results = []
@@ -113,7 +115,7 @@ class VectorDatabaseService(BaseModel): # TODO: this should not rely specificall
113
  continue
114
 
115
  context_results.append(
116
- RetreivedContext(
117
  contextual_text=result["text"],
118
  metadata_source=result["metadata"].get("source", "unknown"),
119
  similarity_score=normalized_score,
 
1
  from loguru import logger
2
  from pydantic import BaseModel, model_validator
3
+ from typing import Any, Collection, Dict, List, Optional, Self, Sequence
4
 
5
  from ctp_slack_bot.core import Settings
6
  from ctp_slack_bot.db import MongoDB
7
+ from ctp_slack_bot.models import Chunk, Content, VectorizedChunk, VectorQuery
8
 
9
  class VectorDatabaseService(BaseModel): # TODO: this should not rely specifically on MongoDB.
10
  """
 
19
  logger.debug("Created {}", self.__class__.__name__)
20
  return self
21
 
22
+ # Initialization calls should not bubble up all the way to the surface; sequester them in `post_init` or in the class on which this one depends.
23
+ # async def initialize(self) -> None:
24
+ # """
25
+ # Initialize the database connection.
26
+ # """
27
+ # await self.mongo_db.initialize()
28
 
29
+ # TODO: Weigh the cost of going all async.
30
+ async def store(self, chunks: Collection[VectorizedChunk]) -> None:
31
  """
32
  Store text and its embedding vector in the database.
33
 
 
39
  Returns:
40
  str: The ID of the stored document
41
  """
42
+ if not self.mongo_db.initialized:
43
+ await self.mongo_db.initialize()
44
 
45
  try:
46
  # Create document to store
 
51
  }
52
 
53
  # Insert into collection
54
+ result = await self.mongo_db.vector_collection.insert_one(document)
55
  logger.debug(f"Stored document with ID: {result.inserted_id}")
56
 
57
  return str(result.inserted_id)
 
59
  logger.error(f"Error storing embedding: {str(e)}")
60
  raise
61
 
62
+ async def search_by_similarity(self, query: VectorQuery) -> Sequence[Chunk]:
63
  """
64
  Query the vector database for similar documents.
65
 
 
70
  Returns:
71
  List[RetreivedContext]: List of similar documents with similarity scores
72
  """
73
+ if not self.mongo_db.initialized:
74
+ await self.mongo_db.initialize()
75
 
76
  try:
77
  # Build aggregation pipeline for vector search
 
102
  pipeline.insert(1, {"$match": metadata_filter})
103
 
104
  # Execute the pipeline
105
+ results = await self.mongo_db.vector_collection.aggregate(pipeline).to_list(length=query.k)
106
 
107
  # Convert to RetreivedContext objects directly
108
  context_results = []
 
115
  continue
116
 
117
  context_results.append(
118
+ Content(
119
  contextual_text=result["text"],
120
  metadata_source=result["metadata"].get("source", "unknown"),
121
  similarity_score=normalized_score,
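
The construction of the aggregation pipeline itself sits outside this hunk; only the `pipeline.insert(1, {"$match": metadata_filter})` call and the final `aggregate(...).to_list(length=query.k)` are visible. Below is a hedged sketch of one plausible shape for that pipeline, assuming MongoDB Atlas Vector Search; the index name, field names, and `VectorQuery` attributes are assumptions, not confirmed by this commit.

```python
from typing import Any, Dict, List

# Illustrative only: one plausible pipeline shape for search_by_similarity.
# "vector_index", "embedding", "text", and "metadata" are assumed names.
def build_pipeline(query_embedding: List[float], k: int) -> List[Dict[str, Any]]:
    return [
        {
            "$vectorSearch": {
                "index": "vector_index",      # assumed Atlas search index name
                "path": "embedding",          # assumed field holding the vectors
                "queryVector": query_embedding,
                "numCandidates": k * 10,
                "limit": k,
            }
        },
        # A metadata `$match` stage can be inserted at position 1, matching the
        # `pipeline.insert(1, {"$match": metadata_filter})` call in this hunk.
        {
            "$project": {
                "text": 1,
                "metadata": 1,
                "score": {"$meta": "vectorSearchScore"},
            }
        },
    ]
```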
src/ctp_slack_bot/services/vectorization_service.py CHANGED
@@ -1,10 +1,13 @@
1
  from loguru import logger
2
- import numpy as np
 
3
  from openai import OpenAI
4
  from pydantic import BaseModel, model_validator
5
- from typing import List, Optional, Self
6
 
7
  from ctp_slack_bot.core import Settings
 
 
8
 
9
  class VectorizationService(BaseModel):
10
  """
@@ -12,57 +15,21 @@ class VectorizationService(BaseModel):
12
  """
13
 
14
  settings: Settings
15
- client: OpenAI # TODO: this should separate the OpenAI backend out into its own service, one that is agnostic.
16
-
17
- class Config:
18
- arbitrary_types_allowed = True
19
 
20
  @model_validator(mode='after')
21
  def post_init(self: Self) -> Self:
22
  logger.debug("Created {}", self.__class__.__name__)
23
  return self
24
-
25
- def get_embeddings(self, texts: List[str]) -> np.ndarray:
26
- """Get embeddings for a list of texts using OpenAI's API.
27
-
28
- Args:
29
- texts (List[str]): List of text chunks to embed
30
-
31
- Returns:
32
- np.ndarray: Array of embeddings with shape (n_texts, VECTOR_DIMENSION)
33
-
34
- Raises:
35
- ValueError: If the embedding dimensions don't match expected size
36
- """
37
- try:
38
- # Use the initialized client instead of the global openai module
39
- response = self.client.embeddings.create(
40
- model=self.settings.EMBEDDING_MODEL,
41
- input=texts,
42
- encoding_format="float" # Ensure we get raw float values
43
- )
44
-
45
- # Extract embeddings and verify dimensions
46
- embeddings = np.array([data.embedding for data in response.data])
47
-
48
- if embeddings.shape[1] != self.settings.VECTOR_DIMENSION:
49
- raise ValueError(
50
- f"Embedding dimension mismatch. Expected {self.settings.VECTOR_DIMENSION}, "
51
- f"but got {embeddings.shape[1]}. Please update VECTOR_DIMENSION "
52
- f"in config.py to match the model's output."
53
- )
54
-
55
- return embeddings
56
-
57
- except Exception as e:
58
- print(f"Error getting embeddings: {str(e)}")
59
- pass
60
 
61
- def _test(self, list_of_strings: List[str] = ['Hello my sweet Svetlana.', 'You mean the world to me.']):
62
- """
63
- Test the vectorization service.
64
- """
65
- print('embedding list', list_of_strings)
66
- embeddings = self.get_embeddings(list_of_strings)
67
- print(embeddings)
68
- return embeddings
 
 
 
 
1
  from loguru import logger
2
+ from numpy import array
3
+ from numpy.typing import NDArray
4
  from openai import OpenAI
5
  from pydantic import BaseModel, model_validator
6
+ from typing import Self, Sequence
7
 
8
  from ctp_slack_bot.core import Settings
9
+ from ctp_slack_bot.models import Chunk, VectorizedChunk
10
+ from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService
11
 
12
  class VectorizationService(BaseModel):
13
  """
 
15
  """
16
 
17
  settings: Settings
18
+ embeddings_model_service: EmbeddingsModelService
 
 
 
19
 
20
  @model_validator(mode='after')
21
  def post_init(self: Self) -> Self:
22
  logger.debug("Created {}", self.__class__.__name__)
23
  return self
 
24
 
25
+ def vectorize(self: Self, chunks: Sequence[Chunk]) -> Sequence[VectorizedChunk]:
26
+ embeddings = self.embeddings_model_service.get_embeddings([chunk.text for chunk in chunks])
27
+ return tuple(VectorizedChunk(
28
+ text=chunk.text,
29
+ parent_id=chunk.parent_id,
30
+ chunk_id=chunk.chunk_id,
31
+ metadata=chunk.metadata,
32
+ embedding=embedding
33
+ )
34
+ for chunk, embedding
35
+ in zip(chunks, embeddings))
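
A hedged usage sketch for the new `vectorize` method, assuming `EmbeddingsModelService.get_embeddings` returns one embedding per input text in order, and that `Chunk` accepts the fields used in the constructor call above; the sample field values are hypothetical.

```python
from typing import Sequence

from ctp_slack_bot.models import Chunk, VectorizedChunk
from ctp_slack_bot.services.vectorization_service import VectorizationService

def vectorize_sample(vectorization_service: VectorizationService) -> Sequence[VectorizedChunk]:
    # The service instance is assumed to come from the dependency-injection container.
    chunks = (
        Chunk(text="Office hours moved to 3 PM.", parent_id="transcript-1", chunk_id="0", metadata={}),
        Chunk(text="Homework 4 is due Friday.", parent_id="transcript-1", chunk_id="1", metadata={}),
    )
    # One VectorizedChunk per input chunk, zipped with its embedding in order.
    return vectorization_service.vectorize(chunks)
```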
src/ctp_slack_bot/tasks/scheduler.py CHANGED
@@ -6,7 +6,7 @@ from loguru import logger
6
  from pytz import timezone
7
  from typing import Optional
8
 
9
- from ctp_slack_bot import Container
10
 
11
  @inject
12
  def start_scheduler(container: Container) -> AsyncIOScheduler:
 
6
  from pytz import timezone
7
  from typing import Optional
8
 
9
+ from ctp_slack_bot.containers import Container
10
 
11
  @inject
12
  def start_scheduler(container: Container) -> AsyncIOScheduler: