Spaces:
Runtime error
Runtime error
Refactor #4
Browse files
- notebooks/google_drive_web_vtt_vectorizer_and_storer.ipynb +5 -13
- src/ctp_slack_bot/containers.py +7 -2
- src/ctp_slack_bot/mime_type_handlers/base.py +0 -6
- src/ctp_slack_bot/mime_type_handlers/text/vtt.py +2 -2
- src/ctp_slack_bot/models/webvtt.py +8 -8
- src/ctp_slack_bot/services/embeddings_model_service.py +3 -7
- src/ctp_slack_bot/services/google_drive_service.py +9 -10
- src/ctp_slack_bot/services/language_model_service.py +2 -6
- src/ctp_slack_bot/services/slack_service.py +5 -5
notebooks/google_drive_web_vtt_vectorizer_and_storer.ipynb
CHANGED
@@ -27,6 +27,7 @@
|
|
27 |
"display_html = partial(display_html, raw=True)\n",
|
28 |
"\n",
|
29 |
"container = Container()\n",
|
|
|
30 |
"mongo_db = await container.mongo_db()\n",
|
31 |
"google_drive_service = container.google_drive_service()\n",
|
32 |
"vectorization_service = container.vectorization_service()\n",
|
@@ -85,19 +86,10 @@
|
|
85 |
"metadata": {},
|
86 |
"outputs": [],
|
87 |
"source": [
|
88 |
-
"web_vtt_parser =
|
89 |
"display_html(f\"<p>{escape(str(type(web_vtt_parser)))}</p><code>{id(web_vtt_parser)}</code>\")"
|
90 |
]
|
91 |
},
|
92 |
-
{
|
93 |
-
"cell_type": "code",
|
94 |
-
"execution_count": null,
|
95 |
-
"metadata": {},
|
96 |
-
"outputs": [],
|
97 |
-
"source": [
|
98 |
-
"display_html(f\"<code>{id(container.mime_type_handlers()[MIME_TYPE])}</code>\")"
|
99 |
-
]
|
100 |
-
},
|
101 |
{
|
102 |
"cell_type": "code",
|
103 |
"execution_count": null,
|
@@ -111,8 +103,8 @@
|
|
111 |
" \"modificationTime\": metadata.modified_time\n",
|
112 |
" },\n",
|
113 |
" google_drive_service.read_file_by_id(metadata.id))\n",
|
114 |
-
"
|
115 |
-
"
|
116 |
"\n",
|
117 |
"display_html(f\"Processed {len(web_vtts)} files.\")"
|
118 |
]
|
@@ -147,7 +139,7 @@
|
|
147 |
"metadata": {},
|
148 |
"outputs": [],
|
149 |
"source": [
|
150 |
-
"
|
151 |
]
|
152 |
}
|
153 |
],
|
|
|
27 |
"display_html = partial(display_html, raw=True)\n",
|
28 |
"\n",
|
29 |
"container = Container()\n",
|
30 |
+
"mime_type_handlers = container.mime_type_handlers()\n",
|
31 |
"mongo_db = await container.mongo_db()\n",
|
32 |
"google_drive_service = container.google_drive_service()\n",
|
33 |
"vectorization_service = container.vectorization_service()\n",
|
|
|
86 |
"metadata": {},
|
87 |
"outputs": [],
|
88 |
"source": [
|
89 |
+
"web_vtt_parser = mime_type_handlers[MIME_TYPE]\n",
|
90 |
"display_html(f\"<p>{escape(str(type(web_vtt_parser)))}</p><code>{id(web_vtt_parser)}</code>\")"
|
91 |
]
|
92 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
{
|
94 |
"cell_type": "code",
|
95 |
"execution_count": null,
|
|
|
103 |
" \"modificationTime\": metadata.modified_time\n",
|
104 |
" },\n",
|
105 |
" google_drive_service.read_file_by_id(metadata.id))\n",
|
106 |
+
" for metadata\n",
|
107 |
+
" in metadata_to_process)\n",
|
108 |
"\n",
|
109 |
"display_html(f\"Processed {len(web_vtts)} files.\")"
|
110 |
]
|
|
|
139 |
"metadata": {},
|
140 |
"outputs": [],
|
141 |
"source": [
|
142 |
+
"await container.shutdown_resources()"
|
143 |
]
|
144 |
}
|
145 |
],
|
src/ctp_slack_bot/containers.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
from dependency_injector.containers import DeclarativeContainer
|
2 |
from dependency_injector.providers import Callable, Dict, List, Resource, Singleton
|
3 |
from importlib import import_module
|
|
|
4 |
from pkgutil import iter_modules
|
5 |
from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
|
6 |
from slack_bolt.async_app import AsyncApp
|
@@ -50,8 +51,11 @@ class Container(DeclarativeContainer): # TODO: audit for potential async-related
|
|
50 |
vectorized_chunk_repository = Resource (MongoVectorizedChunkRepositoryResource,
|
51 |
settings=settings,
|
52 |
mongo_db=mongo_db)
|
53 |
-
|
54 |
settings=settings)
|
|
|
|
|
|
|
55 |
vectorization_service = Singleton(VectorizationService,
|
56 |
settings=settings,
|
57 |
embeddings_model_service=embeddings_model_service)
|
@@ -65,7 +69,8 @@ class Container(DeclarativeContainer): # TODO: audit for potential async-related
|
|
65 |
vectorization_service=vectorization_service,
|
66 |
vectorized_chunk_repository=vectorized_chunk_repository)
|
67 |
language_model_service = Singleton(LanguageModelService,
|
68 |
-
settings=settings
|
|
|
69 |
answer_retrieval_service = Singleton(AnswerRetrievalService,
|
70 |
settings=settings,
|
71 |
event_brokerage_service=event_brokerage_service,
|
|
|
1 |
from dependency_injector.containers import DeclarativeContainer
|
2 |
from dependency_injector.providers import Callable, Dict, List, Resource, Singleton
|
3 |
from importlib import import_module
|
4 |
+
from openai import AsyncOpenAI
|
5 |
from pkgutil import iter_modules
|
6 |
from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
|
7 |
from slack_bolt.async_app import AsyncApp
|
|
|
51 |
vectorized_chunk_repository = Resource (MongoVectorizedChunkRepositoryResource,
|
52 |
settings=settings,
|
53 |
mongo_db=mongo_db)
|
54 |
+
open_ai_client = Singleton(lambda settings: AsyncOpenAI(api_key=settings.openai_api_key.get_secret_value()),
|
55 |
settings=settings)
|
56 |
+
embeddings_model_service = Singleton(EmbeddingsModelService,
|
57 |
+
settings=settings,
|
58 |
+
open_ai_client=open_ai_client)
|
59 |
vectorization_service = Singleton(VectorizationService,
|
60 |
settings=settings,
|
61 |
embeddings_model_service=embeddings_model_service)
|
|
|
69 |
vectorization_service=vectorization_service,
|
70 |
vectorized_chunk_repository=vectorized_chunk_repository)
|
71 |
language_model_service = Singleton(LanguageModelService,
|
72 |
+
settings=settings,
|
73 |
+
open_ai_client=open_ai_client)
|
74 |
answer_retrieval_service = Singleton(AnswerRetrievalService,
|
75 |
settings=settings,
|
76 |
event_brokerage_service=event_brokerage_service,
|
src/ctp_slack_bot/mime_type_handlers/base.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
from abc import ABC, abstractmethod
|
2 |
-
from functools import lru_cache
|
3 |
from importlib import import_module
|
4 |
from types import MappingProxyType
|
5 |
from typing import Any, ClassVar, Mapping, Optional
|
@@ -32,8 +31,3 @@ class MimeTypeHandlerRegistry:
|
|
32 |
cls._registry[mime_type] = handler_cls
|
33 |
return handler_cls
|
34 |
return decorator
|
35 |
-
|
36 |
-
@classmethod
|
37 |
-
@lru_cache
|
38 |
-
def get_handler(cls, mime_type: str) -> Optional[MimeTypeHandler]:
|
39 |
-
return cls._registry.get(mime_type)()
|
|
|
1 |
from abc import ABC, abstractmethod
|
|
|
2 |
from importlib import import_module
|
3 |
from types import MappingProxyType
|
4 |
from typing import Any, ClassVar, Mapping, Optional
|
|
|
31 |
cls._registry[mime_type] = handler_cls
|
32 |
return handler_cls
|
33 |
return decorator
|
|
|
|
|
|
|
|
|
|
src/ctp_slack_bot/mime_type_handlers/text/vtt.py
CHANGED
@@ -3,7 +3,7 @@ from io import BytesIO
|
|
3 |
from pydantic import ConfigDict
|
4 |
from re import compile as compile_re, Pattern
|
5 |
from types import MappingProxyType
|
6 |
-
from typing import Any, Mapping, Optional, Self
|
7 |
from webvtt import WebVTT
|
8 |
|
9 |
from ctp_slack_bot.mime_type_handlers import MimeTypeHandler, MimeTypeHandlerRegistry
|
@@ -15,7 +15,7 @@ class WebVTTMimeTypeHandler(MimeTypeHandler):
|
|
15 |
|
16 |
model_config = ConfigDict(frozen=True)
|
17 |
|
18 |
-
ISO_DATE_TIME_PATTERN: Pattern = compile_re(r"Start time: (\d{4}-\d{2}-\d{2}(?: \d{2}:\d{2}:\d{2}(?:Z|[+-]\d{2}:\d{2})?)?)")
|
19 |
|
20 |
@classmethod
|
21 |
def __get_start_time(cls, web_vtt: WebVTT) -> Optional[datetime]:
|
|
|
3 |
from pydantic import ConfigDict
|
4 |
from re import compile as compile_re, Pattern
|
5 |
from types import MappingProxyType
|
6 |
+
from typing import Any, ClassVar, Mapping, Optional, Self
|
7 |
from webvtt import WebVTT
|
8 |
|
9 |
from ctp_slack_bot.mime_type_handlers import MimeTypeHandler, MimeTypeHandlerRegistry
|
|
|
15 |
|
16 |
model_config = ConfigDict(frozen=True)
|
17 |
|
18 |
+
ISO_DATE_TIME_PATTERN: ClassVar[Pattern] = compile_re(r"Start time: (\d{4}-\d{2}-\d{2}(?: \d{2}:\d{2}:\d{2}(?:Z|[+-]\d{2}:\d{2})?)?)")
|
19 |
|
20 |
@classmethod
|
21 |
def __get_start_time(cls, web_vtt: WebVTT) -> Optional[datetime]:
|
src/ctp_slack_bot/models/webvtt.py
CHANGED
@@ -3,23 +3,20 @@ from io import BytesIO
|
|
3 |
from more_itertools import windowed
|
4 |
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
5 |
from types import MappingProxyType
|
6 |
-
from typing import Any, Literal, Mapping, Optional, Self
|
7 |
from webvtt import Caption, WebVTT
|
8 |
|
9 |
from ctp_slack_bot.models.base import Chunk, Content
|
10 |
from ctp_slack_bot.utils import to_deep_immutable
|
11 |
|
12 |
|
13 |
-
CHUNK_FRAMES_OVERLAP = 1
|
14 |
-
CHUNK_FRAMES_WINDOW = 5
|
15 |
-
SPEAKER_SPEECH_TEXT_SEPARATOR = ": "
|
16 |
-
|
17 |
-
|
18 |
class WebVTTFrame(BaseModel):
|
19 |
"""Represents a WebVTT frame"""
|
20 |
|
21 |
model_config = ConfigDict(frozen=True)
|
22 |
|
|
|
|
|
23 |
identifier: str
|
24 |
start: timedelta
|
25 |
end: timedelta
|
@@ -31,7 +28,7 @@ class WebVTTFrame(BaseModel):
|
|
31 |
identifier = caption.identifier if caption.identifier else str(index)
|
32 |
start = timedelta(**caption.start_time.__dict__)
|
33 |
end = timedelta(**caption.end_time.__dict__)
|
34 |
-
match caption.text.split(
|
35 |
case [speaker, speech]:
|
36 |
return cls(identifier=identifier, start=start, end=end, speaker=speaker, speech=speech)
|
37 |
case [speech]:
|
@@ -41,6 +38,9 @@ class WebVTTFrame(BaseModel):
|
|
41 |
class WebVTTContent(Content):
|
42 |
"""Represents parsed WebVTT content."""
|
43 |
|
|
|
|
|
|
|
44 |
id: str
|
45 |
metadata: Mapping[str, Any] = Field(default_factory=lambda: MappingProxyType({}))
|
46 |
start_time: Optional[datetime]
|
@@ -52,7 +52,7 @@ class WebVTTContent(Content):
|
|
52 |
def get_chunks(self: Self) -> tuple[Chunk]:
|
53 |
windows = (tuple(filter(None, window))
|
54 |
for window
|
55 |
-
in windowed(self.frames, CHUNK_FRAMES_WINDOW, step=CHUNK_FRAMES_WINDOW-CHUNK_FRAMES_OVERLAP))
|
56 |
return tuple(Chunk(text="\n\n".join(": ".join(filter(None, (frame.speaker, frame.speech)))
|
57 |
for frame
|
58 |
in frames),
|
|
|
3 |
from more_itertools import windowed
|
4 |
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
5 |
from types import MappingProxyType
|
6 |
+
from typing import Any, ClassVar, Literal, Mapping, Optional, Self
|
7 |
from webvtt import Caption, WebVTT
|
8 |
|
9 |
from ctp_slack_bot.models.base import Chunk, Content
|
10 |
from ctp_slack_bot.utils import to_deep_immutable
|
11 |
|
12 |
|
|
|
|
|
|
|
|
|
|
|
13 |
class WebVTTFrame(BaseModel):
|
14 |
"""Represents a WebVTT frame"""
|
15 |
|
16 |
model_config = ConfigDict(frozen=True)
|
17 |
|
18 |
+
_SPEAKER_SPEECH_TEXT_SEPARATOR: ClassVar[str] = ": "
|
19 |
+
|
20 |
identifier: str
|
21 |
start: timedelta
|
22 |
end: timedelta
|
|
|
28 |
identifier = caption.identifier if caption.identifier else str(index)
|
29 |
start = timedelta(**caption.start_time.__dict__)
|
30 |
end = timedelta(**caption.end_time.__dict__)
|
31 |
+
match caption.text.split(cls._SPEAKER_SPEECH_TEXT_SEPARATOR, 1):
|
32 |
case [speaker, speech]:
|
33 |
return cls(identifier=identifier, start=start, end=end, speaker=speaker, speech=speech)
|
34 |
case [speech]:
|
|
|
38 |
class WebVTTContent(Content):
|
39 |
"""Represents parsed WebVTT content."""
|
40 |
|
41 |
+
CHUNK_FRAMES_OVERLAP: ClassVar[int] = 1
|
42 |
+
CHUNK_FRAMES_WINDOW: ClassVar[int] = 5
|
43 |
+
|
44 |
id: str
|
45 |
metadata: Mapping[str, Any] = Field(default_factory=lambda: MappingProxyType({}))
|
46 |
start_time: Optional[datetime]
|
|
|
52 |
def get_chunks(self: Self) -> tuple[Chunk]:
|
53 |
windows = (tuple(filter(None, window))
|
54 |
for window
|
55 |
+
in windowed(self.frames, self.CHUNK_FRAMES_WINDOW, step=self.CHUNK_FRAMES_WINDOW-self.CHUNK_FRAMES_OVERLAP))
|
56 |
return tuple(Chunk(text="\n\n".join(": ".join(filter(None, (frame.speaker, frame.speech)))
|
57 |
for frame
|
58 |
in frames),
|
src/ctp_slack_bot/services/embeddings_model_service.py
CHANGED
@@ -11,14 +11,10 @@ class EmbeddingsModelService(ApplicationComponentBase):
|
|
11 |
Service for embeddings model operations.
|
12 |
"""
|
13 |
|
14 |
-
model_config = ConfigDict(frozen=True)
|
15 |
|
16 |
settings: Settings
|
17 |
-
|
18 |
-
|
19 |
-
def model_post_init(self: Self, context: Any, /) -> None:
|
20 |
-
super().model_post_init(context)
|
21 |
-
self._open_ai_client = AsyncOpenAI(api_key=self.settings.openai_api_key.get_secret_value())
|
22 |
|
23 |
async def get_embeddings(self: Self, texts: Sequence[str]) -> Sequence[Sequence[float]]:
|
24 |
"""Get embeddings for a collection of texts using OpenAI’s API.
|
@@ -33,7 +29,7 @@ class EmbeddingsModelService(ApplicationComponentBase):
|
|
33 |
ValueError: If the embedding dimensions don't match expected size
|
34 |
"""
|
35 |
logger.debug("Creating embeddings for {} text string(s)…", len(texts))
|
36 |
-
response = await self._open_ai_client.embeddings.create(
|
37 |
model=self.settings.embedding_model,
|
38 |
input=texts,
|
39 |
encoding_format="float" # Ensure we get raw float values.
|
|
|
11 |
Service for embeddings model operations.
|
12 |
"""
|
13 |
|
14 |
+
model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
|
15 |
|
16 |
settings: Settings
|
17 |
+
open_ai_client: AsyncOpenAI
|
|
|
|
|
|
|
|
|
18 |
|
19 |
async def get_embeddings(self: Self, texts: Sequence[str]) -> Sequence[Sequence[float]]:
|
20 |
"""Get embeddings for a collection of texts using OpenAI’s API.
|
|
|
29 |
ValueError: If the embedding dimensions don't match expected size
|
30 |
"""
|
31 |
logger.debug("Creating embeddings for {} text string(s)…", len(texts))
|
32 |
+
response = await self.open_ai_client.embeddings.create(
|
33 |
model=self.settings.embedding_model,
|
34 |
input=texts,
|
35 |
encoding_format="float" # Ensure we get raw float values.
|
src/ctp_slack_bot/services/google_drive_service.py
CHANGED
@@ -9,21 +9,20 @@ from io import BytesIO
|
|
9 |
from itertools import chain
|
10 |
from loguru import logger
|
11 |
from pydantic import ConfigDict, PrivateAttr
|
12 |
-
from typing import Any, Collection, Optional, Self
|
13 |
|
14 |
from ctp_slack_bot.core import ApplicationComponentBase, Settings
|
15 |
from ctp_slack_bot.models import GoogleDriveMetadata
|
16 |
|
17 |
|
18 |
-
FOLDER_MIME_TYPE: str = "application/vnd.google-apps.folder"
|
19 |
-
PATH_SEPARATOR: str = "/"
|
20 |
-
|
21 |
-
|
22 |
class GoogleDriveService(ApplicationComponentBase):
|
23 |
"""Service for interacting with Google Drive."""
|
24 |
|
25 |
model_config = ConfigDict(frozen=True)
|
26 |
|
|
|
|
|
|
|
27 |
settings: Settings
|
28 |
_google_drive_client: Resource
|
29 |
_folder_cache: TTLCache = PrivateAttr(default_factory=lambda: TTLCache(maxsize=256, ttl=60))
|
@@ -53,9 +52,9 @@ class GoogleDriveService(ApplicationComponentBase):
|
|
53 |
|
54 |
current_id = self.settings.google_drive_root_id
|
55 |
try:
|
56 |
-
for part in folder_path.split(PATH_SEPARATOR):
|
57 |
results = self._google_drive_client.files().list(
|
58 |
-
q=f"name='{part.replace("\\", "\\\\").replace("'", "\\'")}' and mimeType='{
|
59 |
fields="files(id,name)",
|
60 |
supportsAllDrives=True,
|
61 |
includeItemsFromAllDrives=True
|
@@ -87,10 +86,10 @@ class GoogleDriveService(ApplicationComponentBase):
|
|
87 |
if not recursive:
|
88 |
return results
|
89 |
return tuple(reduce(chain,
|
90 |
-
(self._list_directory(f"{folder_path}{
|
91 |
for result
|
92 |
in results
|
93 |
-
if result.mime_type == FOLDER_MIME_TYPE),
|
94 |
results))
|
95 |
except HttpError as e:
|
96 |
logger.error("Error listing folder by path, {}: {}", folder_path, e)
|
@@ -108,7 +107,7 @@ class GoogleDriveService(ApplicationComponentBase):
|
|
108 |
def get_metadata(self: Self, item_path: str) -> Optional[GoogleDriveMetadata]:
|
109 |
"""Get metadata for a specific file/folder by path."""
|
110 |
|
111 |
-
match item_path.rsplit(
|
112 |
case [item_name]:
|
113 |
folder_path = ""
|
114 |
folder_id = self.settings.google_drive_root_id
|
|
|
9 |
from itertools import chain
|
10 |
from loguru import logger
|
11 |
from pydantic import ConfigDict, PrivateAttr
|
12 |
+
from typing import Any, ClassVar, Collection, Optional, Self
|
13 |
|
14 |
from ctp_slack_bot.core import ApplicationComponentBase, Settings
|
15 |
from ctp_slack_bot.models import GoogleDriveMetadata
|
16 |
|
17 |
|
|
|
|
|
|
|
|
|
18 |
class GoogleDriveService(ApplicationComponentBase):
|
19 |
"""Service for interacting with Google Drive."""
|
20 |
|
21 |
model_config = ConfigDict(frozen=True)
|
22 |
|
23 |
+
_FOLDER_MIME_TYPE: ClassVar[str] = "application/vnd.google-apps.folder"
|
24 |
+
_PATH_SEPARATOR: ClassVar[str] = "/"
|
25 |
+
|
26 |
settings: Settings
|
27 |
_google_drive_client: Resource
|
28 |
_folder_cache: TTLCache = PrivateAttr(default_factory=lambda: TTLCache(maxsize=256, ttl=60))
|
|
|
52 |
|
53 |
current_id = self.settings.google_drive_root_id
|
54 |
try:
|
55 |
+
for part in folder_path.split(self._PATH_SEPARATOR):
|
56 |
results = self._google_drive_client.files().list(
|
57 |
+
q=f"name='{part.replace("\\", "\\\\").replace("'", "\\'")}' and mimeType='{self._FOLDER_MIME_TYPE}' and '{current_id}' in parents",
|
58 |
fields="files(id,name)",
|
59 |
supportsAllDrives=True,
|
60 |
includeItemsFromAllDrives=True
|
|
|
86 |
if not recursive:
|
87 |
return results
|
88 |
return tuple(reduce(chain,
|
89 |
+
(self._list_directory(f"{folder_path}{self._PATH_SEPARATOR}{result.name}", result.id, True)
|
90 |
for result
|
91 |
in results
|
92 |
+
if result.mime_type == self._FOLDER_MIME_TYPE),
|
93 |
results))
|
94 |
except HttpError as e:
|
95 |
logger.error("Error listing folder by path, {}: {}", folder_path, e)
|
|
|
107 |
def get_metadata(self: Self, item_path: str) -> Optional[GoogleDriveMetadata]:
|
108 |
"""Get metadata for a specific file/folder by path."""
|
109 |
|
110 |
+
match item_path.rsplit(self._PATH_SEPARATOR, 1):
|
111 |
case [item_name]:
|
112 |
folder_path = ""
|
113 |
folder_id = self.settings.google_drive_root_id
|
src/ctp_slack_bot/services/language_model_service.py
CHANGED
@@ -16,11 +16,7 @@ class LanguageModelService(ApplicationComponentBase):
|
|
16 |
model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
|
17 |
|
18 |
settings: Settings
|
19 |
-
|
20 |
-
|
21 |
-
def model_post_init(self: Self, context: Any, /) -> None:
|
22 |
-
super().model_post_init(context)
|
23 |
-
self._open_ai_client = AsyncOpenAI(api_key=self.settings.openai_api_key.get_secret_value())
|
24 |
|
25 |
async def answer_question(self, asker: str, question: str, context: Collection[Chunk]) -> str: # TODO: generify into just another agent.
|
26 |
"""Generate a response using OpenAI’s API with retrieved context.
|
@@ -46,7 +42,7 @@ class LanguageModelService(ApplicationComponentBase):
|
|
46 |
Context from class materials and transcripts:
|
47 |
{'\n\n'.join(chunk.text for chunk in context)}""")}
|
48 |
]
|
49 |
-
response = await self._open_ai_client.chat.completions.create(
|
50 |
model=self.settings.chat_model,
|
51 |
messages=messages,
|
52 |
max_tokens=self.settings.max_tokens,
|
|
|
16 |
model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
|
17 |
|
18 |
settings: Settings
|
19 |
+
open_ai_client: AsyncOpenAI
|
|
|
|
|
|
|
|
|
20 |
|
21 |
async def answer_question(self, asker: str, question: str, context: Collection[Chunk]) -> str: # TODO: generify into just another agent.
|
22 |
"""Generate a response using OpenAI’s API with retrieved context.
|
|
|
42 |
Context from class materials and transcripts:
|
43 |
{'\n\n'.join(chunk.text for chunk in context)}""")}
|
44 |
]
|
45 |
+
response = await self.open_ai_client.chat.completions.create(
|
46 |
model=self.settings.chat_model,
|
47 |
messages=messages,
|
48 |
max_tokens=self.settings.max_tokens,
|
src/ctp_slack_bot/services/slack_service.py
CHANGED
@@ -7,7 +7,7 @@ from re import compile as compile_re, Pattern
|
|
7 |
from slack_sdk.errors import SlackApiError
|
8 |
from slack_bolt.async_app import AsyncApp
|
9 |
from slack_sdk.web.async_slack_response import AsyncSlackResponse
|
10 |
-
from typing import Any, Mapping, MutableMapping, Optional, Self, Set
|
11 |
|
12 |
from ctp_slack_bot.core import HealthReportingApplicationComponentBase
|
13 |
from ctp_slack_bot.enums import EventType
|
@@ -22,8 +22,8 @@ class SlackService(HealthReportingApplicationComponentBase):
|
|
22 |
|
23 |
model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
|
24 |
|
25 |
-
|
26 |
-
|
27 |
|
28 |
event_brokerage_service: EventBrokerageService
|
29 |
http_client: AsyncClient
|
@@ -84,7 +84,7 @@ class SlackService(HealthReportingApplicationComponentBase):
|
|
84 |
await self._process_message(body)
|
85 |
|
86 |
async def _look_up_name(self: Self, id: str) -> Optional[str]:
|
87 |
-
if self.
|
88 |
match await self.slack_bolt_app.client.users_info(id):
|
89 |
case AsyncSlackResponse(data={"ok": True, "user": user}):
|
90 |
match user:
|
@@ -104,7 +104,7 @@ class SlackService(HealthReportingApplicationComponentBase):
|
|
104 |
await self.event_brokerage_service.publish(EventType.INCOMING_SLACK_MESSAGE, slack_message)
|
105 |
|
106 |
async def _resolve_user_mentions(self: Self, text: str) -> str:
|
107 |
-
matches = tuple(self.
|
108 |
unique_ids = frozenset(match.group(1) for match in matches)
|
109 |
await self._ensure_ids_in_id_name_map(unique_ids)
|
110 |
parts = []
|
|
|
7 |
from slack_sdk.errors import SlackApiError
|
8 |
from slack_bolt.async_app import AsyncApp
|
9 |
from slack_sdk.web.async_slack_response import AsyncSlackResponse
|
10 |
+
from typing import Any, ClassVar, Mapping, MutableMapping, Optional, Self, Set
|
11 |
|
12 |
from ctp_slack_bot.core import HealthReportingApplicationComponentBase
|
13 |
from ctp_slack_bot.enums import EventType
|
|
|
22 |
|
23 |
model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
|
24 |
|
25 |
+
_SLACK_USER_ID_PATTERN: ClassVar[Pattern] = compile_re(r"U\d+")
|
26 |
+
_SLACK_USER_MENTION_PATTERN: ClassVar[Pattern] = compile_re(r"<@(U[A-Z0-9]+)>")
|
27 |
|
28 |
event_brokerage_service: EventBrokerageService
|
29 |
http_client: AsyncClient
|
|
|
84 |
await self._process_message(body)
|
85 |
|
86 |
async def _look_up_name(self: Self, id: str) -> Optional[str]:
|
87 |
+
if self._SLACK_USER_ID_PATTERN.fullmatch(id):
|
88 |
match await self.slack_bolt_app.client.users_info(id):
|
89 |
case AsyncSlackResponse(data={"ok": True, "user": user}):
|
90 |
match user:
|
|
|
104 |
await self.event_brokerage_service.publish(EventType.INCOMING_SLACK_MESSAGE, slack_message)
|
105 |
|
106 |
async def _resolve_user_mentions(self: Self, text: str) -> str:
|
107 |
+
matches = tuple(self._SLACK_USER_MENTION_PATTERN.finditer(text))
|
108 |
unique_ids = frozenset(match.group(1) for match in matches)
|
109 |
await self._ensure_ids_in_id_name_map(unique_ids)
|
110 |
parts = []
|