LiKenun committed on
Commit
98a6105
·
1 Parent(s): 961770c

Add `VECTORIZED_CHUNKS_SEARCH_INDEX_NAME` environment variable

Browse files
.env.template CHANGED
@@ -17,6 +17,8 @@ TOP_K_MATCHES=5
17
  # MongoDB Configuration
18
  MONGODB_URI=mongodb+srv://username:[email protected]/database?retryWrites=true&w=majority
19
  MONGODB_NAME=ctp_slack_bot
 
 
20
  SCORE_THRESHOLD=0.5
21
 
22
  # Hugging Face Configuration
 
17
  # MongoDB Configuration
18
  MONGODB_URI=mongodb+srv://username:[email protected]/database?retryWrites=true&w=majority
19
  MONGODB_NAME=ctp_slack_bot
20
+ VECTORIZED_CHUNKS_COLLECTION_NAME=vectorized_chunks
21
+ VECTORIZED_CHUNKS_SEARCH_INDEX_NAME=
22
  SCORE_THRESHOLD=0.5
23
 
24
  # Hugging Face Configuration
scripts/run.sh CHANGED
@@ -4,4 +4,4 @@ parent_path=$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd -P)
4
 
5
  cd "${parent_path}/.."
6
 
7
- python "temporary_health_check_server.py" & python -m ctp_slack_bot.app
 
4
 
5
  cd "${parent_path}/.."
6
 
7
+ python -m ctp_slack_bot.app & python "temporary_health_check_server.py"
src/ctp_slack_bot/containers.py CHANGED
@@ -1,9 +1,10 @@
1
  from dependency_injector.containers import DeclarativeContainer
2
  from dependency_injector.providers import Callable, Resource, Singleton
3
  from importlib import import_module
4
- from pathlib import Path
5
  from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
6
  from slack_bolt.async_app import AsyncApp
 
7
 
8
  from ctp_slack_bot.core.config import Settings
9
  from ctp_slack_bot.db.mongo_db import MongoDBResource
@@ -22,15 +23,18 @@ from ctp_slack_bot.services.slack_service import SlackServiceResource
22
  from ctp_slack_bot.services.vectorization_service import VectorizationService
23
 
24
 
25
def __load_plugins(plugin_dir) -> None:
    """Import every Python module found directly inside *plugin_dir*.

    ``__init__.py`` is skipped; each remaining ``*.py`` file is imported
    under the dotted module name derived from the directory path.
    """
    dotted_prefix = plugin_dir.replace("/", ".")
    for module_file in Path(plugin_dir).glob("*.py"):
        stem = module_file.stem
        if stem != "__init__":  # __init__.py is the package marker, not a plugin
            import_module(f"{dotted_prefix}.{stem}")
 
 
 
 
31
 
32
-
33
- __load_plugins("ctp_slack_bot/mime_type_handlers")
34
 
35
 
36
  class Container(DeclarativeContainer): # TODO: audit for potential async-related bugs.
@@ -38,7 +42,7 @@ class Container(DeclarativeContainer): # TODO: audit for potential async-related
38
  event_brokerage_service = Singleton(EventBrokerageService)
39
  schedule_service = Resource (ScheduleServiceResource,
40
  settings=settings)
41
- mongo_db = Resource (MongoDBResource, # TODO: generalize to any database.
42
  settings=settings)
43
  vectorized_chunk_repository = Resource (MongoVectorizedChunkRepositoryResource,
44
  settings=settings,
 
1
  from dependency_injector.containers import DeclarativeContainer
2
  from dependency_injector.providers import Callable, Resource, Singleton
3
  from importlib import import_module
4
+ from pkgutil import iter_modules
5
  from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
6
  from slack_bolt.async_app import AsyncApp
7
+ from types import ModuleType
8
 
9
  from ctp_slack_bot.core.config import Settings
10
  from ctp_slack_bot.db.mongo_db import MongoDBResource
 
23
  from ctp_slack_bot.services.vectorization_service import VectorizationService
24
 
25
 
26
def __scan_and_import_modules(package_name: str) -> None:
    """Import every non-package submodule directly inside the given package.

    Args:
        package_name: Fully qualified dotted name of the package to scan.

    Raises:
        ValueError: If the package cannot be imported, or the name refers to
            a plain module rather than a package (i.e. it has no ``__path__``).
    """
    try:
        # import_module raises ImportError on failure; it never returns None,
        # so the annotation is ModuleType (the original's Optional[ModuleType]
        # referenced an un-imported name and was inaccurate besides).
        package: ModuleType = import_module(package_name)
    except ImportError as e:
        raise ValueError(f"Package {package_name} not found") from e
    if not hasattr(package, '__path__'):
        raise ValueError(f"{package_name} is not a package")
    for _, module_name, is_pkg in iter_modules(package.__path__):
        if not is_pkg:  # only leaf modules; nested packages are not recursed into
            import_module(f"{package.__name__}.{module_name}")
36
 
37
+ __scan_and_import_modules("ctp_slack_bot.mime_type_handlers")
 
38
 
39
 
40
  class Container(DeclarativeContainer): # TODO: audit for potential async-related bugs.
 
42
  event_brokerage_service = Singleton(EventBrokerageService)
43
  schedule_service = Resource (ScheduleServiceResource,
44
  settings=settings)
45
+ mongo_db = Resource (MongoDBResource,
46
  settings=settings)
47
  vectorized_chunk_repository = Resource (MongoVectorizedChunkRepositoryResource,
48
  settings=settings,
src/ctp_slack_bot/core/config.py CHANGED
@@ -36,8 +36,9 @@ class Settings(BaseSettings):
36
  # MongoDB Configuration
37
  MONGODB_URI: SecretStr # TODO: Contemplate switching to MongoDsn type for the main URL, and separate out the credentials to SecretStr variables.
38
  MONGODB_NAME: str
 
 
39
  SCORE_THRESHOLD: NonNegativeFloat
40
- VECTORIZED_CHUNKS_COLLECTION_NAME: str
41
 
42
  # Hugging Face Configuration
43
  HF_API_TOKEN: Optional[SecretStr] = None # TODO: Currently, this is unused.
 
36
  # MongoDB Configuration
37
  MONGODB_URI: SecretStr # TODO: Contemplate switching to MongoDsn type for the main URL, and separate out the credentials to SecretStr variables.
38
  MONGODB_NAME: str
39
+ VECTORIZED_CHUNKS_COLLECTION_NAME: str = "vectorized_chunks"
40
+ VECTORIZED_CHUNKS_SEARCH_INDEX_NAME: Optional[str] = None
41
  SCORE_THRESHOLD: NonNegativeFloat
 
42
 
43
  # Hugging Face Configuration
44
  HF_API_TOKEN: Optional[SecretStr] = None # TODO: Currently, this is unused.
src/ctp_slack_bot/db/repositories/mongo_db_vectorized_chunk_repository.py CHANGED
@@ -54,7 +54,7 @@ class MongoVectorizedChunkRepository(VectorRepositoryBase, VectorizedChunkReposi
54
  pipeline = [
55
  {
56
  "$vectorSearch": {
57
- "index": f"{self.collection.name}_vector_index",
58
  "path": "embedding",
59
  "queryVector": query.query_embeddings,
60
  "numCandidates": query.k * 10,
 
54
  pipeline = [
55
  {
56
  "$vectorSearch": {
57
+ "index": self.settings.VECTORIZED_CHUNKS_SEARCH_INDEX_NAME or f"{self.collection.name}_vector_index",
58
  "path": "embedding",
59
  "queryVector": query.query_embeddings,
60
  "numCandidates": query.k * 10,
src/ctp_slack_bot/db/repositories/vector_repository_base.py CHANGED
@@ -25,7 +25,7 @@ class VectorRepositoryBase(ABC, BaseModel):
25
  """
26
  Ensure that a vector search index exists.
27
  """
28
- index_name = f"{self.collection.name}_vector_index"
29
  try:
30
  existing_indexes = [index["name"] async for index in self.collection.list_search_indexes()]
31
  logger.debug("{} existing indices were found: {}", len(existing_indexes), existing_indexes)
@@ -56,7 +56,7 @@ class VectorRepositoryBase(ABC, BaseModel):
56
  logger.warning("Vector search not supported by this MongoDB instance. Some functionality may be limited.")
57
  # Create a fallback standard index on embedding field.
58
  await self.collection.create_index("embedding")
59
- logger.info("Created standard index on 'embedding' field as fallback.")
60
  else:
61
- logger.error("Failed to create vector index: {}", e)
62
  raise
 
25
  """
26
  Ensure that a vector search index exists.
27
  """
28
+ index_name = self.settings.VECTORIZED_CHUNKS_SEARCH_INDEX_NAME or f"{self.collection.name}_vector_index"
29
  try:
30
  existing_indexes = [index["name"] async for index in self.collection.list_search_indexes()]
31
  logger.debug("{} existing indices were found: {}", len(existing_indexes), existing_indexes)
 
56
  logger.warning("Vector search not supported by this MongoDB instance. Some functionality may be limited.")
57
  # Create a fallback standard index on embedding field.
58
  await self.collection.create_index("embedding")
59
+ logger.info("Created standard index on {} field as fallback.", "embedding")
60
  else:
61
+ logger.error("Failed to create any index: {}", e)
62
  raise
src/ctp_slack_bot/services/language_model_service.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from loguru import logger
2
  from openai import AsyncOpenAI
3
  from openai.types.chat import ChatCompletion
@@ -23,30 +24,29 @@ class LanguageModelService(BaseModel):
23
  self._open_ai_client = AsyncOpenAI(api_key=self.settings.OPENAI_API_KEY.get_secret_value())
24
  logger.debug("Created {}", self.__class__.__name__)
25
 
26
- async def answer_question(self, asker: str, question: str, context: Collection[Chunk]) -> str:
27
  """Generate a response using OpenAI’s API with retrieved context.
28
-
29
  Args:
30
  question (str): The user’s question
31
  context (Collection[Chunk]): The context retrieved for answering the question
32
-
33
  Returns:
34
  str: Generated answer
35
  """
36
  logger.debug("Generating response for question “{}” using {} context chunks…", question, len(context))
37
  messages = [
38
  {"role": "system", "content": self.settings.SYSTEM_PROMPT},
39
- {"role": "user", "content":
40
- f"""
41
- Inquirer Name: {asker}
 
42
 
43
  Question:
44
  {question}
45
 
46
  Context from class materials and transcripts:
47
- {'\n\n'.join(chunk.text for chunk in context)}
48
-
49
- Please answer the Question based on the Context from class materials and transcripts. If the context doesn’t contain relevant information, acknowledge that and suggest asking the professor. In all other cases, carry on."""}
50
  ]
51
  response: ChatCompletion = await self._open_ai_client.chat.completions.create(
52
  model=self.settings.CHAT_MODEL,
 
1
+ from datetime import datetime
2
  from loguru import logger
3
  from openai import AsyncOpenAI
4
  from openai.types.chat import ChatCompletion
 
24
  self._open_ai_client = AsyncOpenAI(api_key=self.settings.OPENAI_API_KEY.get_secret_value())
25
  logger.debug("Created {}", self.__class__.__name__)
26
 
27
+ async def answer_question(self, asker: str, question: str, context: Collection[Chunk]) -> str: # TODO: generify into just another agent.
28
  """Generate a response using OpenAI’s API with retrieved context.
29
+
30
  Args:
31
  question (str): The user’s question
32
  context (Collection[Chunk]): The context retrieved for answering the question
33
+
34
  Returns:
35
  str: Generated answer
36
  """
37
  logger.debug("Generating response for question “{}” using {} context chunks…", question, len(context))
38
  messages = [
39
  {"role": "system", "content": self.settings.SYSTEM_PROMPT},
40
+ {"role": "user", "content": (
41
+ f"""Inquirer Name: {asker}
42
+
43
+ Current Time: {datetime.now().isoformat(" ", "seconds")}
44
 
45
  Question:
46
  {question}
47
 
48
  Context from class materials and transcripts:
49
+ {'\n\n'.join(chunk.text for chunk in context)}""")}
 
 
50
  ]
51
  response: ChatCompletion = await self._open_ai_client.chat.completions.create(
52
  model=self.settings.CHAT_MODEL,