LiKenun committed on
Commit
7aeec4b
·
1 Parent(s): bb5dde5

Add parent-chunk identifier index

Browse files
src/ctp_slack_bot/db/repositories/mongo_db_vectorized_chunk_repository.py CHANGED
@@ -146,10 +146,16 @@ class MongoVectorizedChunkRepository(VectorRepositoryBase, VectorizedChunkReposi
146
  result = await self.collection.delete_many({"parent_id": parent_id})
147
  return result.deleted_count
148
 
 
 
 
 
 
 
149
 
150
  class MongoVectorizedChunkRepositoryResource(AsyncResource):
151
  async def init(self: Self, settings: Settings, mongo_db: MongoDB) -> MongoVectorizedChunkRepository:
152
  vectorized_chunk_collection = await mongo_db.get_collection("vectorized_chunks")
153
  vectorized_chunk_repository = MongoVectorizedChunkRepository(settings=settings, collection=vectorized_chunk_collection)
154
- await vectorized_chunk_repository.create_indexes()
155
  return vectorized_chunk_repository
 
146
  result = await self.collection.delete_many({"parent_id": parent_id})
147
  return result.deleted_count
148
 
149
async def ensure_indices_exist(self: Self) -> None:
    """
    Ensure that every index this repository relies on exists.

    First delegates to the parent class's ``ensure_indices_exist``, then
    creates a unique compound index over ``(parent_id, chunk_id)`` named
    ``parent_chunk_unique`` when the collection has no index by that name.
    """
    await super().ensure_indices_exist()
    index_name = "parent_chunk_unique"
    # Look the index up on this repository's own collection; a bare
    # ``collection`` name is not defined in this scope.
    existing_indices = await self.collection.index_information()
    if index_name not in existing_indices:
        # NOTE(review): building a unique index presumably fails if the
        # collection already holds duplicate (parent_id, chunk_id) pairs --
        # confirm against the data before deploying.
        await self.collection.create_index(
            [("parent_id", ASCENDING), ("chunk_id", ASCENDING)],
            unique=True,
            name=index_name,
        )
155
 
156
class MongoVectorizedChunkRepositoryResource(AsyncResource):
    """Async resource that builds a ready-to-use ``MongoVectorizedChunkRepository``."""

    async def init(self: Self, settings: Settings, mongo_db: MongoDB) -> MongoVectorizedChunkRepository:
        """
        Create the repository over the ``vectorized_chunks`` collection.

        Fetches the collection from ``mongo_db``, wraps it in a
        ``MongoVectorizedChunkRepository``, and ensures the repository's
        indices exist before returning it.
        """
        vectorized_chunk_collection = await mongo_db.get_collection("vectorized_chunks")
        vectorized_chunk_repository = MongoVectorizedChunkRepository(settings=settings, collection=vectorized_chunk_collection)
        # The repository method is ``ensure_indices_exist`` (no trailing "s").
        await vectorized_chunk_repository.ensure_indices_exist()
        return vectorized_chunk_repository
src/ctp_slack_bot/db/repositories/vector_repository_base.py CHANGED
@@ -17,9 +17,13 @@ class VectorRepositoryBase(ABC, BaseModel):
17
  frozen=True
18
  arbitrary_types_allowed = True
19
 
20
- async def create_indexes(self: Self) -> None:
 
 
 
 
21
  """
22
- Create a vector search index.
23
  """
24
  index_name = f"{self.collection.name}_vector_index"
25
  try:
 
17
  frozen=True
18
  arbitrary_types_allowed = True
19
 
20
+ async def ensure_indices_exist(self: Self) -> None:
21
+ """Ensure that indices exist."""
22
+ self.ensure_search_index_exists()
23
+
24
+ async def ensure_search_index_exists(self: Self) -> None:
25
  """
26
+ Ensure that a vector search index exists.
27
  """
28
  index_name = f"{self.collection.name}_vector_index"
29
  try: