Spaces:

KingZack
/

ctp-slack-bot

Runtime error

App Files Files Community

Kevin Li committed on Apr 5

Commit

551ef8f

unverified ·

2 Parent(s): 306043c 64566ca

Merge pull request #1 from CUNYTechPrep/origin/alt/LanguageModelServices

Browse files

Files changed (14) hide show

.dockerignore +3 -3
.env.template +27 -11
.gitignore +3 -3
README.MD +7 -0
pyproject.toml +1 -0
src/ctp_slack_bot/api/main.py +21 -13
src/ctp_slack_bot/core/config.py +32 -35
src/ctp_slack_bot/core/response_rendering.py +13 -0
src/ctp_slack_bot/db/MongoDB.py +122 -0
src/ctp_slack_bot/models/VectorQuery.py +17 -0
src/ctp_slack_bot/models/content.py +19 -0
src/ctp_slack_bot/services/AnswerQuestionService.py +60 -0
src/ctp_slack_bot/services/ContextRetrievalService.py +76 -0
src/ctp_slack_bot/services/VectorDatabaseService.py +124 -0

.dockerignore CHANGED Viewed

@@ -59,11 +59,11 @@ venv.bak/
 # PyCharm
 .idea/
-# Jupyter notebooks
-notebooks/
 # Documentation
 docs/
 # MacOS
 .DS_Store

 # PyCharm
 .idea/
 # Documentation
 docs/
 # MacOS
 .DS_Store
+# Application logs
+/logs

.env.template CHANGED Viewed

@@ -1,25 +1,41 @@
 # Copy this file and modify. Do not save or commit the secrets!
 # API Configuration
 API_HOST=0.0.0.0
 API_PORT=8000
-DEBUG=false
-# MongoDB Configuration
-MONGODB_URI=mongodb+srv://username:[email protected]/database?retryWrites=true&w=majority
-MONGODB_DB_NAME=ctp_slack_bot
 # Slack Configuration
 SLACK_BOT_TOKEN=🪙
 SLACK_SIGNING_SECRET=🔏
 SLACK_APP_TOKEN=🦥
 # Hugging Face Configuration
 HF_API_TOKEN=🤗
-# Logging Configuration
-LOG_LEVEL=INFO
-LOG_FORMAT=json
-# APScheduler Configuration
-SCHEDULER_TIMEZONE=UTC

 # Copy this file and modify. Do not save or commit the secrets!
+# Application Configuration
+DEBUG=false
+# Logging Configuration
+LOG_LEVEL=INFO
+LOG_FORMAT=text
+# APScheduler Configuration
+SCHEDULER_TIMEZONE=UTC
 # API Configuration
 API_HOST=0.0.0.0
 API_PORT=8000
 # Slack Configuration
 SLACK_BOT_TOKEN=🪙
 SLACK_SIGNING_SECRET=🔏
 SLACK_APP_TOKEN=🦥
+# Vectorization Configuration
+EMBEDDING_MODEL=🌮
+VECTOR_DIMENSION=9001
+CHUNK_SIZE=42
+CHUNK_OVERLAP=37
+TOP_K_MATCHES=1
+# MongoDB Configuration
+MONGODB_URI=mongodb+srv://username:[email protected]/database?retryWrites=true&w=majority
+MONGODB_NAME=ctp_slack_bot
 # Hugging Face Configuration
 HF_API_TOKEN=🤗
+# OpenAI Configuration
+OPENAI_API_KEY=😐
+CHAT_MODEL=🙊
+MAX_TOKENS=42
+TEMPERATURE=0.5
+SYSTEM_PROMPT="You are a helpful teaching assistant for a data science class.\nBased on the student's question, you will be given context retrieved from class transcripts and materials to answer their question.\nYour responses should be:\n\n1. Accurate and based on the class content\n2. Clear and educational\n3. Concise but complete\nIf you're unsure about something, acknowledge it and suggest asking the professor."

.gitignore CHANGED Viewed

@@ -91,8 +91,8 @@ dmypy.json
 # PyCharm
 .idea/
-# Jupyter notebooks
-notebooks/
 # MacOS
 .DS_Store

 # PyCharm
 .idea/
 # MacOS
 .DS_Store
+# Application logs
+/logs

README.MD CHANGED Viewed

@@ -14,6 +14,7 @@
 * `src/`
     * `ctp_slack_bot/`
         * `api/`: FastAPI application structure
         * `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions
         * `db/`: database connection
             * `repositories/`: repository pattern implementation
@@ -23,7 +24,9 @@
         * `utils/`: reusable utilities
 * `tests/`: unit tests
 * `scripts/`: utility scripts for development, deployment, etc.
 * `notebooks/`: Jupyter notebooks for exploration and model development
 ## How to Run the Application
@@ -41,6 +44,8 @@ First, make sure you are set up with a Python virtual environment created by the
 pip3 install -e .
 ```
 If `localhost` port `8000` is free, running the following will make the application available on that port:
 ```sh
@@ -54,4 +59,6 @@ $ curl http://localhost:8000/health
 {"status":"healthy"}
 ```
 Uvicorn will restart the application automatically when any source files are changed.

 * `src/`
     * `ctp_slack_bot/`
         * `api/`: FastAPI application structure
+            * `routes.py`: API endpoint definitions
         * `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions
         * `db/`: database connection
             * `repositories/`: repository pattern implementation
         * `utils/`: reusable utilities
 * `tests/`: unit tests
 * `scripts/`: utility scripts for development, deployment, etc.
+    * `run-dev.sh`: script to run the application locally
 * `notebooks/`: Jupyter notebooks for exploration and model development
+* `.env`: local environment variables for development purposes
 ## How to Run the Application
 pip3 install -e .
 ```
+Make a copy of `.env.template` as `.env` and define the environment variables. (You can also define them by other means, but this has the least friction.) This file should not be committed and is excluded by `.gitignore`!
 If `localhost` port `8000` is free, running the following will make the application available on that port:
 ```sh
 {"status":"healthy"}
 ```
+In debug mode (`DEBUG=true`), [http://localhost:8000/env](http://localhost:8000/env) will pretty-print the non-sensitive environment variables as JSON.
 Uvicorn will restart the application automatically when any source files are changed.

pyproject.toml CHANGED Viewed

@@ -43,6 +43,7 @@ dev = [
     "pytest>=7.3.1",
     "pytest-cov>=4.1.0",
     "mypy>=1.3.0",
     "black>=23.3.0",
     "isort>=5.12.0",
     "ruff>=0.0.270",

     "pytest>=7.3.1",
     "pytest-cov>=4.1.0",
     "mypy>=1.3.0",
+    "types-pytz>=2025.2",
     "black>=23.3.0",
     "isort>=5.12.0",
     "ruff>=0.0.270",

src/ctp_slack_bot/api/main.py CHANGED Viewed

@@ -1,23 +1,23 @@
-import logging
 from contextlib import asynccontextmanager
-from fastapi import FastAPI
 from loguru import logger
 from ctp_slack_bot.api.routes import router
-from ctp_slack_bot.core.config import settings
 from ctp_slack_bot.core.logging import setup_logging
 from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
 @asynccontextmanager
-async def lifespan(app: FastAPI):
     """
     Lifespan context manager for FastAPI application.
     Handles startup and shutdown events.
     """
     # Setup logging
-    #setup_logging()
     logger.info("Starting application")
     # Start scheduler
@@ -42,11 +42,19 @@ app = FastAPI(
 # Include routers
 app.include_router(router)
 @app.get("/health")
-async def health_check():
-    """Health check endpoint"""
-    return {"status": "healthy"}
 if __name__ == "__main__":
@@ -54,7 +62,7 @@ if __name__ == "__main__":
     uvicorn.run(
         "main:app",
-        host="localhost", #settings.API_HOST,
-        port=8000, #settings.API_PORT,
-        reload=True #settings.DEBUG,
     )

 from contextlib import asynccontextmanager
+from fastapi import FastAPI, HTTPException
 from loguru import logger
+from typing import AsyncGenerator, Never
 from ctp_slack_bot.api.routes import router
+from ctp_slack_bot.core.config import Settings, settings
 from ctp_slack_bot.core.logging import setup_logging
+from ctp_slack_bot.core.response_rendering import PrettyJSONResponse
 from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
 @asynccontextmanager
+async def lifespan(app: FastAPI) -> AsyncGenerator:
     """
     Lifespan context manager for FastAPI application.
     Handles startup and shutdown events.
     """
     # Setup logging
+    setup_logging()
     logger.info("Starting application")
     # Start scheduler
 # Include routers
 app.include_router(router)
 @app.get("/health")
+async def health() -> dict[str, str]:
+    """Report that the API process is up."""
+    return {"status": "healthy"}
+@app.get("/env", response_class=PrettyJSONResponse)
+async def env() -> Settings:
+    """Return the loaded settings; only reachable while DEBUG is enabled."""
+    if settings.DEBUG:
+        return settings
+    # Outside debug mode the route answers with 404.
+    raise HTTPException(status_code=404)
 if __name__ == "__main__":
     uvicorn.run(
         "main:app",
+        host=settings.API_HOST,
+        port=settings.API_PORT,
+        reload=settings.DEBUG
     )

src/ctp_slack_bot/core/config.py CHANGED Viewed

@@ -1,57 +1,54 @@
 from functools import lru_cache
 from typing import Literal, Optional
-from pydantic import Field, SecretStr, validator
 from pydantic_settings import BaseSettings, SettingsConfigDict
-class Settings(BaseSettings):
     """
     Application settings loaded from environment variables.
     """
-    # API Configuration
-    API_HOST: str = "0.0.0.0"
-    API_PORT: int = 8000
     DEBUG: bool = False
     # Vectorization Configuration
-    EMBEDDING_MODEL: str = "text-embedding-3-small"
-    VECTOR_DIMENSION: int = 1536
-    CHUNK_SIZE: int = 1000
-    CHUNK_OVERLAP: int = 200
-    TOP_K_MATCHES: int = 5
     # MongoDB Configuration
-    MONGODB_URI: Optional[SecretStr] = None # TODO: Remove optionality
-    MONGODB_DB_NAME: str = "ctp_slack_bot"
-    # Slack Configuration
-    SLACK_BOT_TOKEN: Optional[SecretStr] = None # TODO: Remove optionality
-    SLACK_SIGNING_SECRET: Optional[SecretStr] = None # TODO: Remove optionality
-    SLACK_APP_TOKEN: Optional[SecretStr] = None
     # Hugging Face Configuration
     HF_API_TOKEN: Optional[SecretStr] = None
     # OpenAI Configuration
     OPENAI_API_KEY: Optional[SecretStr] = None
-    # Logging Configuration
-    LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"
-    LOG_FORMAT: Literal["text", "json"] = "json"
-    # APScheduler Configuration
-    SCHEDULER_TIMEZONE: str = "UTC"
-    @validator("MONGODB_URI")
-    def validate_mongodb_uri(cls, v):
-        """Validate MongoDB URI format"""
-        #if not v.get_secret_value().startswith("mongodb"):
-        #    raise ValueError("MONGODB_URI must be a valid MongoDB connection string")
-        return v
     model_config = SettingsConfigDict(
         env_file=".env",
         env_file_encoding="utf-8",
@@ -64,7 +61,7 @@ def get_settings() -> Settings:
     """
     Get cached settings instance.
     """
-    return Settings()
 settings = get_settings()

 from functools import lru_cache
 from typing import Literal, Optional
+from pydantic import Field, MongoDsn, NonNegativeFloat, NonNegativeInt, PositiveInt, SecretStr
 from pydantic_settings import BaseSettings, SettingsConfigDict
+class Settings(BaseSettings): # TODO: Strong guarantees of validity, because garbage in = garbage out, and settings flow into all the nooks and crannies
     """
     Application settings loaded from environment variables.
     """
+    # Application Configuration
     DEBUG: bool = False
+    # Logging Configuration
+    LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(default_factory=lambda data: "DEBUG" if data.get("DEBUG", False) else "INFO")
+    LOG_FORMAT: Literal["text", "json"] = "json"
+    # APScheduler Configuration
+    SCHEDULER_TIMEZONE: str = "UTC"
+    # API Configuration
+    API_HOST: str
+    API_PORT: PositiveInt
+    # Slack Configuration
+    SLACK_BOT_TOKEN: SecretStr
+    SLACK_SIGNING_SECRET: SecretStr
+    SLACK_APP_TOKEN: SecretStr
     # Vectorization Configuration
+    EMBEDDING_MODEL: str
+    VECTOR_DIMENSION: PositiveInt
+    CHUNK_SIZE: PositiveInt
+    CHUNK_OVERLAP: NonNegativeInt
+    TOP_K_MATCHES: PositiveInt
     # MongoDB Configuration
+    MONGODB_URI: SecretStr # TODO: Contemplate switching to MongoDsn type for the main URL, and separate out the credentials to SecretStr variables.
+    MONGODB_NAME: str
     # Hugging Face Configuration
     HF_API_TOKEN: Optional[SecretStr] = None
     # OpenAI Configuration
     OPENAI_API_KEY: Optional[SecretStr] = None
+    CHAT_MODEL: str
+    MAX_TOKENS: PositiveInt
+    TEMPERATURE: NonNegativeFloat
+    SYSTEM_PROMPT: str
     model_config = SettingsConfigDict(
         env_file=".env",
         env_file_encoding="utf-8",
     """
     Get cached settings instance.
     """
+    return Settings() # type: ignore
 settings = get_settings()

src/ctp_slack_bot/core/response_rendering.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from json import dumps
+from starlette.responses import JSONResponse
+from typing import Any, Self
+class PrettyJSONResponse(JSONResponse):
+    """JSON response whose body is indented for human readers."""
+    def render(self: Self, content: Any) -> bytes:
+        """Serialize *content* as 4-space-indented JSON and encode it as UTF-8."""
+        # allow_nan=False makes NaN/Infinity raise ValueError instead of emitting invalid JSON.
+        pretty_text = dumps(content, ensure_ascii=False, allow_nan=False, indent=4, separators=(", ", ": "))
+        return pretty_text.encode("utf-8")

src/ctp_slack_bot/db/MongoDB.py ADDED Viewed

	@@ -0,0 +1,122 @@

+from motor.motor_asyncio import AsyncIOMotorClient
+from pymongo import IndexModel, ASCENDING
+import logging
+from typing import Optional
+from ctp_slack_bot.core.config import settings
+logger = logging.getLogger(__name__)
+class MongoDB:
+    """
+    MongoDB connection and initialization class.
+    Handles connection to MongoDB, database selection, and index creation.
+    """
+    def __init__(self):
+        # Nothing is opened here; connect() populates these attributes.
+        self.client: Optional[AsyncIOMotorClient] = None
+        self.db = None
+        self.vector_collection = None
+        self.initialized = False
+    async def connect(self):
+        """
+        Connect to MongoDB using connection string from settings.
+        Does nothing when a client already exists.
+        Raises:
+            ValueError: If MONGODB_URI is empty.
+        """
+        if self.client is not None:
+            return
+        if not settings.MONGODB_URI:
+            raise ValueError("MONGODB_URI is not set in environment variables")
+        try:
+            # Create MongoDB connection
+            self.client = AsyncIOMotorClient(settings.MONGODB_URI.get_secret_value())
+            # Settings exposes the database name as MONGODB_NAME.
+            self.db = self.client[settings.MONGODB_NAME]
+            self.vector_collection = self.db["vector_store"]
+            logger.info(f"Connected to MongoDB: {settings.MONGODB_NAME}")
+        except Exception as e:
+            logger.error(f"Error connecting to MongoDB: {str(e)}")
+            # Drop the half-built state so a later connect() retries instead of returning early.
+            if self.client is not None:
+                self.client.close()
+            self.client = None
+            self.db = None
+            self.vector_collection = None
+            raise
+    async def initialize(self):
+        """
+        Initialize MongoDB with required collections and indexes.
+        Connects first when no client exists; does nothing once initialized.
+        """
+        if self.initialized:
+            return
+        if not self.client:
+            await self.connect()
+        try:
+            # Create vector index for similarity search
+            await self.create_vector_index()
+            self.initialized = True
+            logger.info("MongoDB initialized successfully")
+        except Exception as e:
+            logger.error(f"Error initializing MongoDB: {str(e)}")
+            raise
+    async def create_vector_index(self):
+        """
+        Create vector index for similarity search using MongoDB Atlas Vector Search.
+        The index and the two metadata indexes are only created when no index
+        named "vector_index" is listed on the collection.
+        """
+        try:
+            # Check if index already exists
+            existing_indexes = await self.vector_collection.list_indexes().to_list(length=None)
+            index_names = [index.get('name') for index in existing_indexes]
+            if "vector_index" not in index_names:
+                # Create vector search index
+                index_definition = {
+                    "mappings": {
+                        "dynamic": True,
+                        "fields": {
+                            "embedding": {
+                                "dimensions": settings.VECTOR_DIMENSION,
+                                "similarity": "cosine",
+                                "type": "knnVector"
+                            }
+                        }
+                    }
+                }
+                # Create the index
+                # NOTE(review): verify that this createIndexes payload is what Atlas Vector Search expects — TODO confirm
+                await self.db.command({
+                    "createIndexes": self.vector_collection.name,
+                    "indexes": [
+                        {
+                            "name": "vector_index",
+                            "key": {"embedding": "vector"},
+                            "weights": {"embedding": 1},
+                            "vectorSearchOptions": index_definition
+                        }
+                    ]
+                })
+                # Create additional metadata indexes for filtering
+                await self.vector_collection.create_index([("metadata.source", ASCENDING)])
+                await self.vector_collection.create_index([("metadata.timestamp", ASCENDING)])
+                logger.info("Vector search index created")
+            else:
+                logger.info("Vector search index already exists")
+        except Exception as e:
+            logger.error(f"Error creating vector index: {str(e)}")
+            raise
+    async def close(self):
+        """
+        Close MongoDB connection and reset this object to its unconnected state.
+        """
+        if self.client:
+            self.client.close()
+            self.client = None
+            self.db = None
+            self.vector_collection = None
+            self.initialized = False
+            logger.info("MongoDB connection closed")
+# Create a singleton instance
+mongodb = MongoDB()

src/ctp_slack_bot/models/VectorQuery.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from pydantic import BaseModel, Field, validator
+from typing import Optional, List, Dict, Any
+from ctp_slack_bot.core.config import settings
+class VectorQuery(BaseModel):
+    """Parameters for a similarity search against the vector database.
+    Attributes:
+        query_text: Text that gets vectorized and used for the similarity search
+        k: How many similar documents to retrieve; defaults to settings.TOP_K_MATCHES
+        score_threshold: Lowest similarity score a result may have and still be included
+        filter_metadata: Optional filters for metadata fields
+    """
+    query_text: str
+    k: int = settings.TOP_K_MATCHES
+    score_threshold: float = 0.7
+    filter_metadata: Optional[Dict[str, Any]] = None

src/ctp_slack_bot/models/content.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from pydantic import BaseModel, Field
+from typing import Optional, List, Dict, Any
+from ctp_slack_bot.models.slack import SlackMessage
+class RetreivedContext(BaseModel):
+    """Represents the context for a Slack question, as returned from the vector store database.
+    Attributes:
+        contextual_text: The text that is relevant to the question.
+        metadata_source: The source of the contextual text.
+        similarity_score: The similarity score of the contextual text to the question.
+        said_by: Optional. The speaker of the contextual text, when known.
+        in_reation_to_question: Optional. The question that the contextual text is related to.
+    """
+    contextual_text: str
+    metadata_source: str
+    similarity_score: float
+    # These were `str = Optional[None]`, which made the default the NoneType class
+    # and rejected an explicit None; the fields are genuinely optional.
+    said_by: Optional[str] = None
+    in_reation_to_question: Optional[str] = None

src/ctp_slack_bot/services/AnswerQuestionService.py ADDED Viewed

	@@ -0,0 +1,60 @@

+from pydantic import BaseModel, validator
+from typing import List, Optional, Tuple
+from ctp_slack_bot.core.config import settings
+import numpy as np
+from openai import OpenAI
+from ctp_slack_bot.models.slack import SlackMessage
+from ctp_slack_bot.models.content import RetreivedContext
+class GenerateAnswer():
+    """
+    Service for language model operations.
+    """
+    def __init__(self):
+        # OPENAI_API_KEY is an Optional[SecretStr]; hand the client the raw string,
+        # and pass None through unchanged when the key is not configured.
+        api_key = settings.OPENAI_API_KEY
+        self.client = OpenAI(api_key=api_key.get_secret_value() if api_key is not None else None)
+    def generate_answer(self, question: SlackMessage, context: List[RetreivedContext]) -> str:
+        """Generate a response using OpenAI's API with retrieved context.
+        Args:
+            question (SlackMessage): The user's question; only its text is sent
+            context (List[RetreivedContext]): List of RetreivedContext
+        Returns:
+            str: Generated answer
+        """
+        # Prepare context string from retrieved chunks, one chunk per line
+        context_str = "".join(f"{c.contextual_text}\n" for c in context)
+        # logger.info(f"Generating response for question: {question}")
+        # logger.info(f"Using {len(context)} context chunks")
+        # Create messages for the chat completion
+        messages = [
+            {"role": "system", "content": settings.SYSTEM_PROMPT},
+            {"role": "user", "content":
+                    f"""Student Question: {question.text}
+                    Context from class materials and transcripts: {context_str}
+                    Please answer the Student Question based on the Context from class materials and transcripts. If the context doesn't contain relevant information, acknowledge that and suggest asking the professor."""}
+        ]
+        # Generate response
+        response = self.client.chat.completions.create(
+            model=settings.CHAT_MODEL,
+            messages=messages,
+            max_tokens=settings.MAX_TOKENS,
+            temperature=settings.TEMPERATURE
+        )
+        return response.choices[0].message.content
+### REMOVE BELOW, PUT SOMEWHERE IN TESTS BUT IDK WHERE YET
+# sm = SlackMessage(text="What is the capital of France?", channel_id="123", user_id="456", timestamp="789")
+# context = [RetreivedContext(contextual_text="The capital of France is Paris", metadata_source="class materials", similarity_score=0.95)]
+# a = GenerateAnswer()
+# a.generate_answer(sm, context)

src/ctp_slack_bot/services/ContextRetrievalService.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import logging
+from typing import List, Dict, Any, Optional
+from ctp_slack_bot.models.slack import SlackMessage
+from ctp_slack_bot.models.content import RetreivedContext
+from ctp_slack_bot.models.VectorQuery import VectorQuery
+from ctp_slack_bot.services.VectorizationService import VectorizationService
+from ctp_slack_bot.services.VectorDatabaseService import VectorDatabaseService
+from ctp_slack_bot.core.config import settings
+logger = logging.getLogger(__name__)
+class ContextRetrievalService:
+    """
+    Looks up context in the vector database that is relevant to a user's question.
+    """
+    def __init__(self):
+        self.vectorization_service = VectorizationService()
+        self.vector_db_service = VectorDatabaseService()
+    async def initialize(self):
+        """
+        Initialize the vector database service this service depends on.
+        """
+        await self.vector_db_service.initialize()
+    async def get_context(self, message: SlackMessage) -> List[RetreivedContext]:
+        """
+        Fetch context relevant to a Slack message.
+        The message text is embedded with VectorizationService and the embedding is
+        used to query VectorDatabaseService for similar content. Messages that are
+        not questions, embedding failures, and any raised exception all yield an
+        empty list.
+        Args:
+            message: The SlackMessage containing the user's question
+        Returns:
+            List[RetreivedContext]: Retrieved context items with similarity scores
+        """
+        if not message.is_question:
+            logger.debug(f"Message {message.key} is not a question, skipping context retrieval")
+            return []
+        try:
+            # Embed the message text; a missing or empty result leaves nothing to search with
+            vectors = self.vectorization_service.get_embeddings([message.text])
+            if vectors is None or len(vectors) == 0:
+                logger.error(f"Failed to generate embedding for message: {message.key}")
+                return []
+            # Only one text was submitted, so the first embedding is the query vector
+            query_vector = vectors[0].tolist()
+            # 0.7 is the minimum similarity a match must reach
+            search_params = VectorQuery(query_text=message.text, k=settings.TOP_K_MATCHES, score_threshold=0.7)
+            # Look up similar content in the vector database
+            matches = await self.vector_db_service.search_by_similarity(query=search_params, query_embedding=query_vector)
+            logger.info(f"Retrieved {len(matches)} context items for message: {message.key}")
+            return matches
+        except Exception as e:
+            logger.error(f"Error retrieving context for message {message.key}: {str(e)}")
+            return []

src/ctp_slack_bot/services/VectorDatabaseService.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import logging
+from typing import List, Dict, Any, Optional
+# import numpy as np
+from ctp_slack_bot.db.MongoDB import mongodb
+from ctp_slack_bot.models.VectorQuery import VectorQuery
+from ctp_slack_bot.models.content import RetreivedContext
+logger = logging.getLogger(__name__)
+class VectorDatabaseService:
+    """
+    Service for storing and retrieving vector embeddings from MongoDB.
+    """
+    async def initialize(self):
+        """
+        Initialize the database connection.
+        """
+        await mongodb.initialize()
+    async def store(self, text: str, embedding: List[float], metadata: Dict[str, Any]) -> str:
+        """
+        Store text and its embedding vector in the database.
+        Args:
+            text: The text content to store
+            embedding: The vector embedding of the text
+            metadata: Additional metadata about the text (source, timestamp, etc.)
+        Returns:
+            str: The ID of the stored document
+        Raises:
+            Exception: Whatever the insert raised, after logging it.
+        """
+        if not mongodb.initialized:
+            await mongodb.initialize()
+        try:
+            # Create document to store
+            document = {
+                "text": text,
+                "embedding": embedding,
+                "metadata": metadata
+            }
+            # Insert into collection
+            result = await mongodb.vector_collection.insert_one(document)
+            logger.debug(f"Stored document with ID: {result.inserted_id}")
+            return str(result.inserted_id)
+        except Exception as e:
+            logger.error(f"Error storing embedding: {str(e)}")
+            raise
+    async def search_by_similarity(self, query: VectorQuery, query_embedding: List[float]) -> List[RetreivedContext]:
+        """
+        Query the vector database for similar documents.
+        Args:
+            query: VectorQuery object with search parameters
+            query_embedding: The vector embedding of the query text
+        Returns:
+            List[RetreivedContext]: Similar documents whose score reaches query.score_threshold
+        Raises:
+            Exception: Whatever the search or result conversion raised, after logging it.
+        """
+        if not mongodb.initialized:
+            await mongodb.initialize()
+        try:
+            # Build aggregation pipeline for vector search
+            pipeline = [
+                {
+                    "$search": {
+                        "index": "vector_index",
+                        "knnBeta": {
+                            "vector": query_embedding,
+                            "path": "embedding",
+                            "k": query.k
+                        }
+                    }
+                },
+                {
+                    "$project": {
+                        "_id": 0,
+                        "text": 1,
+                        "metadata": 1,
+                        "score": {"$meta": "searchScore"}
+                    }
+                }
+            ]
+            # Add metadata filters if provided; the $match stage goes right after $search
+            if query.filter_metadata:
+                metadata_filter = {f"metadata.{k}": v for k, v in query.filter_metadata.items()}
+                pipeline.insert(1, {"$match": metadata_filter})
+            # Execute the pipeline
+            results = await mongodb.vector_collection.aggregate(pipeline).to_list(length=query.k)
+            # Convert to RetreivedContext objects directly
+            context_results = []
+            for result in results:
+                # The search score is used as returned; no rescaling is applied here
+                score = result.get("score", 0)
+                # Skip if below threshold
+                if score < query.score_threshold:
+                    continue
+                # A document without a metadata sub-document should not abort the whole search
+                metadata = result.get("metadata") or {}
+                context_results.append(
+                    RetreivedContext(
+                        contextual_text=result["text"],
+                        metadata_source=metadata.get("source", "unknown"),
+                        similarity_score=score,
+                        said_by=metadata.get("speaker", None),
+                        in_reation_to_question=metadata.get("related_question", None)
+                    )
+                )
+            logger.debug(f"Found {len(context_results)} similar documents")
+            return context_results
+        except Exception as e:
+            logger.error(f"Error in similarity search: {str(e)}")
+            raise