Multiple embedding providers implemented
src/know_lang_bot/chat_bot/chat_graph.py
CHANGED
```diff
@@ -5,15 +5,15 @@ from typing import AsyncGenerator, List, Dict, Any, Optional
 import chromadb
 from pydantic import BaseModel
 from pydantic_graph import BaseNode, EndStep, Graph, GraphRunContext, End, HistoryStep
-import ollama
 from know_lang_bot.config import AppConfig
 from know_lang_bot.utils.fancy_log import FancyLogger
 from pydantic_ai import Agent
 import logfire
-from …
+from rich.pretty import Pretty
 from enum import Enum
 from rich.console import Console
 from know_lang_bot.utils.model_provider import create_pydantic_model
+from know_lang_bot.models.embeddings import generate_embedding

 LOG = FancyLogger(__name__)
 console = Console()
@@ -152,17 +152,17 @@ class RetrieveContextNode(BaseNode[ChatGraphState, ChatGraphDeps, ChatResult]):

     async def run(self, ctx: GraphRunContext[ChatGraphState, ChatGraphDeps]) -> AnswerQuestionNode:
         try:
-            …
-            …
-            …
+            question_embedding = generate_embedding(
+                input=ctx.state.polished_question or ctx.state.original_question,
+                model=ctx.deps.config.embedding
             )

             results = ctx.deps.collection.query(
-                query_embeddings=…
+                query_embeddings=question_embedding,
                 n_results=ctx.deps.config.chat.max_context_chunks,
                 include=['metadatas', 'documents', 'distances']
             )
-            logfire.debug('query result: {result}', result=…
+            logfire.debug('query result: {result}', result=Pretty(results))

             relevant_chunks = []
             relevant_metadatas = []
```
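For orientation, here is a minimal, self-contained sketch of the retrieval flow this hunk wires into `RetrieveContextNode.run`. The client path, collection name, model name, and the explicit list around the query vector are illustrative assumptions, not code from the repository:

```python
# Hypothetical standalone version of the new retrieval step.
import chromadb

from know_lang_bot.core.types import ModelProvider
from know_lang_bot.models.embeddings import EmbeddingConfig, generate_embedding

client = chromadb.PersistentClient(path="./chroma_db")          # assumed path
collection = client.get_or_create_collection("code_summaries")  # assumed name

# Embed the (polished or original) question with whichever provider is configured.
embedding_config = EmbeddingConfig(ModelProvider.OLLAMA, "mxbai-embed-large")
question_embedding = generate_embedding("How are chunks summarized?", embedding_config)

# Chroma's query API takes a list of query vectors.
results = collection.query(
    query_embeddings=[question_embedding],
    n_results=5,
    include=["metadatas", "documents", "distances"],
)
```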
src/know_lang_bot/core/types.py
CHANGED
```diff
@@ -23,4 +23,5 @@ class ModelProvider(str, Enum):
     OPENAI = "openai"
     ANTHROPIC = "anthropic"
     OLLAMA = "ollama"
-    HUGGINGFACE = "huggingface"
+    HUGGINGFACE = "huggingface"
+    TESTING = "testing"
```
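Because `ModelProvider` subclasses `str`, the new member round-trips cleanly from plain strings in config files; a quick illustration:

```python
from know_lang_bot.core.types import ModelProvider

assert ModelProvider("testing") is ModelProvider.TESTING  # lookup by value
assert ModelProvider.TESTING == "testing"                 # str comparison works
```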
src/know_lang_bot/models/embeddings.py
ADDED
```diff
@@ -0,0 +1,71 @@
+import ollama
+import openai
+from know_lang_bot.config import EmbeddingConfig, ModelProvider
+from typing import Union, List, overload
+
+# Type definitions
+EmbeddingVector = List[float]
+
+class EmbeddingConfig:
+    def __init__(self, provider: ModelProvider, model_name: str):
+        self.provider = provider
+        self.model_name = model_name
+
+def _process_ollama_batch(inputs: List[str], model_name: str) -> List[EmbeddingVector]:
+    """Helper function to process Ollama embeddings in batch."""
+    return [
+        ollama.embed(model=model_name, input=inputs)['embeddings']
+    ]
+
+def _process_openai_batch(inputs: List[str], model_name: str) -> List[EmbeddingVector]:
+    """Helper function to process OpenAI embeddings in batch."""
+    response = openai.embeddings.create(
+        input=inputs,
+        model=model_name
+    )
+    return [item.embedding for item in response.data]
+
+@overload
+def generate_embedding(input: str, config: EmbeddingConfig) -> EmbeddingVector: ...
+
+@overload
+def generate_embedding(input: List[str], config: EmbeddingConfig) -> List[EmbeddingVector]: ...
+
+def generate_embedding(
+    input: Union[str, List[str]],
+    config: EmbeddingConfig
+) -> Union[EmbeddingVector, List[EmbeddingVector]]:
+    """
+    Generate embeddings for single text input or batch of texts.
+
+    Args:
+        input: Single string or list of strings to embed
+        config: Configuration object containing provider and model information
+
+    Returns:
+        Single embedding vector for single input, or list of embedding vectors for batch input
+
+    Raises:
+        ValueError: If input type is invalid or provider is not supported
+        RuntimeError: If embedding generation fails
+    """
+    if not input:
+        raise ValueError("Input cannot be empty")
+
+    # Convert single string to list for batch processing
+    is_single_input = isinstance(input, str)
+    inputs = [input] if is_single_input else input
+
+    try:
+        if config.provider == ModelProvider.OLLAMA:
+            embeddings = _process_ollama_batch(inputs, config.model_name)
+        elif config.provider == ModelProvider.OPENAI:
+            embeddings = _process_openai_batch(inputs, config.model_name)
+        else:
+            raise ValueError(f"Unsupported provider: {config.provider}")
+
+        # Return single embedding for single input
+        return embeddings[0] if is_single_input else embeddings
+
+    except Exception as e:
+        raise RuntimeError(f"Failed to generate embeddings: {str(e)}") from e
```
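A hedged usage sketch for the new helper: the `@overload`s mean callers get a single vector back for a single string and a list of vectors for a batch. The model name below is an example rather than a repository default, and an `OPENAI_API_KEY` is assumed to be set:

```python
from know_lang_bot.core.types import ModelProvider
from know_lang_bot.models.embeddings import EmbeddingConfig, generate_embedding

cfg = EmbeddingConfig(ModelProvider.OPENAI, "text-embedding-3-small")  # example model

# Single input -> one EmbeddingVector (first overload)
vec = generate_embedding("def add(a, b): return a + b", cfg)

# Batch input -> one vector per text (second overload)
vecs = generate_embedding(["chunk one", "chunk two"], cfg)
assert len(vecs) == 2

# Empty input raises ValueError; provider failures surface as RuntimeError.
```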
src/know_lang_bot/summarizer/summarizer.py
CHANGED
```diff
@@ -3,7 +3,6 @@ import chromadb
 from chromadb.errors import InvalidCollectionException
 from pydantic_ai import Agent
 from pydantic import BaseModel, Field
-import ollama
 from pprint import pformat
 from rich.progress import Progress

@@ -11,6 +10,7 @@ from know_lang_bot.config import AppConfig
 from know_lang_bot.core.types import CodeChunk, ModelProvider
 from know_lang_bot.utils.fancy_log import FancyLogger
 from know_lang_bot.utils.model_provider import create_pydantic_model
+from know_lang_bot.models.embeddings import generate_embedding

 LOG = FancyLogger(__name__)

@@ -76,17 +76,6 @@ Provide a clean, concise and focused summary. Don't include unnecessary nor gene…
             metadata={"hnsw:space": "cosine"}
         )

-    def _get_embedding(self, text: str) -> List[float]:
-        """Get embedding for text using configured provider"""
-        if self.config.embedding.provider == ModelProvider.OLLAMA:
-            response = ollama.embed(
-                model=self.config.embedding.model_name,
-                input=text
-            )
-            return response['embeddings']
-        else:
-            raise ValueError(f"Unsupported embedding provider: {self.config.embedding.provider}")
-
     async def summarize_chunk(self, chunk: CodeChunk) -> str:
         """Summarize a single code chunk using the LLM"""
         prompt = f"""
@@ -122,7 +111,7 @@ Provide a clean, concise and focused summary. Don't include unnecessary nor gene…
         )

         # Get embedding for the summary
-        embedding = self._get_embedding(summary)
+        embedding = generate_embedding(summary, self.config.embedding)

         # Store in ChromaDB
         self.collection.add(
```
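The effect of this change is that `process_and_store_chunk` no longer hard-codes Ollama; it embeds the summary through the shared helper and stores it in Chroma. A self-contained sketch of that step, with illustrative collection setup, id, and model name:

```python
import chromadb

from know_lang_bot.core.types import ModelProvider
from know_lang_bot.models.embeddings import EmbeddingConfig, generate_embedding

client = chromadb.EphemeralClient()  # in-memory client, for illustration
collection = client.get_or_create_collection(
    "code_summaries", metadata={"hnsw:space": "cosine"}
)

summary = "Adds two integers and returns the result."  # stands in for the LLM output
embedding = generate_embedding(
    summary, EmbeddingConfig(ModelProvider.OLLAMA, "mxbai-embed-large")
)

collection.add(
    documents=[summary],
    embeddings=[embedding],
    ids=["example.py:1-2"],  # mirrors the file:start-end id scheme used in the tests
)
```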
src/know_lang_bot/utils/model_provider.py
CHANGED
```diff
@@ -13,5 +13,8 @@ def create_pydantic_model(
         return model_str
     elif model_provider == ModelProvider.HUGGINGFACE:
         return HuggingFaceModel(model_name=model_name)
+    elif model_provider == ModelProvider.TESTING:
+        # should be used for testing purposes only
+        pass
     else:
         raise NotImplementedError(f"Model {model_provider}:{model_name} is not supported")
```
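Note that the `TESTING` branch ends in `pass`, so `create_pydantic_model` implicitly returns `None` for that provider instead of raising; the test fixture relies on the model never actually being invoked. A minimal illustration (keyword arguments assumed, since the full signature sits outside the hunk):

```python
from know_lang_bot.core.types import ModelProvider
from know_lang_bot.utils.model_provider import create_pydantic_model

model = create_pydantic_model(
    model_provider=ModelProvider.TESTING,
    model_name="testing",
)
assert model is None  # the `pass` branch falls through to an implicit None
```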
tests/test_summarizer.py
CHANGED
```diff
@@ -11,7 +11,7 @@ def config():
     """Create a test configuration"""
     with tempfile.TemporaryDirectory() as temp_dir:
         yield AppConfig(
-            llm={"model_name": "…
+            llm={"model_name": "testing", "model_provider": "testing"},
             db={"persist_directory": Path(temp_dir), "collection_name": "test_collection"}
         )

@@ -85,11 +85,11 @@ def test_chromadb_initialization(mock_agent_class, config: AppConfig):
     assert new_summarizer.collection is not None

 @pytest.mark.asyncio
-@patch('know_lang_bot.summarizer.summarizer.…
+@patch('know_lang_bot.summarizer.summarizer.generate_embedding')
 @patch('know_lang_bot.summarizer.summarizer.Agent')
 async def test_process_and_store_chunk_with_embedding(
     mock_agent_class,
-    …
+    mock_embedding_generator,
     config: AppConfig,
     sample_chunks: list[CodeChunk],
     mock_run_result: Mock
@@ -100,8 +100,8 @@ async def test_process_and_store_chunk_with_embedding(
     mock_agent.run = AsyncMock(return_value=mock_run_result)

     # Setup mock embedding response
-    mock_embedding = …
-    …
+    mock_embedding = [0.1, 0.2, 0.3]  # Sample embedding vector
+    mock_embedding_generator.return_value = mock_embedding

     summarizer = CodeSummarizer(config)

@@ -112,9 +112,9 @@ async def test_process_and_store_chunk_with_embedding(
     await summarizer.process_and_store_chunk(sample_chunks[0])

     # Verify ollama.embed was called with correct parameters
-    …
-    …
-    …
+    mock_embedding_generator.assert_called_once_with(
+        mock_run_result.data,
+        config.embedding,
     )

     # Verify collection.add was called with correct parameters
@@ -123,7 +123,7 @@ async def test_process_and_store_chunk_with_embedding(

     kwargs = add_call[1]
     assert len(kwargs['embeddings']) == 3
-    assert kwargs['embeddings'] == mock_embedding
+    assert kwargs['embeddings'] == mock_embedding
     assert kwargs['documents'][0] == mock_run_result.data
     assert kwargs['ids'][0] == f"{sample_chunks[0].file_path}:{sample_chunks[0].start_line}-{sample_chunks[0].end_line}"
```
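One detail worth knowing when reading the new test signature: stacked `@patch` decorators inject mocks bottom-up, so the innermost patch (`Agent`) binds to the first parameter (`mock_agent_class`) and the outermost (`generate_embedding`) to the second (`mock_embedding_generator`). A generic illustration of the ordering:

```python
import os
from unittest.mock import patch

@patch("os.path.exists")  # outermost patch -> second mock argument
@patch("os.getcwd")       # innermost patch -> first mock argument
def demo(mock_getcwd, mock_exists):
    mock_getcwd.return_value = "/tmp"
    mock_exists.return_value = True
    assert os.getcwd() == "/tmp"
    assert os.path.exists("anywhere")

demo()
```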