gabykim committed on
Commit
279fcbd
·
1 Parent(s): 410a99b

gradio chat interface draft

Browse files
poetry.lock CHANGED
The diff for this file is too large to render. See raw diff
 
pyproject.toml CHANGED
@@ -12,13 +12,14 @@ authors = [
12
  readme = "README.md"
13
  requires-python = ">=3.10, <4.0"
14
  dependencies = [
15
- "pydantic-ai (>=0.0.20,<0.0.21)",
16
  "gitpython (>=3.1.44,<4.0.0)",
17
  "tree-sitter (>=0.24.0,<0.25.0)",
18
  "tree-sitter-python (>=0.23.6,<0.24.0)",
19
  "pydantic-settings (>=2.7.1,<3.0.0)",
20
  "chromadb (>=0.6.3,<0.7.0)",
21
- "ollama (>=0.4.7,<0.5.0)"
 
22
  ]
23
 
24
  [tool.poetry]
 
12
  readme = "README.md"
13
  requires-python = ">=3.10, <4.0"
14
  dependencies = [
15
+ "pydantic-ai[logfire] (>=0.0.20,<0.0.21)",
16
  "gitpython (>=3.1.44,<4.0.0)",
17
  "tree-sitter (>=0.24.0,<0.25.0)",
18
  "tree-sitter-python (>=0.23.6,<0.24.0)",
19
  "pydantic-settings (>=2.7.1,<3.0.0)",
20
  "chromadb (>=0.6.3,<0.7.0)",
21
+ "ollama (>=0.4.7,<0.5.0)",
22
+ "gradio (>=5.13.1,<6.0.0)"
23
  ]
24
 
25
  [tool.poetry]
src/know_lang_bot/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
import logfire

# Package-import side effect: configure logfire once for the whole package.
# 'if-token-present' means telemetry is only sent when a logfire token is
# available in the environment; otherwise this is a local no-op.
logfire.configure(send_to_logfire='if-token-present')
src/know_lang_bot/chat_bot/chat_agent.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import List, Dict, Any, Optional
3
+ import chromadb
4
+ from pathlib import Path
5
+ from pydantic_ai import Agent, RunContext
6
+ from know_lang_bot.chat_bot.chat_config import ChatAppConfig, chat_app_config
7
+ from know_lang_bot.utils.fancy_log import FancyLogger
8
+ from pydantic import BaseModel
9
+ import ollama
10
+ import logfire
11
+
12
+ LOG = FancyLogger(__name__)
13
+
14
@dataclass
class CodeQADeps:
    """Dependencies for the Code Q&A Agent"""
    collection: chromadb.Collection  # vector store holding the embedded code chunks
    config: ChatAppConfig  # application settings (LLM, DB, chat options)
19
+
20
class RetrievedContext(BaseModel):
    """Structure for retrieved context"""
    chunks: List[str]  # raw code chunks that passed the similarity filter
    metadatas: List[Dict[str, Any]]  # per-chunk metadata (file_path, start/end_line, ...)
    references_md: str  # pre-rendered markdown listing the source references
25
+
26
class AgentResponse(BaseModel):
    """Structure for agent responses"""
    answer: str  # the assistant's answer text
    references_md: Optional[str] = None  # markdown code references, if any were retrieved
30
+
31
# Initialize the agent with system prompt and dependencies.
# The model identifier is resolved once at import time from the module-level
# chat_app_config as a "provider:model" string.
code_qa_agent = Agent(
    f'{chat_app_config.llm.model_provider}:{chat_app_config.llm.model_name}',
    deps_type=CodeQADeps,
    result_type=AgentResponse,
    system_prompt="""
    You are an expert code assistant helping users understand a codebase.

    Always:
    1. Reference specific files and line numbers in your explanations
    2. Be direct and concise while being comprehensive
    3. If the context is insufficient, explain why
    4. If you're unsure about something, acknowledge it

    Your response should be helpful for software engineers trying to understand complex codebases.
    """,
)
48
+
49
@code_qa_agent.tool
@logfire.instrument()
async def retrieve_context(
    ctx: RunContext[CodeQADeps],
    question: str
) -> RetrievedContext:
    """
    Retrieve relevant code context from the vector database.

    Embeds the question, queries ChromaDB for the nearest chunks, filters
    them by the configured distance threshold, and renders a markdown
    reference list for display.

    Args:
        ctx: The context containing dependencies
        question: The user's question to find relevant code for

    Returns:
        RetrievedContext with the surviving chunks, their metadata, and a
        markdown reference string (empty if nothing passed the filter).
    """
    import asyncio  # local import: only needed to off-load blocking client calls

    # ollama.embed and collection.query are synchronous; run them in a worker
    # thread so this async tool does not block the event loop while waiting.
    embedded_question = await asyncio.to_thread(
        ollama.embed,
        model=ctx.deps.config.llm.embedding_model,
        input=question,
    )

    results = await asyncio.to_thread(
        ctx.deps.collection.query,
        query_embeddings=embedded_question['embeddings'],
        n_results=ctx.deps.config.chat.max_context_chunks,
        include=['metadatas', 'documents', 'distances'],
    )

    relevant_chunks = []
    relevant_metadatas = []

    # Chroma returns one result list per query embedding; guard against an
    # empty result set before indexing [0].
    if results['documents'] and results['documents'][0]:
        for doc, meta, dist in zip(
            results['documents'][0],
            results['metadatas'][0],
            results['distances'][0],
        ):
            # NOTE(review): 'dist' is a vector distance (lower = more similar),
            # so despite its name the threshold acts as a *maximum* distance.
            if dist <= ctx.deps.config.chat.similarity_threshold:
                relevant_chunks.append(doc)
                relevant_metadatas.append(meta)

    # Format references for display
    references = []
    for meta in relevant_metadatas:
        file_path = Path(meta['file_path']).name
        ref = f"**{file_path}** (lines {meta['start_line']}-{meta['end_line']})"
        if meta.get('name'):
            ref += f"\n- {meta['type']}: `{meta['name']}`"
        references.append(ref)

    return RetrievedContext(
        chunks=relevant_chunks,
        metadatas=relevant_metadatas,
        references_md="\n\n".join(references),
    )
100
+
src/know_lang_bot/chat_bot/chat_config.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings
2
+ from pydantic import Field
3
+ from know_lang_bot.config import AppConfig
4
+
5
class ChatConfig(BaseSettings):
    """Settings for the chat interface and retrieval behaviour."""
    max_context_chunks: int = Field(
        default=5,
        description="Maximum number of similar chunks to include in context"
    )
    # The agent keeps a chunk when its vector distance is <= this value
    # (lower distance = more similar), so this is a maximum distance, not a
    # minimum similarity score as the previous description claimed.
    similarity_threshold: float = Field(
        default=0.7,
        description="Maximum vector distance (lower is more similar) for a chunk to be included"
    )
    interface_title: str = Field(
        default="Code Repository Q&A Assistant",
        description="Title shown in the chat interface"
    )
    interface_description: str = Field(
        default="Ask questions about the codebase and I'll help you understand it!",
        description="Description shown in the chat interface"
    )
22
+
23
class ChatAppConfig(AppConfig):
    # Extends the base AppConfig with chat-interface specific settings.
    chat: ChatConfig = Field(default_factory=ChatConfig)


# Module-level singleton consumed by the agent and the Gradio interface.
chat_app_config = ChatAppConfig()
src/know_lang_bot/chat_bot/chat_interface.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from know_lang_bot.chat_bot.chat_config import ChatAppConfig, chat_app_config
3
+ from know_lang_bot.utils.fancy_log import FancyLogger
4
+ from know_lang_bot.chat_bot.chat_agent import code_qa_agent, CodeQADeps, AgentResponse
5
+ import chromadb
6
+ from typing import List, Dict
7
+ import logfire
8
+
9
+ LOG = FancyLogger(__name__)
10
+
11
class CodeQAChatInterface:
    """Gradio chat UI backed by the code Q&A agent and a ChromaDB collection."""

    def __init__(self, config: ChatAppConfig):
        self.config = config
        self._init_chroma()
        self.agent = code_qa_agent

    def _init_chroma(self):
        """Initialize ChromaDB connection"""
        self.db_client = chromadb.PersistentClient(
            path=str(self.config.db.persist_directory)
        )
        self.collection = self.db_client.get_collection(
            name=self.config.db.collection_name
        )

    @logfire.instrument('Chatbot Process Question with {message=}')
    async def process_question(
        self,
        message: str,
        history: List[Dict[str, str]]
    ) -> AgentResponse:
        """Process a question and return the answer with references"""
        try:
            deps = CodeQADeps(
                collection=self.collection,
                config=self.config
            )

            response = await self.agent.run(message, deps=deps)
            return response.data

        except Exception as e:
            # UI boundary: log and return a friendly message instead of crashing.
            LOG.error(f"Error processing question: {e}")
            return AgentResponse(
                answer="I encountered an error processing your question. Please try again.",
                references_md=""
            )

    def create_interface(self) -> gr.Blocks:
        """Create the Gradio interface"""
        with gr.Blocks() as interface:
            gr.Markdown(f"# {self.config.chat.interface_title}")
            gr.Markdown(self.config.chat.interface_description)

            references_placeholder = "Code references will appear here..."

            with gr.Row():
                with gr.Column(scale=2):
                    # bubble_full_width was dropped: deprecated/no-op in Gradio 5.x.
                    chatbot = gr.Chatbot(type="messages")
                    msg = gr.Textbox(
                        label="Ask about the codebase",
                        placeholder="What does the CodeParser class do?",
                        container=False
                    )
                    clear = gr.ClearButton([msg, chatbot])

                with gr.Column(scale=1):
                    references = gr.Markdown(
                        label="Referenced Code",
                        value=references_placeholder
                    )

            async def respond(message, history):
                response = await self.process_question(message, history)
                # Component updates must be returned as outputs; assigning to
                # references.value inside the handler does not refresh the UI.
                return {
                    msg: "",
                    chatbot: history + [
                        {"role": "user", "content": message},
                        {"role": "assistant", "content": response.answer}
                    ],
                    references: response.references_md or references_placeholder,
                }

            # 'references' is listed as an output so the markdown panel updates.
            msg.submit(respond, [msg, chatbot], [msg, chatbot, references])
            # Reset the references panel to its placeholder; a Markdown value
            # must be a string, not a list.
            clear.click(lambda: references_placeholder, None, references)

        return interface
89
+
90
def create_chatbot() -> gr.Blocks:
    """Build and return the Gradio Blocks app wired to the module config."""
    return CodeQAChatInterface(chat_app_config).create_interface()
src/know_lang_bot/chat_bot/run.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
from know_lang_bot.chat_bot.chat_interface import create_chatbot

# Keep `demo` at module level (gradio tooling looks for this name), but only
# launch the web server when executed as a script — importing this module
# should not start a server as a side effect.
demo = create_chatbot()

if __name__ == "__main__":
    demo.launch()