gabykim committed on
Commit
aad4327
·
1 Parent(s): 3cf6c5a

show referenced code block in chat interface

Browse files
.env.example CHANGED
@@ -1,4 +1,5 @@
1
  LLM__MODEL_NAME=llama3.2
2
  LLM__MODEL_PROVIDER=ollama
3
  LLM__API_KEY=your_api_key
4
- DB__PERSIST_DIRECTORY=./my_chroma_db
 
 
1
  LLM__MODEL_NAME=llama3.2
2
  LLM__MODEL_PROVIDER=ollama
3
  LLM__API_KEY=your_api_key
4
+ DB__PERSIST_DIRECTORY=./my_chroma_db
5
+ DB__CODEBASE_DIRECTORY=.
src/know_lang_bot/chat_bot/chat_graph.py CHANGED
@@ -19,12 +19,11 @@ class RetrievedContext(BaseModel):
19
  """Structure for retrieved context"""
20
  chunks: List[str]
21
  metadatas: List[Dict[str, Any]]
22
- references_md: str
23
 
24
  class ChatResult(BaseModel):
25
  """Final result from the chat graph"""
26
  answer: str
27
- references_md: Optional[str] = None
28
 
29
  @dataclass
30
  class ChatGraphState:
@@ -98,23 +97,9 @@ class RetrieveContextNode(BaseNode[ChatGraphState, ChatGraphDeps, ChatResult]):
98
  relevant_chunks.append(doc)
99
  relevant_metadatas.append(meta)
100
 
101
- # Format references for display
102
- references = []
103
- for meta in relevant_metadatas:
104
- file_path = meta['file_path'].split('/')[-1]
105
- ref = f"**{file_path}** (lines {meta['start_line']}-{meta['end_line']})"
106
- if meta.get('name'):
107
- ref += f"\n- {meta['type']}: `{meta['name']}`"
108
- references.append(ref)
109
-
110
- with logfire.span('formatted {count} references', count=len(references)):
111
- for ref in references:
112
- logfire.debug(ref)
113
-
114
  ctx.state.retrieved_context = RetrievedContext(
115
  chunks=relevant_chunks,
116
  metadatas=relevant_metadatas,
117
- references_md="\n\n".join(references)
118
  )
119
 
120
  except Exception as e:
@@ -144,7 +129,7 @@ class AnswerQuestionNode(BaseNode[ChatGraphState, ChatGraphDeps, ChatResult]):
144
  return End(ChatResult(
145
  answer="I couldn't find any relevant code context for your question. "
146
  "Could you please rephrase or be more specific?",
147
- references_md=""
148
  ))
149
 
150
  context = ctx.state.retrieved_context
@@ -162,13 +147,13 @@ class AnswerQuestionNode(BaseNode[ChatGraphState, ChatGraphDeps, ChatResult]):
162
  result = await answer_agent.run(prompt)
163
  return End(ChatResult(
164
  answer=result.data,
165
- references_md=context.references_md
166
  ))
167
  except Exception as e:
168
  LOG.error(f"Error generating answer: {e}")
169
  return End(ChatResult(
170
  answer="I encountered an error processing your question. Please try again.",
171
- references_md=""
172
  ))
173
 
174
  # Create the graph
 
19
  """Structure for retrieved context"""
20
  chunks: List[str]
21
  metadatas: List[Dict[str, Any]]
 
22
 
23
class ChatResult(BaseModel):
    """Final result from the chat graph.

    Carries the generated answer and, when available, the retrieval context
    used to produce it so the UI can render the referenced code.
    """
    answer: str
    # None when no useful context was retrieved (or retrieval failed upstream).
    retrieved_context: Optional[RetrievedContext] = None
27
 
28
  @dataclass
29
  class ChatGraphState:
 
97
  relevant_chunks.append(doc)
98
  relevant_metadatas.append(meta)
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  ctx.state.retrieved_context = RetrievedContext(
101
  chunks=relevant_chunks,
102
  metadatas=relevant_metadatas,
 
103
  )
104
 
105
  except Exception as e:
 
129
  return End(ChatResult(
130
  answer="I couldn't find any relevant code context for your question. "
131
  "Could you please rephrase or be more specific?",
132
+ retrieved_context=None,
133
  ))
134
 
135
  context = ctx.state.retrieved_context
 
147
  result = await answer_agent.run(prompt)
148
  return End(ChatResult(
149
  answer=result.data,
150
+ retrieved_context=context,
151
  ))
152
  except Exception as e:
153
  LOG.error(f"Error generating answer: {e}")
154
  return End(ChatResult(
155
  answer="I encountered an error processing your question. Please try again.",
156
+ retrieved_context=context,
157
  ))
158
 
159
  # Create the graph
src/know_lang_bot/chat_bot/chat_interface.py CHANGED
@@ -5,6 +5,7 @@ from know_lang_bot.chat_bot.chat_graph import ChatResult, process_chat
5
  import chromadb
6
  from typing import List, Dict
7
  import logfire
 
8
 
9
  LOG = FancyLogger(__name__)
10
 
@@ -12,6 +13,7 @@ class CodeQAChatInterface:
12
  def __init__(self, config: ChatAppConfig):
13
  self.config = config
14
  self._init_chroma()
 
15
 
16
  def _init_chroma(self):
17
  """Initialize ChromaDB connection"""
@@ -22,6 +24,18 @@ class CodeQAChatInterface:
22
  name=self.config.db.collection_name
23
  )
24
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  @logfire.instrument('Chatbot Process Question with {message=}')
26
  async def process_question(
27
  self,
@@ -37,42 +51,72 @@ class CodeQAChatInterface:
37
  gr.Markdown(f"# {self.config.chat.interface_title}")
38
  gr.Markdown(self.config.chat.interface_description)
39
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  with gr.Row():
41
- with gr.Column(scale=2):
42
- chatbot = gr.Chatbot(type="messages", bubble_full_width=False)
43
- msg = gr.Textbox(
44
- label="Ask about the codebase",
45
- placeholder="What does the CodeParser class do?",
46
- container=False
47
- )
48
- with gr.Row():
49
- submit = gr.Button("Submit")
50
- clear = gr.ClearButton([msg, chatbot])
51
-
52
- with gr.Column(scale=1):
53
- references = gr.Markdown(
54
- label="Referenced Code",
55
- value="Code references will appear here..."
56
- )
57
 
58
  async def respond(message, history):
59
  result = await self.process_question(message, history)
60
- references.value = result.references_md
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  return {
62
  msg: "",
63
- chatbot: history + [
64
- {"role": "user", "content": message},
65
- {"role": "assistant", "content": result.answer}
66
- ]
67
  }
68
 
69
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
70
  submit.click(respond, [msg, chatbot], [msg, chatbot])
71
- clear.click(lambda: [], None, references)
72
 
73
  return interface
74
 
75
  def create_chatbot() -> gr.Blocks:
76
  interface = CodeQAChatInterface(chat_app_config)
77
-
78
  return interface.create_interface()
 
5
  import chromadb
6
  from typing import List, Dict
7
  import logfire
8
+ from pathlib import Path
9
 
10
  LOG = FancyLogger(__name__)
11
 
 
13
def __init__(self, config: ChatAppConfig):
    """Build the chat interface: keep the settings, open ChromaDB, and record the codebase root."""
    self.config = config
    # _init_chroma reads self.config, so it must run after the assignment above.
    self._init_chroma()
    self.codebase_dir = Path(self.config.db.codebase_directory)
17
 
18
  def _init_chroma(self):
19
  """Initialize ChromaDB connection"""
 
24
  name=self.config.db.collection_name
25
  )
26
 
27
+ def _get_code_block(self, file_path: str, start_line: int, end_line: int) -> str:
28
+ """Read the specified lines from a file and return as a code block"""
29
+ try:
30
+ full_path = self.codebase_dir / file_path
31
+ with open(full_path, 'r') as f:
32
+ lines = f.readlines()
33
+ code_lines = lines[start_line-1:end_line]
34
+ return ''.join(code_lines)
35
+ except Exception as e:
36
+ LOG.error(f"Error reading code block: {e}")
37
+ return "Error reading code"
38
+
39
  @logfire.instrument('Chatbot Process Question with {message=}')
40
  async def process_question(
41
  self,
 
51
  gr.Markdown(f"# {self.config.chat.interface_title}")
52
  gr.Markdown(self.config.chat.interface_description)
53
 
54
+ chatbot = gr.Chatbot(
55
+ type="messages",
56
+ bubble_full_width=False,
57
+ render_markdown=True
58
+ )
59
+
60
+ msg = gr.Textbox(
61
+ label="Ask about the codebase",
62
+ placeholder="What does the CodeParser class do?",
63
+ container=False
64
+ )
65
+
66
  with gr.Row():
67
+ submit = gr.Button("Submit")
68
+ clear = gr.ClearButton([msg, chatbot])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
async def respond(message, history):
    """Handle one chat turn: run the chat graph, then emit any referenced
    code blocks (as collapsible sections) followed by the assistant's answer."""
    result = await self.process_question(message, history)

    # Messages produced by this turn, starting with the user's question.
    turn = [{"role": "user", "content": message}]

    ctx = result.retrieved_context
    if ctx and ctx.metadatas:
        for meta in ctx.metadatas:
            path = meta['file_path']
            first = meta['start_line']
            last = meta['end_line']

            snippet = self._get_code_block(path, first, last)
            if not snippet:
                continue

            title = f"📄 {path} (lines {first}-{last})"
            if meta.get('name'):
                title += f" - {meta['type']}: {meta['name']}"

            # Collapsible code reference, shown before the answer.
            turn.append({
                "role": "assistant",
                "content": f"<details><summary>{title}</summary>\n\n```python\n{snippet}\n```\n\n</details>",
            })

    # The answer itself comes after all code references.
    turn.append({"role": "assistant", "content": result.answer})

    return {
        msg: "",
        chatbot: history + turn
    }
114
 
115
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
116
  submit.click(respond, [msg, chatbot], [msg, chatbot])
 
117
 
118
  return interface
119
 
120
  def create_chatbot() -> gr.Blocks:
121
  interface = CodeQAChatInterface(chat_app_config)
 
122
  return interface.create_interface()
src/know_lang_bot/config.py CHANGED
@@ -42,6 +42,10 @@ class DBConfig(BaseSettings):
42
  default="sentence-transformers/all-mpnet-base-v2",
43
  description="Embedding model to use"
44
  )
 
 
 
 
45
 
46
  class AppConfig(BaseSettings):
47
  model_config = SettingsConfigDict(
 
42
  default="sentence-transformers/all-mpnet-base-v2",
43
  description="Embedding model to use"
44
  )
45
+ codebase_directory: Path = Field(
46
+ default=Path("./"),
47
+ description="Root directory of the codebase to analyze"
48
+ )
49
 
50
  class AppConfig(BaseSettings):
51
  model_config = SettingsConfigDict(