gabykim committed on
Commit
279fcbd
·
1 Parent(s): 410a99b

gradio chat interface draft

Browse files
poetry.lock CHANGED
The diff for this file is too large to render. See raw diff
 
pyproject.toml CHANGED
@@ -12,13 +12,14 @@ authors = [
12
  readme = "README.md"
13
  requires-python = ">=3.10, <4.0"
14
  dependencies = [
15
- "pydantic-ai (>=0.0.20,<0.0.21)",
16
  "gitpython (>=3.1.44,<4.0.0)",
17
  "tree-sitter (>=0.24.0,<0.25.0)",
18
  "tree-sitter-python (>=0.23.6,<0.24.0)",
19
  "pydantic-settings (>=2.7.1,<3.0.0)",
20
  "chromadb (>=0.6.3,<0.7.0)",
21
- "ollama (>=0.4.7,<0.5.0)"
 
22
  ]
23
 
24
  [tool.poetry]
 
12
  readme = "README.md"
13
  requires-python = ">=3.10, <4.0"
14
  dependencies = [
15
+ "pydantic-ai[logfire] (>=0.0.20,<0.0.21)",
16
  "gitpython (>=3.1.44,<4.0.0)",
17
  "tree-sitter (>=0.24.0,<0.25.0)",
18
  "tree-sitter-python (>=0.23.6,<0.24.0)",
19
  "pydantic-settings (>=2.7.1,<3.0.0)",
20
  "chromadb (>=0.6.3,<0.7.0)",
21
+ "ollama (>=0.4.7,<0.5.0)",
22
+ "gradio (>=5.13.1,<6.0.0)"
23
  ]
24
 
25
  [tool.poetry]
src/know_lang_bot/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
import logfire

# Package-import side effect: configure logfire once for the whole package.
# 'if-token-present' means telemetry is only sent when a logfire token is
# available in the environment; otherwise this is a local no-op.
logfire.configure(send_to_logfire='if-token-present')
src/know_lang_bot/chat_bot/chat_agent.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import List, Dict, Any, Optional
3
+ import chromadb
4
+ from pathlib import Path
5
+ from pydantic_ai import Agent, RunContext
6
+ from know_lang_bot.chat_bot.chat_config import ChatAppConfig, chat_app_config
7
+ from know_lang_bot.utils.fancy_log import FancyLogger
8
+ from pydantic import BaseModel
9
+ import ollama
10
+ import logfire
11
+
12
+ LOG = FancyLogger(__name__)
13
+
14
@dataclass
class CodeQADeps:
    """Dependencies for the Code Q&A Agent"""
    collection: chromadb.Collection  # vector store holding the embedded code chunks
    config: ChatAppConfig  # application settings (LLM, DB, chat options)
19
+
20
class RetrievedContext(BaseModel):
    """Structure for retrieved context"""
    chunks: List[str]  # raw code chunks that passed the similarity filter
    metadatas: List[Dict[str, Any]]  # per-chunk metadata (file_path, start/end_line, ...)
    references_md: str  # pre-rendered markdown listing the source references
25
+
26
class AgentResponse(BaseModel):
    """Structure for agent responses"""
    answer: str  # the assistant's answer text
    references_md: Optional[str] = None  # markdown code references, if any were retrieved
30
+
31
# Initialize the agent with system prompt and dependencies.
# The model identifier is resolved once at import time from the module-level
# chat_app_config as a "provider:model" string.
code_qa_agent = Agent(
    f'{chat_app_config.llm.model_provider}:{chat_app_config.llm.model_name}',
    deps_type=CodeQADeps,
    result_type=AgentResponse,
    system_prompt="""
    You are an expert code assistant helping users understand a codebase.

    Always:
    1. Reference specific files and line numbers in your explanations
    2. Be direct and concise while being comprehensive
    3. If the context is insufficient, explain why
    4. If you're unsure about something, acknowledge it

    Your response should be helpful for software engineers trying to understand complex codebases.
    """,
)
48
+
49
@code_qa_agent.tool
@logfire.instrument()
async def retrieve_context(
    ctx: RunContext[CodeQADeps],
    question: str
) -> RetrievedContext:
    """
    Retrieve relevant code context from the vector database.

    Embeds the question, queries ChromaDB for the nearest chunks, filters
    them by the configured distance threshold, and renders a markdown
    reference list for display.

    Args:
        ctx: The context containing dependencies
        question: The user's question to find relevant code for

    Returns:
        RetrievedContext with the surviving chunks, their metadata, and a
        markdown reference string (empty if nothing passed the filter).
    """
    import asyncio  # local import: only needed to off-load blocking client calls

    # ollama.embed and collection.query are synchronous; run them in a worker
    # thread so this async tool does not block the event loop while waiting.
    embedded_question = await asyncio.to_thread(
        ollama.embed,
        model=ctx.deps.config.llm.embedding_model,
        input=question,
    )

    results = await asyncio.to_thread(
        ctx.deps.collection.query,
        query_embeddings=embedded_question['embeddings'],
        n_results=ctx.deps.config.chat.max_context_chunks,
        include=['metadatas', 'documents', 'distances'],
    )

    relevant_chunks = []
    relevant_metadatas = []

    # Chroma returns one result list per query embedding; guard against an
    # empty result set before indexing [0].
    if results['documents'] and results['documents'][0]:
        for doc, meta, dist in zip(
            results['documents'][0],
            results['metadatas'][0],
            results['distances'][0],
        ):
            # NOTE(review): 'dist' is a vector distance (lower = more similar),
            # so despite its name the threshold acts as a *maximum* distance.
            if dist <= ctx.deps.config.chat.similarity_threshold:
                relevant_chunks.append(doc)
                relevant_metadatas.append(meta)

    # Format references for display
    references = []
    for meta in relevant_metadatas:
        file_path = Path(meta['file_path']).name
        ref = f"**{file_path}** (lines {meta['start_line']}-{meta['end_line']})"
        if meta.get('name'):
            ref += f"\n- {meta['type']}: `{meta['name']}`"
        references.append(ref)

    return RetrievedContext(
        chunks=relevant_chunks,
        metadatas=relevant_metadatas,
        references_md="\n\n".join(references),
    )
100
+
src/know_lang_bot/chat_bot/chat_config.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings
2
+ from pydantic import Field
3
+ from know_lang_bot.config import AppConfig
4
+
5
class ChatConfig(BaseSettings):
    """Settings for the chat interface and retrieval behaviour."""
    max_context_chunks: int = Field(
        default=5,
        description="Maximum number of similar chunks to include in context"
    )
    # The agent keeps a chunk when its vector distance is <= this value
    # (lower distance = more similar), so this is a maximum distance, not a
    # minimum similarity score as the previous description claimed.
    similarity_threshold: float = Field(
        default=0.7,
        description="Maximum vector distance (lower is more similar) for a chunk to be included"
    )
    interface_title: str = Field(
        default="Code Repository Q&A Assistant",
        description="Title shown in the chat interface"
    )
    interface_description: str = Field(
        default="Ask questions about the codebase and I'll help you understand it!",
        description="Description shown in the chat interface"
    )
22
+
23
class ChatAppConfig(AppConfig):
    # Extends the base AppConfig with chat-interface specific settings.
    chat: ChatConfig = Field(default_factory=ChatConfig)


# Module-level singleton consumed by the agent and the Gradio interface.
chat_app_config = ChatAppConfig()
src/know_lang_bot/chat_bot/chat_interface.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from know_lang_bot.chat_bot.chat_config import ChatAppConfig, chat_app_config
3
+ from know_lang_bot.utils.fancy_log import FancyLogger
4
+ from know_lang_bot.chat_bot.chat_agent import code_qa_agent, CodeQADeps, AgentResponse
5
+ import chromadb
6
+ from typing import List, Dict
7
+ import logfire
8
+
9
+ LOG = FancyLogger(__name__)
10
+
11
class CodeQAChatInterface:
    """Gradio chat UI backed by the code Q&A agent and a ChromaDB collection."""

    def __init__(self, config: ChatAppConfig):
        self.config = config
        self._init_chroma()
        self.agent = code_qa_agent

    def _init_chroma(self):
        """Initialize ChromaDB connection"""
        self.db_client = chromadb.PersistentClient(
            path=str(self.config.db.persist_directory)
        )
        self.collection = self.db_client.get_collection(
            name=self.config.db.collection_name
        )

    @logfire.instrument('Chatbot Process Question with {message=}')
    async def process_question(
        self,
        message: str,
        history: List[Dict[str, str]]
    ) -> AgentResponse:
        """Process a question and return the answer with references"""
        try:
            deps = CodeQADeps(
                collection=self.collection,
                config=self.config
            )

            response = await self.agent.run(message, deps=deps)
            return response.data

        except Exception as e:
            # UI boundary: log and return a friendly message instead of crashing.
            LOG.error(f"Error processing question: {e}")
            return AgentResponse(
                answer="I encountered an error processing your question. Please try again.",
                references_md=""
            )

    def create_interface(self) -> gr.Blocks:
        """Create the Gradio interface"""
        with gr.Blocks() as interface:
            gr.Markdown(f"# {self.config.chat.interface_title}")
            gr.Markdown(self.config.chat.interface_description)

            references_placeholder = "Code references will appear here..."

            with gr.Row():
                with gr.Column(scale=2):
                    # bubble_full_width was dropped: deprecated/no-op in Gradio 5.x.
                    chatbot = gr.Chatbot(type="messages")
                    msg = gr.Textbox(
                        label="Ask about the codebase",
                        placeholder="What does the CodeParser class do?",
                        container=False
                    )
                    clear = gr.ClearButton([msg, chatbot])

                with gr.Column(scale=1):
                    references = gr.Markdown(
                        label="Referenced Code",
                        value=references_placeholder
                    )

            async def respond(message, history):
                response = await self.process_question(message, history)
                # Component updates must be returned as outputs; assigning to
                # references.value inside the handler does not refresh the UI.
                return {
                    msg: "",
                    chatbot: history + [
                        {"role": "user", "content": message},
                        {"role": "assistant", "content": response.answer}
                    ],
                    references: response.references_md or references_placeholder,
                }

            # 'references' is listed as an output so the markdown panel updates.
            msg.submit(respond, [msg, chatbot], [msg, chatbot, references])
            # Reset the references panel to its placeholder; a Markdown value
            # must be a string, not a list.
            clear.click(lambda: references_placeholder, None, references)

        return interface
89
+
90
def create_chatbot() -> gr.Blocks:
    """Build and return the Gradio Blocks app wired to the module config."""
    return CodeQAChatInterface(chat_app_config).create_interface()
src/know_lang_bot/chat_bot/run.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
from know_lang_bot.chat_bot.chat_interface import create_chatbot

# Keep `demo` at module level (gradio tooling looks for this name), but only
# launch the web server when executed as a script — importing this module
# should not start a server as a side effect.
demo = create_chatbot()

if __name__ == "__main__":
    demo.launch()