Spaces:
Sleeping
Sleeping
gradio chat interface draft
Browse files
poetry.lock
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
CHANGED
@@ -12,13 +12,14 @@ authors = [
|
|
12 |
readme = "README.md"
|
13 |
requires-python = ">=3.10, <4.0"
|
14 |
dependencies = [
|
15 |
-
"pydantic-ai (>=0.0.20,<0.0.21)",
|
16 |
"gitpython (>=3.1.44,<4.0.0)",
|
17 |
"tree-sitter (>=0.24.0,<0.25.0)",
|
18 |
"tree-sitter-python (>=0.23.6,<0.24.0)",
|
19 |
"pydantic-settings (>=2.7.1,<3.0.0)",
|
20 |
"chromadb (>=0.6.3,<0.7.0)",
|
21 |
-
"ollama (>=0.4.7,<0.5.0)"
|
|
|
22 |
]
|
23 |
|
24 |
[tool.poetry]
|
|
|
12 |
readme = "README.md"
|
13 |
requires-python = ">=3.10, <4.0"
|
14 |
dependencies = [
|
15 |
+
"pydantic-ai[logfire] (>=0.0.20,<0.0.21)",
|
16 |
"gitpython (>=3.1.44,<4.0.0)",
|
17 |
"tree-sitter (>=0.24.0,<0.25.0)",
|
18 |
"tree-sitter-python (>=0.23.6,<0.24.0)",
|
19 |
"pydantic-settings (>=2.7.1,<3.0.0)",
|
20 |
"chromadb (>=0.6.3,<0.7.0)",
|
21 |
+
"ollama (>=0.4.7,<0.5.0)",
|
22 |
+
"gradio (>=5.13.1,<6.0.0)"
|
23 |
]
|
24 |
|
25 |
[tool.poetry]
|
src/know_lang_bot/__init__.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
# Package initialization: configure logfire instrumentation exactly once,
# when the package is first imported.
import logfire

# 'if-token-present' means telemetry is only shipped when a logfire token is
# configured in the environment; otherwise this is a no-op, so local runs
# stay fully offline.
logfire.configure(send_to_logfire='if-token-present')
src/know_lang_bot/chat_bot/chat_agent.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
+
from typing import List, Dict, Any, Optional
|
3 |
+
import chromadb
|
4 |
+
from pathlib import Path
|
5 |
+
from pydantic_ai import Agent, RunContext
|
6 |
+
from know_lang_bot.chat_bot.chat_config import ChatAppConfig, chat_app_config
|
7 |
+
from know_lang_bot.utils.fancy_log import FancyLogger
|
8 |
+
from pydantic import BaseModel
|
9 |
+
import ollama
|
10 |
+
import logfire
|
11 |
+
|
# Module-level logger shared by this module.
LOG = FancyLogger(__name__)
13 |
+
|
@dataclass
class CodeQADeps:
    """Dependencies injected into the Code Q&A agent via RunContext.deps."""
    # ChromaDB collection holding the embedded code chunks to search.
    collection: chromadb.Collection
    # Full application configuration (LLM, DB, and chat settings).
    config: ChatAppConfig
19 |
+
|
class RetrievedContext(BaseModel):
    """Context retrieved from the vector store for one user question."""
    # Code-chunk documents that passed the similarity threshold.
    chunks: List[str]
    # Per-chunk metadata; retrieve_context reads 'file_path', 'start_line',
    # 'end_line' and optionally 'name'/'type' from each entry.
    metadatas: List[Dict[str, Any]]
    # Pre-rendered markdown listing of the source references.
    references_md: str
25 |
+
|
class AgentResponse(BaseModel):
    """Structured answer produced by the Code Q&A agent."""
    # Natural-language answer to the user's question.
    answer: str
    # Optional markdown block of code references backing the answer.
    references_md: Optional[str] = None
30 |
+
|
# Module-level singleton agent. The model string is resolved from the shared
# config at import time; tools (retrieve_context below) are registered onto
# this instance via decorators.
code_qa_agent = Agent(
    f'{chat_app_config.llm.model_provider}:{chat_app_config.llm.model_name}',
    deps_type=CodeQADeps,
    result_type=AgentResponse,
    system_prompt="""
    You are an expert code assistant helping users understand a codebase.

    Always:
    1. Reference specific files and line numbers in your explanations
    2. Be direct and concise while being comprehensive
    3. If the context is insufficient, explain why
    4. If you're unsure about something, acknowledge it

    Your response should be helpful for software engineers trying to understand complex codebases.
    """,
)
48 |
+
|
@code_qa_agent.tool
@logfire.instrument()
async def retrieve_context(
    ctx: RunContext[CodeQADeps],
    question: str
) -> RetrievedContext:
    """
    Look up code chunks relevant to the question in the vector database.

    Args:
        ctx: The context containing dependencies
        question: The user's question to find relevant code for
    """
    # Embed the question with the configured embedding model.
    embedding_response = ollama.embed(
        model=ctx.deps.config.llm.embedding_model,
        input=question
    )

    query_result = ctx.deps.collection.query(
        query_embeddings=embedding_response['embeddings'],
        n_results=ctx.deps.config.chat.max_context_chunks,
        include=['metadatas', 'documents', 'distances']
    )

    # Keep only hits whose distance is within the configured threshold
    # (lower distance == more similar).
    threshold = ctx.deps.config.chat.similarity_threshold
    kept = [
        (doc, meta)
        for doc, meta, dist in zip(
            query_result['documents'][0],
            query_result['metadatas'][0],
            query_result['distances'][0]
        )
        if dist <= threshold
    ]
    relevant_chunks = [doc for doc, _ in kept]
    relevant_metadatas = [meta for _, meta in kept]

    # Render one markdown reference entry per retained chunk.
    references = []
    for meta in relevant_metadatas:
        file_path = Path(meta['file_path']).name
        ref = f"**{file_path}** (lines {meta['start_line']}-{meta['end_line']})"
        if meta.get('name'):
            ref += f"\n- {meta['type']}: `{meta['name']}`"
        references.append(ref)

    return RetrievedContext(
        chunks=relevant_chunks,
        metadatas=relevant_metadatas,
        references_md="\n\n".join(references)
    )
100 |
+
|
src/know_lang_bot/chat_bot/chat_config.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic_settings import BaseSettings
|
2 |
+
from pydantic import Field
|
3 |
+
from know_lang_bot.config import AppConfig
|
4 |
+
|
class ChatConfig(BaseSettings):
    """Settings for retrieval behavior and the chat UI text.

    Values can be overridden through environment variables (pydantic-settings).
    """
    max_context_chunks: int = Field(
        default=5,
        description="Maximum number of similar chunks to include in context"
    )
    # Fix: the consumer (retrieve_context) keeps chunks with
    # `distance <= similarity_threshold`, so this is a *maximum distance*
    # cutoff, not a minimum similarity score — the old description was
    # misleading.
    similarity_threshold: float = Field(
        default=0.7,
        description="Maximum vector distance (lower is more similar) for a chunk to be included"
    )
    interface_title: str = Field(
        default="Code Repository Q&A Assistant",
        description="Title shown in the chat interface"
    )
    interface_description: str = Field(
        default="Ask questions about the codebase and I'll help you understand it!",
        description="Description shown in the chat interface"
    )
|
22 |
+
|
class ChatAppConfig(AppConfig):
    """Application config extended with chat-specific settings."""
    # Chat/retrieval settings; default_factory keeps each instance independent.
    chat: ChatConfig = Field(default_factory=ChatConfig)


# Shared singleton instance, imported by the agent and the Gradio interface.
chat_app_config = ChatAppConfig()
src/know_lang_bot/chat_bot/chat_interface.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from know_lang_bot.chat_bot.chat_config import ChatAppConfig, chat_app_config
|
3 |
+
from know_lang_bot.utils.fancy_log import FancyLogger
|
4 |
+
from know_lang_bot.chat_bot.chat_agent import code_qa_agent, CodeQADeps, AgentResponse
|
5 |
+
import chromadb
|
6 |
+
from typing import List, Dict
|
7 |
+
import logfire
|
8 |
+
|
# Module-level logger shared by this module.
LOG = FancyLogger(__name__)
10 |
+
|
class CodeQAChatInterface:
    """Gradio chat UI that answers codebase questions via the Q&A agent.

    Wires a Chatbot + Textbox to ``process_question`` and shows the code
    references returned by the agent in a side panel.
    """

    def __init__(self, config: ChatAppConfig):
        self.config = config
        self._init_chroma()
        self.agent = code_qa_agent

    def _init_chroma(self):
        """Open the persistent ChromaDB client and the configured collection."""
        self.db_client = chromadb.PersistentClient(
            path=str(self.config.db.persist_directory)
        )
        # get_collection raises if the collection does not exist yet; the
        # indexing pipeline must run before the chat app starts.
        self.collection = self.db_client.get_collection(
            name=self.config.db.collection_name
        )

    @logfire.instrument('Chatbot Process Question with {message=}')
    async def process_question(
        self,
        message: str,
        history: List[Dict[str, str]]
    ) -> AgentResponse:
        """Run the agent on the question and return its structured response.

        Args:
            message: The user's question.
            history: Prior chat turns (currently not forwarded to the agent).

        Returns:
            AgentResponse with the answer and markdown references. On any
            failure a generic apology response is returned instead of
            raising, so the UI never crashes.
        """
        try:
            deps = CodeQADeps(
                collection=self.collection,
                config=self.config
            )

            response = await self.agent.run(message, deps=deps)
            return response.data

        except Exception as e:
            LOG.error(f"Error processing question: {e}")
            return AgentResponse(
                answer="I encountered an error processing your question. Please try again.",
                references_md=""
            )

    def create_interface(self) -> gr.Blocks:
        """Create the Gradio interface."""
        with gr.Blocks() as interface:
            gr.Markdown(f"# {self.config.chat.interface_title}")
            gr.Markdown(self.config.chat.interface_description)

            with gr.Row():
                with gr.Column(scale=2):
                    chatbot = gr.Chatbot(
                        type="messages",
                        bubble_full_width=False
                    )
                    msg = gr.Textbox(
                        label="Ask about the codebase",
                        placeholder="What does the CodeParser class do?",
                        container=False
                    )
                    clear = gr.ClearButton([msg, chatbot])

                with gr.Column(scale=1):
                    references = gr.Markdown(
                        label="Referenced Code",
                        value="Code references will appear here..."
                    )

            async def respond(message, history):
                response = await self.process_question(message, history)
                # Fix: assigning `references.value` imperatively does not
                # refresh the component; it must be an event output and get
                # its new value from the returned dict.
                return {
                    msg: "",
                    chatbot: history + [
                        {"role": "user", "content": message},
                        {"role": "assistant", "content": response.answer}
                    ],
                    references: response.references_md or ""
                }

            # Fix: `references` added to the outputs so the panel updates.
            msg.submit(respond, [msg, chatbot], [msg, chatbot, references])
            # Fix: reset the panel with a string — the original lambda
            # returned a list, which is not a valid gr.Markdown value.
            clear.click(
                lambda: "Code references will appear here...",
                None,
                references
            )

        return interface
89 |
+
|
def create_chatbot() -> gr.Blocks:
    """Build the Gradio Blocks app wired to the shared application config."""
    return CodeQAChatInterface(chat_app_config).create_interface()
src/know_lang_bot/chat_bot/run.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
from know_lang_bot.chat_bot.chat_interface import create_chatbot

# Build the app at import time so deployment platforms (e.g. HF Spaces)
# can discover `demo` as a module attribute.
demo = create_chatbot()

# Fix: only start the server when executed as a script — the original
# launched the server as an import side effect.
if __name__ == "__main__":
    demo.launch()
|