Commit: show referenced code block in chat interface
.env.example CHANGED

```diff
@@ -1,4 +1,5 @@
 LLM__MODEL_NAME=llama3.2
 LLM__MODEL_PROVIDER=ollama
 LLM__API_KEY=your_api_key
-DB__PERSIST_DIRECTORY=./my_chroma_db
+DB__PERSIST_DIRECTORY=./my_chroma_db
+DB__CODEBASE_DIRECTORY=.
```
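The `DB__` prefix suggests these variables are routed into the `DBConfig` settings class by pydantic-settings. A minimal sketch of one way that mapping can work; only the variable names and field names are confirmed by this diff, and whether the repo uses `env_prefix` on `DBConfig` or `env_nested_delimiter` on a parent settings class is an assumption:

```python
from pathlib import Path

from pydantic_settings import BaseSettings, SettingsConfigDict


class DBConfig(BaseSettings):
    # Assumption: DB__PERSIST_DIRECTORY / DB__CODEBASE_DIRECTORY resolve to
    # these fields via an env prefix; the real config.py may instead nest
    # DBConfig under AppConfig with env_nested_delimiter="__".
    model_config = SettingsConfigDict(env_file=".env", env_prefix="DB__")

    persist_directory: Path = Path("./my_chroma_db")
    codebase_directory: Path = Path("./")


db = DBConfig()
print(db.codebase_directory)  # Path('.') once DB__CODEBASE_DIRECTORY=. is set
```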
src/know_lang_bot/chat_bot/chat_graph.py CHANGED

```diff
@@ -19,12 +19,11 @@ class RetrievedContext(BaseModel):
     """Structure for retrieved context"""
     chunks: List[str]
     metadatas: List[Dict[str, Any]]
-    references_md: str
 
 class ChatResult(BaseModel):
     """Final result from the chat graph"""
     answer: str
-
+    retrieved_context: Optional[RetrievedContext] = None
 
 @dataclass
 class ChatGraphState:
@@ -98,23 +97,9 @@ class RetrieveContextNode(BaseNode[ChatGraphState, ChatGraphDeps, ChatResult]):
                     relevant_chunks.append(doc)
                     relevant_metadatas.append(meta)
 
-            # Format references for display
-            references = []
-            for meta in relevant_metadatas:
-                file_path = meta['file_path'].split('/')[-1]
-                ref = f"**{file_path}** (lines {meta['start_line']}-{meta['end_line']})"
-                if meta.get('name'):
-                    ref += f"\n- {meta['type']}: `{meta['name']}`"
-                references.append(ref)
-
-            with logfire.span('formatted {count} references', count=len(references)):
-                for ref in references:
-                    logfire.debug(ref)
-
             ctx.state.retrieved_context = RetrievedContext(
                 chunks=relevant_chunks,
                 metadatas=relevant_metadatas,
-                references_md="\n\n".join(references)
             )
 
         except Exception as e:
@@ -144,7 +129,7 @@ class AnswerQuestionNode(BaseNode[ChatGraphState, ChatGraphDeps, ChatResult]):
             return End(ChatResult(
                 answer="I couldn't find any relevant code context for your question. "
                        "Could you please rephrase or be more specific?",
-
+                retrieved_context=None,
             ))
 
         context = ctx.state.retrieved_context
@@ -162,13 +147,13 @@ class AnswerQuestionNode(BaseNode[ChatGraphState, ChatGraphDeps, ChatResult]):
             result = await answer_agent.run(prompt)
             return End(ChatResult(
                 answer=result.data,
-
+                retrieved_context=context,
             ))
         except Exception as e:
             LOG.error(f"Error generating answer: {e}")
             return End(ChatResult(
                 answer="I encountered an error processing your question. Please try again.",
-
+                retrieved_context=context,
             ))
 
 # Create the graph
```
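The net effect of this change is that reference formatting leaves the graph node: `ChatResult` now carries the raw `RetrievedContext`, and presentation becomes the caller's job. A hedged sketch of what a consumer can now do with the structured result (the metadata keys `file_path`, `start_line`, `end_line`, `type`, and `name` come from the removed formatting code; the helper function itself is hypothetical):

```python
from know_lang_bot.chat_bot.chat_graph import ChatResult


def format_references(result: ChatResult) -> str:
    """Rebuild the old references_md string outside the graph (hypothetical helper)."""
    if result.retrieved_context is None:
        return "No code context was retrieved."

    refs = []
    for meta in result.retrieved_context.metadatas:
        ref = f"**{meta['file_path']}** (lines {meta['start_line']}-{meta['end_line']})"
        if meta.get('name'):
            ref += f"\n- {meta['type']}: `{meta['name']}`"
        refs.append(ref)
    return "\n\n".join(refs)
```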
src/know_lang_bot/chat_bot/chat_interface.py CHANGED

````diff
@@ -5,6 +5,7 @@ from know_lang_bot.chat_bot.chat_graph import ChatResult, process_chat
 import chromadb
 from typing import List, Dict
 import logfire
+from pathlib import Path
 
 LOG = FancyLogger(__name__)
 
@@ -12,6 +13,7 @@ class CodeQAChatInterface:
     def __init__(self, config: ChatAppConfig):
         self.config = config
         self._init_chroma()
+        self.codebase_dir = Path(config.db.codebase_directory)
 
     def _init_chroma(self):
         """Initialize ChromaDB connection"""
@@ -22,6 +24,18 @@ class CodeQAChatInterface:
             name=self.config.db.collection_name
         )
 
+    def _get_code_block(self, file_path: str, start_line: int, end_line: int) -> str:
+        """Read the specified lines from a file and return as a code block"""
+        try:
+            full_path = self.codebase_dir / file_path
+            with open(full_path, 'r') as f:
+                lines = f.readlines()
+            code_lines = lines[start_line-1:end_line]
+            return ''.join(code_lines)
+        except Exception as e:
+            LOG.error(f"Error reading code block: {e}")
+            return "Error reading code"
+
     @logfire.instrument('Chatbot Process Question with {message=}')
     async def process_question(
         self,
@@ -37,42 +51,72 @@ class CodeQAChatInterface:
             gr.Markdown(f"# {self.config.chat.interface_title}")
             gr.Markdown(self.config.chat.interface_description)
 
+            chatbot = gr.Chatbot(
+                type="messages",
+                bubble_full_width=False,
+                render_markdown=True
+            )
+
+            msg = gr.Textbox(
+                label="Ask about the codebase",
+                placeholder="What does the CodeParser class do?",
+                container=False
+            )
+
             with gr.Row():
-
-
-                msg = gr.Textbox(
-                    label="Ask about the codebase",
-                    placeholder="What does the CodeParser class do?",
-                    container=False
-                )
-                with gr.Row():
-                    submit = gr.Button("Submit")
-                    clear = gr.ClearButton([msg, chatbot])
-
-                with gr.Column(scale=1):
-                    references = gr.Markdown(
-                        label="Referenced Code",
-                        value="Code references will appear here..."
-                    )
+                submit = gr.Button("Submit")
+                clear = gr.ClearButton([msg, chatbot])
 
             async def respond(message, history):
                 result = await self.process_question(message, history)
-
+
+                # Format the answer with code blocks
+                formatted_messages = []
+
+                # Add user message
+                formatted_messages.append({
+                    "role": "user",
+                    "content": message
+                })
+
+                # Collect code blocks first
+                code_blocks = []
+                if result.retrieved_context and result.retrieved_context.metadatas:
+                    for metadata in result.retrieved_context.metadatas:
+                        file_path = metadata['file_path']
+                        start_line = metadata['start_line']
+                        end_line = metadata['end_line']
+
+                        code = self._get_code_block(file_path, start_line, end_line)
+                        if code:
+                            title = f"📄 {file_path} (lines {start_line}-{end_line})"
+                            if metadata.get('name'):
+                                title += f" - {metadata['type']}: {metadata['name']}"
+
+                            code_blocks.append({
+                                "role": "assistant",
+                                "content": f"<details><summary>{title}</summary>\n\n```python\n{code}\n```\n\n</details>",
+                            })
+
+                # Add code blocks before the answer
+                formatted_messages.extend(code_blocks)
+
+                # Add assistant's answer
+                formatted_messages.append({
+                    "role": "assistant",
+                    "content": result.answer
+                })
+
                 return {
                     msg: "",
-                    chatbot: history + [
-                        {"role": "user", "content": message},
-                        {"role": "assistant", "content": result.answer}
-                    ]
+                    chatbot: history + formatted_messages
                 }
 
             msg.submit(respond, [msg, chatbot], [msg, chatbot])
             submit.click(respond, [msg, chatbot], [msg, chatbot])
-            clear.click(lambda: [], None, references)
 
         return interface
 
 def create_chatbot() -> gr.Blocks:
     interface = CodeQAChatInterface(chat_app_config)
-
     return interface.create_interface()
````
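Two details worth noting in `respond`. First, `_get_code_block` treats `start_line` and `end_line` as 1-indexed and inclusive, hence the `start_line-1` slice. Second, each retrieved chunk becomes its own assistant message wrapped in an HTML `<details>` tag, which the Chatbot renders as a collapsible section because it was created with `type="messages"` and `render_markdown=True`. A self-contained sketch of the same formatting, with standalone functions standing in for the class methods (the dict shape and metadata keys mirror the diff; the function names are illustrative):

````python
from pathlib import Path


def get_code_block(codebase_dir: Path, file_path: str, start_line: int, end_line: int) -> str:
    # 1-indexed, inclusive line range, matching _get_code_block in the diff
    lines = (codebase_dir / file_path).read_text().splitlines(keepends=True)
    return ''.join(lines[start_line - 1:end_line])


def as_collapsible_message(codebase_dir: Path, meta: dict) -> dict:
    """Format one retrieved chunk as a collapsible assistant message."""
    code = get_code_block(codebase_dir, meta['file_path'], meta['start_line'], meta['end_line'])
    title = f"📄 {meta['file_path']} (lines {meta['start_line']}-{meta['end_line']})"
    return {
        "role": "assistant",
        "content": f"<details><summary>{title}</summary>\n\n```python\n{code}\n```\n\n</details>",
    }
````

One caveat in the diff itself: `_get_code_block` returns the string `"Error reading code"` on failure, which is truthy, so the `if code:` guard in `respond` still renders a collapsible block for unreadable files.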
src/know_lang_bot/config.py CHANGED

```diff
@@ -42,6 +42,10 @@ class DBConfig(BaseSettings):
         default="sentence-transformers/all-mpnet-base-v2",
         description="Embedding model to use"
     )
+    codebase_directory: Path = Field(
+        default=Path("./"),
+        description="Root directory of the codebase to analyze"
+    )
 
 class AppConfig(BaseSettings):
     model_config = SettingsConfigDict(
```