gabykim committed on
Commit
aad4327
·
1 Parent(s): 3cf6c5a

show referenced code block in chat interface

Browse files
.env.example CHANGED
@@ -1,4 +1,5 @@
1
  LLM__MODEL_NAME=llama3.2
2
  LLM__MODEL_PROVIDER=ollama
3
  LLM__API_KEY=your_api_key
4
- DB__PERSIST_DIRECTORY=./my_chroma_db
 
 
1
  LLM__MODEL_NAME=llama3.2
2
  LLM__MODEL_PROVIDER=ollama
3
  LLM__API_KEY=your_api_key
4
+ DB__PERSIST_DIRECTORY=./my_chroma_db
5
+ DB__CODEBASE_DIRECTORY=.
src/know_lang_bot/chat_bot/chat_graph.py CHANGED
@@ -19,12 +19,11 @@ class RetrievedContext(BaseModel):
19
  """Structure for retrieved context"""
20
  chunks: List[str]
21
  metadatas: List[Dict[str, Any]]
22
- references_md: str
23
 
24
  class ChatResult(BaseModel):
25
  """Final result from the chat graph"""
26
  answer: str
27
- references_md: Optional[str] = None
28
 
29
  @dataclass
30
  class ChatGraphState:
@@ -98,23 +97,9 @@ class RetrieveContextNode(BaseNode[ChatGraphState, ChatGraphDeps, ChatResult]):
98
  relevant_chunks.append(doc)
99
  relevant_metadatas.append(meta)
100
 
101
- # Format references for display
102
- references = []
103
- for meta in relevant_metadatas:
104
- file_path = meta['file_path'].split('/')[-1]
105
- ref = f"**{file_path}** (lines {meta['start_line']}-{meta['end_line']})"
106
- if meta.get('name'):
107
- ref += f"\n- {meta['type']}: `{meta['name']}`"
108
- references.append(ref)
109
-
110
- with logfire.span('formatted {count} references', count=len(references)):
111
- for ref in references:
112
- logfire.debug(ref)
113
-
114
  ctx.state.retrieved_context = RetrievedContext(
115
  chunks=relevant_chunks,
116
  metadatas=relevant_metadatas,
117
- references_md="\n\n".join(references)
118
  )
119
 
120
  except Exception as e:
@@ -144,7 +129,7 @@ class AnswerQuestionNode(BaseNode[ChatGraphState, ChatGraphDeps, ChatResult]):
144
  return End(ChatResult(
145
  answer="I couldn't find any relevant code context for your question. "
146
  "Could you please rephrase or be more specific?",
147
- references_md=""
148
  ))
149
 
150
  context = ctx.state.retrieved_context
@@ -162,13 +147,13 @@ class AnswerQuestionNode(BaseNode[ChatGraphState, ChatGraphDeps, ChatResult]):
162
  result = await answer_agent.run(prompt)
163
  return End(ChatResult(
164
  answer=result.data,
165
- references_md=context.references_md
166
  ))
167
  except Exception as e:
168
  LOG.error(f"Error generating answer: {e}")
169
  return End(ChatResult(
170
  answer="I encountered an error processing your question. Please try again.",
171
- references_md=""
172
  ))
173
 
174
  # Create the graph
 
19
  """Structure for retrieved context"""
20
  chunks: List[str]
21
  metadatas: List[Dict[str, Any]]
 
22
 
23
class ChatResult(BaseModel):
    """Final result from the chat graph.

    Carries the generated answer and, when available, the retrieval context
    used to produce it so the UI can render the referenced code.
    """
    answer: str
    # None when no useful context was retrieved (or retrieval failed upstream).
    retrieved_context: Optional[RetrievedContext] = None
27
 
28
  @dataclass
29
  class ChatGraphState:
 
97
  relevant_chunks.append(doc)
98
  relevant_metadatas.append(meta)
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  ctx.state.retrieved_context = RetrievedContext(
101
  chunks=relevant_chunks,
102
  metadatas=relevant_metadatas,
 
103
  )
104
 
105
  except Exception as e:
 
129
  return End(ChatResult(
130
  answer="I couldn't find any relevant code context for your question. "
131
  "Could you please rephrase or be more specific?",
132
+ retrieved_context=None,
133
  ))
134
 
135
  context = ctx.state.retrieved_context
 
147
  result = await answer_agent.run(prompt)
148
  return End(ChatResult(
149
  answer=result.data,
150
+ retrieved_context=context,
151
  ))
152
  except Exception as e:
153
  LOG.error(f"Error generating answer: {e}")
154
  return End(ChatResult(
155
  answer="I encountered an error processing your question. Please try again.",
156
+ retrieved_context=context,
157
  ))
158
 
159
  # Create the graph
src/know_lang_bot/chat_bot/chat_interface.py CHANGED
@@ -5,6 +5,7 @@ from know_lang_bot.chat_bot.chat_graph import ChatResult, process_chat
5
  import chromadb
6
  from typing import List, Dict
7
  import logfire
 
8
 
9
  LOG = FancyLogger(__name__)
10
 
@@ -12,6 +13,7 @@ class CodeQAChatInterface:
12
  def __init__(self, config: ChatAppConfig):
13
  self.config = config
14
  self._init_chroma()
 
15
 
16
  def _init_chroma(self):
17
  """Initialize ChromaDB connection"""
@@ -22,6 +24,18 @@ class CodeQAChatInterface:
22
  name=self.config.db.collection_name
23
  )
24
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  @logfire.instrument('Chatbot Process Question with {message=}')
26
  async def process_question(
27
  self,
@@ -37,42 +51,72 @@ class CodeQAChatInterface:
37
  gr.Markdown(f"# {self.config.chat.interface_title}")
38
  gr.Markdown(self.config.chat.interface_description)
39
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  with gr.Row():
41
- with gr.Column(scale=2):
42
- chatbot = gr.Chatbot(type="messages", bubble_full_width=False)
43
- msg = gr.Textbox(
44
- label="Ask about the codebase",
45
- placeholder="What does the CodeParser class do?",
46
- container=False
47
- )
48
- with gr.Row():
49
- submit = gr.Button("Submit")
50
- clear = gr.ClearButton([msg, chatbot])
51
-
52
- with gr.Column(scale=1):
53
- references = gr.Markdown(
54
- label="Referenced Code",
55
- value="Code references will appear here..."
56
- )
57
 
58
  async def respond(message, history):
59
  result = await self.process_question(message, history)
60
- references.value = result.references_md
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  return {
62
  msg: "",
63
- chatbot: history + [
64
- {"role": "user", "content": message},
65
- {"role": "assistant", "content": result.answer}
66
- ]
67
  }
68
 
69
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
70
  submit.click(respond, [msg, chatbot], [msg, chatbot])
71
- clear.click(lambda: [], None, references)
72
 
73
  return interface
74
 
75
  def create_chatbot() -> gr.Blocks:
76
  interface = CodeQAChatInterface(chat_app_config)
77
-
78
  return interface.create_interface()
 
5
  import chromadb
6
  from typing import List, Dict
7
  import logfire
8
+ from pathlib import Path
9
 
10
  LOG = FancyLogger(__name__)
11
 
 
13
def __init__(self, config: ChatAppConfig):
    """Build the chat interface: keep the settings, open ChromaDB, and record the codebase root."""
    self.config = config
    # _init_chroma reads self.config, so it must run after the assignment above.
    self._init_chroma()
    self.codebase_dir = Path(self.config.db.codebase_directory)
17
 
18
  def _init_chroma(self):
19
  """Initialize ChromaDB connection"""
 
24
  name=self.config.db.collection_name
25
  )
26
 
27
+ def _get_code_block(self, file_path: str, start_line: int, end_line: int) -> str:
28
+ """Read the specified lines from a file and return as a code block"""
29
+ try:
30
+ full_path = self.codebase_dir / file_path
31
+ with open(full_path, 'r') as f:
32
+ lines = f.readlines()
33
+ code_lines = lines[start_line-1:end_line]
34
+ return ''.join(code_lines)
35
+ except Exception as e:
36
+ LOG.error(f"Error reading code block: {e}")
37
+ return "Error reading code"
38
+
39
  @logfire.instrument('Chatbot Process Question with {message=}')
40
  async def process_question(
41
  self,
 
51
  gr.Markdown(f"# {self.config.chat.interface_title}")
52
  gr.Markdown(self.config.chat.interface_description)
53
 
54
+ chatbot = gr.Chatbot(
55
+ type="messages",
56
+ bubble_full_width=False,
57
+ render_markdown=True
58
+ )
59
+
60
+ msg = gr.Textbox(
61
+ label="Ask about the codebase",
62
+ placeholder="What does the CodeParser class do?",
63
+ container=False
64
+ )
65
+
66
  with gr.Row():
67
+ submit = gr.Button("Submit")
68
+ clear = gr.ClearButton([msg, chatbot])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
async def respond(message, history):
    """Handle one chat turn: run the chat graph, then emit any referenced
    code blocks (as collapsible sections) followed by the assistant's answer."""
    result = await self.process_question(message, history)

    # Messages produced by this turn, starting with the user's question.
    turn = [{"role": "user", "content": message}]

    ctx = result.retrieved_context
    if ctx and ctx.metadatas:
        for meta in ctx.metadatas:
            path = meta['file_path']
            first = meta['start_line']
            last = meta['end_line']

            snippet = self._get_code_block(path, first, last)
            if not snippet:
                continue

            title = f"📄 {path} (lines {first}-{last})"
            if meta.get('name'):
                title += f" - {meta['type']}: {meta['name']}"

            # Collapsible code reference, shown before the answer.
            turn.append({
                "role": "assistant",
                "content": f"<details><summary>{title}</summary>\n\n```python\n{snippet}\n```\n\n</details>",
            })

    # The answer itself comes after all code references.
    turn.append({"role": "assistant", "content": result.answer})

    return {
        msg: "",
        chatbot: history + turn
    }
114
 
115
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
116
  submit.click(respond, [msg, chatbot], [msg, chatbot])
 
117
 
118
  return interface
119
 
120
  def create_chatbot() -> gr.Blocks:
121
  interface = CodeQAChatInterface(chat_app_config)
 
122
  return interface.create_interface()
src/know_lang_bot/config.py CHANGED
@@ -42,6 +42,10 @@ class DBConfig(BaseSettings):
42
  default="sentence-transformers/all-mpnet-base-v2",
43
  description="Embedding model to use"
44
  )
 
 
 
 
45
 
46
  class AppConfig(BaseSettings):
47
  model_config = SettingsConfigDict(
 
42
  default="sentence-transformers/all-mpnet-base-v2",
43
  description="Embedding model to use"
44
  )
45
+ codebase_directory: Path = Field(
46
+ default=Path("./"),
47
+ description="Root directory of the codebase to analyze"
48
+ )
49
 
50
  class AppConfig(BaseSettings):
51
  model_config = SettingsConfigDict(