Ali2206 committed
Commit 7a8204e (verified) · Parent: 0fb33af

Update app.py

Files changed (1)
  1. app.py +67 -158
app.py CHANGED
@@ -32,10 +32,10 @@ sys.path.insert(0, src_path)
 from txagent.txagent import TxAgent
 
 # Constants
-MAX_MODEL_TOKENS = 32768  # Model's maximum sequence length
-MAX_CHUNK_TOKENS = 8192  # Chunk size aligned with max_num_batched_tokens
-MAX_NEW_TOKENS = 2048  # Maximum tokens for generation
-PROMPT_OVERHEAD = 500  # Estimated tokens for prompt template overhead
+MAX_MODEL_TOKENS = 32768
+MAX_CHUNK_TOKENS = 8192
+MAX_NEW_TOKENS = 2048
+PROMPT_OVERHEAD = 500
 
 def clean_response(text: str) -> str:
     try:
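
Note: the deleted comments documented a simple token budget. Chunk content is capped at MAX_CHUNK_TOKENS - PROMPT_OVERHEAD, so a finished prompt stays near MAX_CHUNK_TOKENS, and generation adds at most MAX_NEW_TOKENS. A quick sanity check using only the constants above:

    # The largest prompt plus the generated continuation must fit the model window.
    assert MAX_CHUNK_TOKENS + MAX_NEW_TOKENS <= MAX_MODEL_TOKENS  # 8192 + 2048 = 10240 <= 32768
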
@@ -48,11 +48,9 @@ def clean_response(text: str) -> str:
     return text.strip()
 
 def estimate_tokens(text: str) -> int:
-    """Estimate the number of tokens based on character length."""
-    return len(text) // 3.5 + 1  # Add 1 to avoid zero estimates
+    return int(len(text) / 3.5) + 1  # cast needed: floor-dividing by a float yields a float, not the annotated int
 
 def extract_text_from_excel(file_path: str) -> str:
-    """Extract text from all sheets in an Excel file."""
     all_text = []
     try:
         xls = pd.ExcelFile(file_path)
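
The heuristic assumes roughly 3.5 characters per token, a common rule of thumb for English text; a real tokenizer will differ at the margins. With the int cast in place it behaves like this:

    print(estimate_tokens(""))        # 1  (the trailing +1 avoids zero estimates)
    print(estimate_tokens("a" * 35))  # 11 (int(35 / 3.5) + 1)
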
@@ -67,10 +65,6 @@ def extract_text_from_excel(file_path: str) -> str:
     return "\n".join(all_text)
 
 def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
-    """
-    Split text into chunks, ensuring each chunk is within token limits,
-    accounting for prompt overhead.
-    """
     effective_max_tokens = max_tokens - PROMPT_OVERHEAD
     if effective_max_tokens <= 0:
         raise ValueError(f"Effective max tokens ({effective_max_tokens}) must be positive.")
@@ -83,7 +77,7 @@ def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
     for line in lines:
         line_tokens = estimate_tokens(line)
         if current_tokens + line_tokens > effective_max_tokens:
-            if current_chunk:  # Save the current chunk if it's not empty
+            if current_chunk:
                 chunks.append("\n".join(current_chunk))
             current_chunk = [line]
             current_tokens = line_tokens
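
A usage sketch for the chunker (with made-up input). One limitation visible in the branch above: a single line longer than the effective budget still becomes a chunk on its own, over the limit.

    records = "\n".join(f"Row {i}: example clinical note." for i in range(10000))
    chunks = split_text_into_chunks(records)
    # Each chunk keeps whole lines and targets at most
    # MAX_CHUNK_TOKENS - PROMPT_OVERHEAD (= 7692) estimated tokens of content.
    print(len(chunks), max(estimate_tokens(c) for c in chunks))
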
@@ -97,7 +91,6 @@ def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
     return chunks
 
 def build_prompt_from_text(chunk: str) -> str:
-    """Build a prompt for analyzing a chunk of clinical data."""
     return f"""
 ### Unstructured Clinical Records
 
@@ -118,7 +111,6 @@ Please analyze the above and provide:
 """
 
 def init_agent():
-    """Initialize the TxAgent with model and tool configurations."""
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
 
@@ -139,148 +131,55 @@ def init_agent():
     return agent
 
 def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
-    """Process the Excel file and generate a final report."""
-    messages = chatbot_state if chatbot_state else []
-    report_path = None
-
-    if file is None or not hasattr(file, "name"):
-        messages.append({"role": "assistant", "content": "❌ Please upload a valid Excel file before analyzing."})
-        return messages, report_path
-
-    try:
-        messages.append({"role": "user", "content": f"Processing Excel file: {os.path.basename(file.name)}"})
-        messages.append({"role": "assistant", "content": "⏳ Extracting and analyzing data..."})
-
-        # Extract text and split into chunks
-        extracted_text = extract_text_from_excel(file.name)
-        chunks = split_text_into_chunks(extracted_text, max_tokens=MAX_CHUNK_TOKENS)
-        chunk_responses = []
-
-        # Process each chunk
-        for i, chunk in enumerate(chunks):
-            messages.append({"role": "assistant", "content": f"🔍 Analyzing chunk {i+1}/{len(chunks)}..."})
-
-            prompt = build_prompt_from_text(chunk)
-            prompt_tokens = estimate_tokens(prompt)
-            if prompt_tokens > MAX_MODEL_TOKENS:
-                messages.append({"role": "assistant", "content": f"❌ Chunk {i+1} prompt too long ({prompt_tokens} tokens). Skipping..."})
-                continue
-
-            response = ""
-            try:
-                for result in agent.run_gradio_chat(
-                    message=prompt,
-                    history=[],
-                    temperature=0.2,
-                    max_new_tokens=MAX_NEW_TOKENS,
-                    max_token=MAX_MODEL_TOKENS,
-                    call_agent=False,
-                    conversation=[],
-                ):
-                    if isinstance(result, str):
-                        response += result
-                    elif hasattr(result, "content"):
-                        response += result.content
-                    elif isinstance(result, list):
-                        for r in result:
-                            if hasattr(r, "content"):
-                                response += r.content
-            except Exception as e:
-                messages.append({"role": "assistant", "content": f"❌ Error analyzing chunk {i+1}: {str(e)}"})
-                continue
-
-            chunk_responses.append(clean_response(response))
-            messages.append({"role": "assistant", "content": f"✅ Chunk {i+1} analysis complete"})
-
-        if not chunk_responses:
-            messages.append({"role": "assistant", "content": "❌ No valid chunk responses to summarize."})
-            return messages, report_path
-
-        # Summarize chunk responses incrementally to avoid token limit
-        summary = ""
-        current_summary_tokens = 0
-        for i, response in enumerate(chunk_responses):
-            response_tokens = estimate_tokens(response)
-            if current_summary_tokens + response_tokens > MAX_MODEL_TOKENS - PROMPT_OVERHEAD - MAX_NEW_TOKENS:
-                # Summarize current summary
-                summary_prompt = f"Summarize the following analysis:\n\n{summary}\n\nProvide a concise summary."
-                summary_response = ""
-                try:
-                    for result in agent.run_gradio_chat(
-                        message=summary_prompt,
-                        history=[],
-                        temperature=0.2,
-                        max_new_tokens=MAX_NEW_TOKENS,
-                        max_token=MAX_MODEL_TOKENS,
-                        call_agent=False,
-                        conversation=[],
-                    ):
-                        if isinstance(result, str):
-                            summary_response += result
-                        elif hasattr(result, "content"):
-                            summary_response += result.content
-                        elif isinstance(result, list):
-                            for r in result:
-                                if hasattr(r, "content"):
-                                    summary_response += r.content
-                    summary = clean_response(summary_response)
-                    current_summary_tokens = estimate_tokens(summary)
-                except Exception as e:
-                    messages.append({"role": "assistant", "content": f"❌ Error summarizing intermediate results: {str(e)}"})
-                    return messages, report_path
-
-            summary += f"\n\n### Chunk {i+1} Analysis\n{response}"
-            current_summary_tokens += response_tokens
-
-        # Final summarization
-        final_prompt = f"Summarize the key findings from the following analyses:\n\n{summary}"
-        messages.append({"role": "assistant", "content": "📊 Generating final report..."})
-
-        final_report_text = ""
-        try:
-            for result in agent.run_gradio_chat(
-                message=final_prompt,
-                history=[],
-                temperature=0.2,
-                max_new_tokens=MAX_NEW_TOKENS,
-                max_token=MAX_MODEL_TOKENS,
-                call_agent=False,
-                conversation=[],
-            ):
-                if isinstance(result, str):
-                    final_report_text += result
-                elif hasattr(result, "content"):
-                    final_report_text += result.content
-                elif isinstance(result, list):
-                    for r in result:
-                        if hasattr(r, "content"):
-                            final_report_text += r.content
-        except Exception as e:
-            messages.append({"role": "assistant", "content": f"❌ Error generating final report: {str(e)}"})
-            return messages, report_path
-
-        final_report = f"# \U0001f9e0 Final Patient Report\n\n{clean_response(final_report_text)}"
-        messages[-1]["content"] = f"📊 Final Report:\n\n{clean_response(final_report_text)}"
-
-        # Save the report
-        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
-        report_path = os.path.join(report_dir, f"report_{timestamp}.md")
-
-        with open(report_path, 'w') as f:
-            f.write(final_report)
-
-        messages.append({"role": "assistant", "content": f"✅ Report generated and saved: report_{timestamp}.md"})
-
-    except Exception as e:
-        messages.append({"role": "assistant", "content": f"❌ Error processing file: {str(e)}"})
-
-    return messages, report_path
+    from app_backend import run_analysis_pipeline  # Or keep as is if internal
+    return run_analysis_pipeline(agent, file, chatbot_state)
 
 def create_ui(agent):
     """Create the Gradio UI for the patient history analysis tool."""
-    with gr.Blocks(title="Patient History Chat", css=".gradio-container {max-width: 900px !important}") as demo:
-        gr.Markdown("## 🏥 Patient History Analysis Tool")
-
+    with gr.Blocks(
+        title="Patient History Chat",
+        css="""
+        .gradio-container {
+            max-width: 900px !important;
+            margin: auto;
+            font-family: 'Segoe UI', sans-serif;
+            background-color: #f8f9fa;
+        }
+        .gr-button.primary {
+            background: linear-gradient(to right, #4b6cb7, #182848);
+            color: white;
+            border: none;
+            border-radius: 8px;
+        }
+        .gr-button.primary:hover {
+            background: linear-gradient(to right, #3552a3, #101a3e);
+        }
+        .gr-file-upload, .gr-chatbot, .gr-markdown {
+            background-color: white;
+            border-radius: 10px;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+            padding: 1rem;
+        }
+        .gr-chatbot {
+            border-left: 4px solid #4b6cb7;
+        }
+        .gr-file-upload input {
+            font-size: 0.95rem;
+        }
+        .chat-message-content p {
+            margin: 0.3em 0;
+        }
+        .chat-message-content ul {
+            padding-left: 1.2em;
+            margin: 0.4em 0;
+        }
+        """
+    ) as demo:
+        gr.Markdown("""
+        <h2 style='color:#182848'>🏥 Patient History Analysis Tool</h2>
+        <p style='color:#444;'>Upload an Excel file containing clinical data. The assistant will analyze it for patterns, inconsistencies, and recommendations.</p>
+        """)
+
         with gr.Row():
             with gr.Column(scale=3):
                 chatbot = gr.Chatbot(
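
The commit replaces the ~140-line inline pipeline with a two-line delegation to `app_backend.run_analysis_pipeline`, but `app_backend` itself is not part of this diff. Below is only a minimal sketch of what that module could contain, reconstructed from the deleted logic; `run_analysis_pipeline` comes from the new import, while `_stream` and all other details here are assumptions for illustration.

    # app_backend.py, hypothetical sketch mirroring the deleted inline logic.
    import os
    from datetime import datetime

    def _stream(agent, prompt: str, max_new_tokens: int, max_token: int) -> str:
        """Collect the streamed output of agent.run_gradio_chat into one string."""
        out = ""
        for result in agent.run_gradio_chat(
            message=prompt, history=[], temperature=0.2,
            max_new_tokens=max_new_tokens, max_token=max_token,
            call_agent=False, conversation=[],
        ):
            if isinstance(result, str):
                out += result
            elif hasattr(result, "content"):
                out += result.content
            elif isinstance(result, list):
                out += "".join(r.content for r in result if hasattr(r, "content"))
        return out

    def run_analysis_pipeline(agent, file, chatbot_state):
        # Lazy import avoids a circular import at module load time.
        from app import (MAX_MODEL_TOKENS, MAX_NEW_TOKENS, build_prompt_from_text,
                         clean_response, extract_text_from_excel,
                         report_dir, split_text_into_chunks)
        messages = chatbot_state or []
        if file is None or not hasattr(file, "name"):
            messages.append({"role": "assistant",
                             "content": "❌ Please upload a valid Excel file before analyzing."})
            return messages, None
        # Extract -> chunk -> analyze each chunk -> summarize the combined findings.
        text = extract_text_from_excel(file.name)
        analyses = [clean_response(_stream(agent, build_prompt_from_text(c),
                                           MAX_NEW_TOKENS, MAX_MODEL_TOKENS))
                    for c in split_text_into_chunks(text)]
        summary = _stream(agent,
                          "Summarize the key findings from the following analyses:\n\n"
                          + "\n\n".join(analyses),
                          MAX_NEW_TOKENS, MAX_MODEL_TOKENS)
        report_path = os.path.join(
            report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
        with open(report_path, "w") as f:
            f.write(f"# 🧠 Final Patient Report\n\n{clean_response(summary)}")
        messages.append({"role": "assistant",
                         "content": f"✅ Report generated and saved: {os.path.basename(report_path)}"})
        return messages, report_path

Unlike the deleted version, this sketch drops the incremental re-summarization guard, so a very large file could still overflow the context window; a real backend would need that loop back.
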
@@ -291,7 +190,8 @@ def create_ui(agent):
                     avatar_images=(
                         None,
                         "https://i.imgur.com/6wX7Zb4.png"
-                    )
+                    ),
+                    render_markdown=True
                 )
             with gr.Column(scale=1):
                 file_upload = gr.File(
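
`render_markdown=True` presumably exists so the chatbot renders the wrapped assistant messages produced by the new `update_ui` further down; without it the `<div class='chat-message-content'>` wrapper would be more likely to show up as literal text.
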
@@ -301,7 +201,8 @@ def create_ui(agent):
                 )
                 analyze_btn = gr.Button(
                     "🧠 Analyze Patient History",
-                    variant="primary"
+                    variant="primary",
+                    elem_classes="primary"
                 )
                 report_output = gr.File(
                     label="Download Report",
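
The added `elem_classes="primary"` appears to exist so the custom `.gr-button.primary` rules in the `css` block above can target this button; whether `variant="primary"` alone would produce a matching class name depends on the Gradio version.
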
@@ -309,13 +210,20 @@ def create_ui(agent):
                     interactive=False
                 )
 
-        # State to maintain chatbot messages
         chatbot_state = gr.State(value=[])
 
        def update_ui(file, current_state):
            messages, report_path = process_final_report(agent, file, current_state)
+            formatted_messages = []
+            for msg in messages:
+                role = msg.get("role")
+                content = msg.get("content", "")
+                if role == "assistant":
+                    content = content.replace("- ", "\n- ")  # Ensure bullet formatting
+                    content = f"<div class='chat-message-content'>{content}</div>"
+                formatted_messages.append({"role": role, "content": content})
             report_update = gr.update(visible=report_path is not None, value=report_path)
-            return messages, report_update, messages
+            return formatted_messages, report_update, formatted_messages
 
         analyze_btn.click(
             fn=update_ui,
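
One caveat about the new bullet normalization in `update_ui`: `str.replace` rewrites every occurrence of `"- "`, including hyphen-space sequences that happen to appear mid-sentence, not just list markers. For example:

    print("Summary: - stable - afebrile".replace("- ", "\n- "))  # intended: breaks out bullets
    print("The X- ray was clear".replace("- ", "\n- "))          # side effect: "X\n- ray"
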
@@ -326,6 +234,7 @@ def create_ui(agent):
 
     return demo
 
+
 if __name__ == "__main__":
     try:
         agent = init_agent()
@@ -339,4 +248,4 @@ if __name__ == "__main__":
         )
     except Exception as e:
         print(f"Error: {str(e)}")
-        sys.exit(1)
+        sys.exit(1)