Ali2206 committed on
Commit
973658c
·
verified ·
1 Parent(s): 9c7c3ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -26
app.py CHANGED
@@ -1,5 +1,19 @@
1
-
2
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  os.makedirs(persistent_dir, exist_ok=True)
4
 
5
  model_cache_dir = os.path.join(persistent_dir, "txagent_models")
@@ -119,21 +133,24 @@ def init_agent():
119
 
120
  def clean_response(response: str) -> str:
121
  """Clean the response by removing tool calls and duplicate content."""
122
- # Remove all tool call blocks
123
- response = re.sub(r'\[TOOL_CALLS\].*?$', '', response, flags=re.DOTALL)
124
 
125
- # Remove duplicate sentences (simple approach)
126
- sentences = [s.strip() for s in response.split('.') if s.strip()]
127
- unique_sentences = []
128
- seen_sentences = set()
129
 
130
- for sentence in sentences:
131
- if sentence not in seen_sentences:
132
- seen_sentences.add(sentence)
133
- unique_sentences.append(sentence)
 
 
 
 
 
134
 
135
  # Reconstruct the response
136
- cleaned = '. '.join(unique_sentences) + '.' if unique_sentences else response
137
 
138
  # Remove any remaining JSON-like artifacts
139
  cleaned = re.sub(r'\{.*?\}', '', cleaned)
@@ -177,7 +194,7 @@ Medical Records:
177
  """
178
 
179
  try:
180
- response = ""
181
  for chunk in agent.run_gradio_chat(
182
  message=prompt,
183
  history=[],
@@ -190,24 +207,29 @@ Medical Records:
190
  if chunk is None:
191
  continue
192
  if isinstance(chunk, str):
193
- response += chunk
194
  elif isinstance(chunk, list):
195
- response += "".join([c.content for c in chunk if hasattr(c, "content") and c.content])
196
-
197
- # Clean the response before displaying
198
- clean_response_text = clean_response(response)
199
-
200
- if not clean_response_text:
201
- clean_response_text = "⚠️ No clear oversights identified or model output was invalid."
202
-
 
 
 
 
 
203
  # Save the full report
204
  report_path = None
205
  if file_hash_value:
206
  report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
207
  with open(report_path, "w", encoding="utf-8") as f:
208
- f.write(clean_response_text)
209
-
210
- history[-1] = {"role": "assistant", "content": clean_response_text}
211
  yield history, report_path if report_path and os.path.exists(report_path) else None
212
 
213
  except Exception as e:
 
1
+ import sys
2
+ import os
3
+ import pandas as pd
4
+ import pdfplumber
5
+ import json
6
+ import gradio as gr
7
+ from typing import List
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+ import hashlib
10
+ import shutil
11
+ import re
12
+ import psutil
13
+ import subprocess
14
+
15
+ # Persistent directory
16
+ persistent_dir = "/data/hf_cache"
17
  os.makedirs(persistent_dir, exist_ok=True)
18
 
19
  model_cache_dir = os.path.join(persistent_dir, "txagent_models")
 
133
 
134
  def clean_response(response: str) -> str:
135
  """Clean the response by removing tool calls and duplicate content."""
136
+ # First remove all tool call blocks
137
+ response = re.sub(r'\[TOOL_CALLS\].*?(\[TOOL_CALLS\]|$)', '', response, flags=re.DOTALL)
138
 
139
+ # Then remove any remaining standalone tool call markers
140
+ response = response.replace('[TOOL_CALLS]', '')
 
 
141
 
142
+ # Remove duplicate sections (looking for repeated identical paragraphs)
143
+ paragraphs = [p.strip() for p in response.split('\n\n') if p.strip()]
144
+ unique_paragraphs = []
145
+ seen_paragraphs = set()
146
+
147
+ for para in paragraphs:
148
+ if para not in seen_paragraphs:
149
+ seen_paragraphs.add(para)
150
+ unique_paragraphs.append(para)
151
 
152
  # Reconstruct the response
153
+ cleaned = '\n\n'.join(unique_paragraphs)
154
 
155
  # Remove any remaining JSON-like artifacts
156
  cleaned = re.sub(r'\{.*?\}', '', cleaned)
 
194
  """
195
 
196
  try:
197
+ full_response = ""
198
  for chunk in agent.run_gradio_chat(
199
  message=prompt,
200
  history=[],
 
207
  if chunk is None:
208
  continue
209
  if isinstance(chunk, str):
210
+ full_response += chunk
211
  elif isinstance(chunk, list):
212
+ full_response += "".join([c.content for c in chunk if hasattr(c, "content") and c.content])
213
+
214
+ # Clean the current response for display
215
+ current_cleaned = clean_response(full_response)
216
+ if current_cleaned:
217
+ history[-1] = {"role": "assistant", "content": current_cleaned}
218
+ yield history, None
219
+
220
+ # Final cleaning and processing
221
+ final_cleaned = clean_response(full_response)
222
+ if not final_cleaned:
223
+ final_cleaned = "⚠️ No clear oversights identified or model output was invalid."
224
+
225
  # Save the full report
226
  report_path = None
227
  if file_hash_value:
228
  report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
229
  with open(report_path, "w", encoding="utf-8") as f:
230
+ f.write(final_cleaned)
231
+
232
+ history[-1] = {"role": "assistant", "content": final_cleaned}
233
  yield history, report_path if report_path and os.path.exists(report_path) else None
234
 
235
  except Exception as e: