Ali2206 committed
Commit eea533f · verified · 1 Parent(s): 6358a36

Update app.py

Files changed (1):
  1. app.py +133 -122
app.py CHANGED
@@ -14,11 +14,6 @@ import subprocess
 import multiprocessing
 from functools import partial
 import time
-import logging
-
-# Setup logging
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", filename="/home/user/clinical_oversight_analyzer.log")
-logger = logging.getLogger(__name__)
 
 # Persistent directory
 persistent_dir = "/data/hf_cache"
@@ -34,12 +29,10 @@ for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir, vllm_cache_dir]:
     os.makedirs(directory, exist_ok=True)
 
 os.environ["HF_HOME"] = model_cache_dir
+os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
 os.environ["VLLM_CACHE_DIR"] = vllm_cache_dir
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
-# Remove TRANSFORMERS_CACHE to suppress warning
-if "TRANSFORMERS_CACHE" in os.environ:
-    del os.environ["TRANSFORMERS_CACHE"]
 
 current_dir = os.path.dirname(os.path.abspath(__file__))
 src_path = os.path.abspath(os.path.join(current_dir, "src"))
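
Note on this hunk: the deleted lines existed precisely to silence the transformers deprecation warning about `TRANSFORMERS_CACHE` (recent releases ask for `HF_HOME` instead), so re-adding `TRANSFORMERS_CACHE` may bring that warning back. Since both variables now point at the same directory, the minimal setup is likely `HF_HOME` alone; a sketch of that reading, not verified against the pinned transformers version:

```python
import os

# Stand-in path; app.py derives the real model_cache_dir from persistent_dir.
model_cache_dir = os.path.join("/data/hf_cache", "models")

# Modern transformers/huggingface_hub derive their caches from HF_HOME.
os.environ["HF_HOME"] = model_cache_dir
# os.environ["TRANSFORMERS_CACHE"] = model_cache_dir  # only needed for older transformers
```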
@@ -54,9 +47,6 @@ def file_hash(path: str) -> str:
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
 
-def batch_hash(chunks: List[str], prompt: str) -> str:
-    return hashlib.md5(("".join(chunks) + prompt).encode("utf-8")).hexdigest()
-
 def extract_page_range(file_path: str, start_page: int, end_page: int) -> str:
     """Extract text from a range of PDF pages."""
     try:
@@ -66,8 +56,7 @@ def extract_page_range(file_path: str, start_page: int, end_page: int) -> str:
             page_text = page.extract_text() or ""
             text_chunks.append(f"=== Page {start_page + pdf.pages.index(page) + 1} ===\n{page_text.strip()}")
         return "\n\n".join(text_chunks)
-    except Exception as e:
-        logger.error(f"Error extracting pages {start_page}-{end_page}: {e}")
+    except Exception:
         return ""
 
 def extract_all_pages(file_path: str, progress_callback=None) -> str:
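
Two things worth flagging in this hunk, sketched below (not part of the commit). First, the bare `except Exception: return ""` makes a failed worker indistinguishable from a blank page range. Second, the label `start_page + pdf.pages.index(page) + 1` double-counts `start_page`, because `pdf.pages.index(page)` already returns the page's absolute index: a range starting at page 4 gets labeled `=== Page 7 ===`. A variant using `enumerate`, assuming the app reads PDFs with pdfplumber:

```python
import pdfplumber

def extract_page_range_checked(file_path: str, start_page: int, end_page: int) -> str:
    """Like extract_page_range, but with correct page labels and visible failures."""
    try:
        text_chunks = []
        with pdfplumber.open(file_path) as pdf:
            for i, page in enumerate(pdf.pages[start_page:end_page]):
                page_text = page.extract_text() or ""
                # enumerate gives the offset within the slice, so this is the true
                # 1-based page number (and avoids an O(n) .index() scan per page).
                text_chunks.append(f"=== Page {start_page + i + 1} ===\n{page_text.strip()}")
        return "\n\n".join(text_chunks)
    except Exception as e:
        # Returned to the parent via the Pool instead of being swallowed;
        # the caller can filter or log these markers.
        return f"[Extraction failed for pages {start_page + 1}-{end_page}: {e}]"
```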
@@ -79,14 +68,17 @@ def extract_all_pages(file_path: str, progress_callback=None) -> str:
         if total_pages == 0:
             return ""
 
+        # Use 6 processes (adjust based on CPU cores)
         num_processes = min(6, multiprocessing.cpu_count())
         pages_per_process = max(1, total_pages // num_processes)
 
+        # Create page ranges for parallel processing
         ranges = [(i * pages_per_process, min((i + 1) * pages_per_process, total_pages))
                   for i in range(num_processes)]
         if ranges[-1][1] != total_pages:
             ranges[-1] = (ranges[-1][0], total_pages)
 
+        # Process page ranges in parallel
         with multiprocessing.Pool(processes=num_processes) as pool:
             extract_func = partial(extract_page_range, file_path)
             results = []
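
A quick check of the partitioning arithmetic (a standalone sketch, with the `cpu_count()` clamp omitted so the output is deterministic): ranges are floor-divided and the remainder is merged into the last range, so the final worker can receive noticeably more pages than the others. When there are fewer pages than workers, the trailing ranges come out empty or inverted, which is harmless because slicing past the end of `pdf.pages` just yields no pages.

```python
def make_ranges(total_pages: int, num_processes: int) -> list:
    """Mirrors the partitioning in extract_all_pages."""
    pages_per_process = max(1, total_pages // num_processes)
    ranges = [(i * pages_per_process, min((i + 1) * pages_per_process, total_pages))
              for i in range(num_processes)]
    if ranges[-1][1] != total_pages:
        ranges[-1] = (ranges[-1][0], total_pages)  # last worker absorbs the remainder
    return ranges

print(make_ranges(23, 6))
# [(0, 3), (3, 6), (6, 9), (9, 12), (12, 15), (15, 23)] -- last range is 8 pages vs. 3
```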
@@ -98,7 +90,6 @@ def extract_all_pages(file_path: str, progress_callback=None) -> str:
 
         return "\n\n".join(filter(None, results))
     except Exception as e:
-        logger.error(f"PDF processing error: {e}")
         return f"PDF processing error: {str(e)}"
 
 def convert_file_to_json(file_path: str, file_type: str, progress_callback=None) -> str:
@@ -130,61 +121,87 @@ def convert_file_to_json(file_path: str, file_type: str, progress_callback=None)
             f.write(result)
         return result
     except Exception as e:
-        logger.error(f"Error processing {file_path}: {e}")
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
 
 def log_system_usage(tag=""):
     try:
         cpu = psutil.cpu_percent(interval=1)
         mem = psutil.virtual_memory()
-        logger.info(f"[{tag}] CPU: {cpu}% | RAM: {mem.used // (1024**2)}MB / {mem.total // (1024**2)}MB")
+        print(f"[{tag}] CPU: {cpu}% | RAM: {mem.used // (1024**2)}MB / {mem.total // (1024**2)}MB")
         result = subprocess.run(
             ["nvidia-smi", "--query-gpu=memory.used,memory.total,utilization.gpu", "--format=csv,nounits,noheader"],
             capture_output=True, text=True
         )
         if result.returncode == 0:
             used, total, util = result.stdout.strip().split(", ")
-            logger.info(f"[{tag}] GPU: {used}MB / {total}MB | Utilization: {util}%")
+            print(f"[{tag}] GPU: {used}MB / {total}MB | Utilization: {util}%")
     except Exception as e:
-        logger.error(f"[{tag}] GPU/CPU monitor failed: {e}")
+        print(f"[{tag}] GPU/CPU monitor failed: {e}")
 
 def clean_response(text: str) -> str:
-    """Clean TxAgent response to group findings by section without tool names."""
+    """Clean TxAgent response to group findings under tool-derived headings."""
     text = sanitize_utf8(text)
-    # Remove tool tags, None, and reasoning
-    text = re.sub(r"\[TOOL:[^\]]+\]|\bNone\b|To analyze the patient record excerpt.*?medications\.|Since the previous attempts.*?\.|I need to.*?medications\.|Retrieving tools.*?\.", "", text, flags=re.DOTALL)
+    # Remove tool call artifacts, None, and reasoning
+    text = re.sub(r"\[.*?\]|\bNone\b|To analyze the patient record excerpt.*?medications\.|Since the previous attempts.*?\.|I need to.*?medications\.|Retrieving tools.*?\.", "", text, flags=re.DOTALL)
+    # Remove extra whitespace and non-markdown content
     text = re.sub(r"\n{3,}", "\n\n", text)
-    text = re.sub(r"[^\n#\-\*\w\s\.\,\:\(\)]+", "", text)
-
+    text = re.sub(r"[^\n#\-\*\w\s\.\,\:\(\)]+", "", text)  # Keep markdown-relevant characters
+
+    # Define tool-to-heading mapping
+    tool_to_heading = {
+        "get_abuse_info_by_drug_name": "Drugs",
+        "get_dependence_info_by_drug_name": "Drugs",
+        "get_abuse_types_and_related_adverse_reactions_and_controlled_substance_status_by_drug_name": "Drugs",
+        "get_info_for_patients_by_drug_name": "Drugs",
+        # Add other tools from new_tool.json if applicable
+    }
+
+    # Parse sections and findings
     sections = {}
     current_section = None
+    current_tool = None
     lines = text.splitlines()
     for line in lines:
         line = line.strip()
         if not line:
             continue
+        # Detect tool tag
+        tool_match = re.match(r"\[TOOL:\s*(\w+)\]", line)
+        if tool_match:
+            current_tool = tool_match.group(1)
+            continue
-        section_match = re.match(r"###\s*(Drugs|Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line)
+        # Detect section heading
+        section_match = re.match(r"###\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line)
         if section_match:
             current_section = section_match.group(1)
             if current_section not in sections:
                 sections[current_section] = []
             continue
+        # Detect finding
         finding_match = re.match(r"-\s*.+", line)
         if finding_match and current_section and not re.match(r"-\s*No issues identified", line):
-            sections[current_section].append(line)
+            # Assign to tool-derived heading if tool is specified
+            if current_tool and current_tool in tool_to_heading:
+                heading = tool_to_heading[current_tool]
+                if heading not in sections:
+                    sections[heading] = []
+                sections[heading].append(line)
+            else:
+                sections[current_section].append(line)
 
+    # Combine non-empty sections
     cleaned = []
     for heading, findings in sections.items():
-        if findings:
+        if findings:  # Only include sections with findings
            cleaned.append(f"### {heading}\n" + "\n".join(findings))
 
     text = "\n\n".join(cleaned).strip()
     if not text:
-        text = ""
+        text = ""  # Return empty string if no valid findings
     return text
 
 def init_agent():
-    logger.info("Initializing model...")
+    print("🔍 Initializing model...")
     log_system_usage("Before Load")
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
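
One ordering problem in the new `clean_response` worth noting: the first `re.sub` pattern `\[.*?\]` deletes every bracketed span, including the `[TOOL: ...]` tags the prompt asks the model to emit, and the character filter then strips `[` and `]` anyway. By the time the line loop runs, `tool_match` can never fire, so `tool_to_heading` is dead code, and the 'Drugs' heading (which the new section regex no longer matches directly) is unreachable. A sketch of a fix that keeps the tags alive until they are parsed; the lookahead pattern is my suggestion, not from the commit:

```python
import re

def strip_noise_keep_tool_tags(text: str) -> str:
    """Remove bracketed chatter and stray 'None' tokens, but keep [TOOL: ...] markers."""
    # Negative lookahead spares the tool tags that the line parser expects to see.
    text = re.sub(r"\[(?!TOOL:)[^\]]*\]|\bNone\b", "", text)
    # Same character filter as the commit, with '[' and ']' added to the keep list.
    text = re.sub(r"[^\n#\-\*\w\s\.\,\:\(\)\[\]]+", "", text)
    return text

sample = "[reasoning noise] [TOOL: get_abuse_info_by_drug_name]\n- Opioid use disorder not addressed."
print(strip_noise_keep_tool_tags(sample))
# ' [TOOL: get_abuse_info_by_drug_name]\n- Opioid use disorder not addressed.'
```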
@@ -197,67 +214,15 @@ def init_agent():
         tool_files_dict={"new_tool": target_tool_path},
         force_finish=True,
         enable_checker=True,
-        step_rag_num=1,
+        step_rag_num=4,
         seed=100,
         additional_default_tools=[],
     )
     agent.init_model()
     log_system_usage("After Load")
-    logger.info("Agent Ready")
+    print("✅ Agent Ready")
     return agent
 
-def process_batch(agent, chunks: List[str], cache_path: str, prompt_template: str) -> str:
-    """Process a batch of chunks in a single prompt."""
-    if not any(chunk.strip() for chunk in chunks):
-        logger.warning("All chunks are empty, skipping analysis...")
-        return "No oversights identified in the provided records."
-
-    batch_id = batch_hash(chunks, prompt_template)
-    batch_cache_path = os.path.join(cache_path, f"batch_{batch_id}.txt")
-
-    if os.path.exists(batch_cache_path):
-        with open(batch_cache_path, "r", encoding="utf-8") as f:
-            logger.info("Cache hit for batch")
-            return f.read()
-
-    # Combine chunks into one prompt
-    chunk_texts = [f"Chunk {i+1}:\n{chunk[:500]}" for i, chunk in enumerate(chunks) if chunk.strip()]
-    combined_text = "\n\n".join(chunk_texts)
-    prompt = prompt_template.format(chunks=combined_text)
-    response = ""
-
-    try:
-        for output in agent.run_gradio_chat(
-            message=prompt,
-            history=[],
-            temperature=0.2,
-            max_new_tokens=256,
-            max_token=1024,
-            call_agent=False,
-            conversation=[],
-        ):
-            if output is None:
-                continue
-            if isinstance(output, list):
-                for m in output:
-                    if hasattr(m, 'content') and m.content:
-                        cleaned = clean_response(m.content)
-                        if cleaned and re.search(r"###\s*\w+", cleaned):
-                            response += cleaned + "\n\n"
-            elif isinstance(output, str) and output.strip():
-                cleaned = clean_response(output)
-                if cleaned and re.search(r"###\s*\w+", cleaned):
-                    response += cleaned + "\n\n"
-    except Exception as e:
-        logger.error(f"Error processing batch: {e}")
-        return f"Error occurred: {str(e)}"
-
-    if response:
-        with open(batch_cache_path, "w", encoding="utf-8") as f:
-            f.write(response)
-        return response
-    return "No oversights identified in the provided records."
-
 def create_ui(agent):
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
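
With `process_batch` (and its `batch_hash` helper) removed, the response cache in `file_cache_dir` goes away too: every click of Analyze reruns the model even on an identical file and prompt. If reruns turn out to be common, a per-chunk version of the same md5 scheme is cheap to restore; a sketch under that assumption (names are mine, not from the commit):

```python
import hashlib
import os

def chunk_cache_key(chunk: str, prompt_template: str) -> str:
    # Same keying idea as the removed batch_hash, applied per chunk.
    return hashlib.md5((chunk + prompt_template).encode("utf-8")).hexdigest()

def read_cached_response(cache_dir: str, key: str):
    path = os.path.join(cache_dir, f"chunk_{key}.txt")
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    return None  # miss: run the agent, then write the response to `path`
```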
@@ -267,27 +232,6 @@ def create_ui(agent):
         send_btn = gr.Button("Analyze", variant="primary")
         download_output = gr.File(label="Download Full Report")
 
-        prompt_template = """
-You are a medical analysis assistant. Analyze the following patient record excerpts for clinical oversights and provide a concise, evidence-based summary in markdown format. Group findings under the following headings: 'Drugs', 'Missed Diagnoses', 'Medication Conflicts', 'Incomplete Assessments', 'Urgent Follow-up'. For each finding, include:
-- Clinical context (why the issue was missed or relevant details from the record).
-- Potential risks if unaddressed (e.g., disease progression, adverse events).
-- Actionable recommendations (e.g., tests, referrals, medication adjustments).
-Output ONLY the markdown-formatted findings, with bullet points under each heading. Do NOT include tool references, reasoning, or intermediate steps. If no issues are found for a section, omit that section. Ensure the output is specific to the provided text and avoids generic responses.
-
-Example Output:
-### Drugs
-- Opioid use disorder not addressed. Missed due to lack of screening. Risks: overdose. Recommend: addiction specialist referral.
-### Missed Diagnoses
-- Elevated BP noted without diagnosis. Missed due to inconsistent visits. Risks: stroke. Recommend: BP monitoring, antihypertensives.
-### Incomplete Assessments
-- Chest pain not evaluated. Time constraints likely cause. Risks: cardiac issues. Recommend: ECG, stress test.
-### Urgent Follow-up
-- Abnormal creatinine not addressed. Delayed lab review. Risks: renal failure. Recommend: nephrology referral.
-
-Patient Record Excerpts:
-{chunks}
-"""
-
         def analyze(message: str, history: List[dict], files: List):
             history.append({"role": "user", "content": message})
             history.append({"role": "assistant", "content": "⏳ Extracting text from files..."})
@@ -296,6 +240,7 @@ Patient Record Excerpts:
             extracted = ""
             file_hash_value = ""
             if files:
+                # Progress callback for extraction
                 total_pages = 0
                 processed_pages = 0
                 def update_extraction_progress(current, total):
@@ -312,36 +257,102 @@ Patient Record Excerpts:
                 extracted = "\n".join(results)
                 file_hash_value = file_hash(files[0].name) if files else ""
 
-                history.pop()
+                history.pop() # Remove extraction message
                 history.append({"role": "assistant", "content": "✅ Text extraction complete."})
                 yield history, None
 
-            chunk_size = 500 # Fixed for speed
-            max_chunks = 5 # Fixed for speed
+            # Split extracted text into chunks of ~6,000 characters
+            chunk_size = 6000
             chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
-            chunks = chunks[:max_chunks] # Limit to 5 chunks
-            if not chunks:
-                history.append({"role": "assistant", "content": "No content to analyze."})
-                yield history, None
-                return
+            combined_response = ""
 
-            try:
-                animation = ["🔍", "📊", "🧠", "🔎"][(int(time.time() * 2) % 4)]
-                history.append({"role": "assistant", "content": f"Analyzing chunks 1-5... {animation}"})
-                yield history, None
-
-                response = process_batch(agent, chunks, file_cache_dir, prompt_template)
-                history[-1] = {"role": "assistant", "content": response.strip()}
-                yield history, None
+            prompt_template = """
+You are a medical analysis assistant. Analyze the following patient record excerpt for clinical oversights and provide a concise, evidence-based summary in markdown format. Group findings under appropriate headings based on the tool used (e.g., drug-related findings under 'Drugs'). For each finding, include:
+- Clinical context (why the issue was missed or relevant details from the record).
+- Potential risks if unaddressed (e.g., disease progression, adverse events).
+- Actionable recommendations (e.g., tests, referrals, medication adjustments).
+Output ONLY the markdown-formatted findings, with bullet points under each heading. Precede each finding with a tool tag (e.g., [TOOL: get_abuse_info_by_drug_name]) to indicate the tool used. Do NOT include reasoning, tool calls, or intermediate steps. If no issues are found for a tool or category, state "No issues identified" for that section. Ensure the output is specific to the provided text and avoids generic responses.
+
+Example Output:
+### Drugs
+[TOOL: get_abuse_info_by_drug_name]
+- Opioid use disorder not addressed. Missed due to lack of screening. Risks: overdose. Recommend: addiction specialist referral.
+### Missed Diagnoses
+- Elevated BP noted without diagnosis. Missed due to inconsistent visits. Risks: stroke. Recommend: BP monitoring, antihypertensives.
+### Incomplete Assessments
+- Chest pain not evaluated. Time constraints likely cause. Risks: cardiac issues. Recommend: ECG, stress test.
+### Urgent Follow-up
+- Abnormal creatinine not addressed. Delayed lab review. Risks: renal failure. Recommend: nephrology referral.
+
+Patient Record Excerpt (Chunk {0} of {1}):
+{chunk}
+"""
+
+            try:
+                # Process each chunk and stream results in real-time
+                for chunk_idx, chunk in enumerate(chunks, 1):
+                    # Update UI with chunk progress
+                    animation = ["🔍", "📊", "🧠", "🔎"][(int(time.time() * 2) % 4)]
+                    history.append({"role": "assistant", "content": f"Analyzing records... {animation} Chunk {chunk_idx}/{len(chunks)}"})
+                    yield history, None
+
+                    prompt = prompt_template.format(chunk_idx, len(chunks), chunk=chunk[:4000]) # Truncate to avoid token limits
+                    chunk_response = ""
+                    for chunk_output in agent.run_gradio_chat(
+                        message=prompt,
+                        history=[],
+                        temperature=0.2,
+                        max_new_tokens=1024,
+                        max_token=4096,
+                        call_agent=False,
+                        conversation=[],
+                    ):
+                        if chunk_output is None:
+                            continue
+                        if isinstance(chunk_output, list):
+                            for m in chunk_output:
+                                if hasattr(m, 'content') and m.content:
+                                    cleaned = clean_response(m.content)
+                                    if cleaned and re.search(r"###\s*\w+", cleaned):
+                                        chunk_response += cleaned + "\n\n"
+                                        # Update UI with partial response
+                                        if history[-1]["content"].startswith("Analyzing"):
+                                            history[-1] = {"role": "assistant", "content": f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"}
+                                        else:
+                                            history[-1]["content"] = f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"
+                                        yield history, None
+                        elif isinstance(chunk_output, str) and chunk_output.strip():
+                            cleaned = clean_response(chunk_output)
+                            if cleaned and re.search(r"###\s*\w+", cleaned):
+                                chunk_response += cleaned + "\n\n"
+                                # Update UI with partial response
+                                if history[-1]["content"].startswith("Analyzing"):
+                                    history[-1] = {"role": "assistant", "content": f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"}
+                                else:
+                                    history[-1]["content"] = f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"
+                                yield history, None
+
+                    # Append completed chunk response to combined response
+                    if chunk_response:
+                        combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
+                    else:
+                        combined_response += f"--- Analysis for Chunk {chunk_idx} ---\nNo oversights identified for this chunk.\n\n"
+
+                # Finalize UI with complete response
+                if combined_response.strip() and not all("No oversights identified" in chunk for chunk in combined_response.split("--- Analysis for Chunk")):
+                    history[-1]["content"] = combined_response.strip()
+                else:
+                    history.append({"role": "assistant", "content": "No oversights identified in the provided records."})
+
+                # Generate report file
                 report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
-                if report_path and response.strip() and "No oversights identified" not in response and "Error occurred" not in response:
+                if report_path:
                     with open(report_path, "w", encoding="utf-8") as f:
-                        f.write(response)
+                        f.write(combined_response)
                 yield history, report_path if report_path and os.path.exists(report_path) else None
 
             except Exception as e:
-                logger.error(f"Analysis error: {e}")
+                print("🚨 ERROR:", e)
                 history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
                 yield history, None
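
A size mismatch in this hunk worth calling out: chunks are cut at 6,000 characters, but the prompt only ever sees `chunk[:4000]`, so the last third of every chunk is silently skipped. (The `.format` call itself is fine: `{0}`/`{1}` bind positionally and `{chunk}` by name.) A sketch that chunks at the size the prompt actually consumes; `prompt_budget` is my name, not the commit's:

```python
def split_for_prompt(extracted: str, prompt_budget: int = 4000) -> list:
    """Cut chunks at the prompt's own truncation size so no text is dropped.
    prompt_budget mirrors the chunk[:4000] slice in the analyze loop above."""
    return [extracted[i:i + prompt_budget]
            for i in range(0, len(extracted), prompt_budget)]

# Every character is analyzed exactly once:
assert "".join(split_for_prompt("x" * 15000)) == "x" * 15000
```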
 
@@ -350,7 +361,7 @@ Patient Record Excerpts:
     return demo
 
 if __name__ == "__main__":
-    logger.info("Launching app...")
+    print("🚀 Launching app...")
     agent = init_agent()
     demo = create_ui(agent)
     demo.queue(api_open=False).launch(
 