Ali2206 committed on
Commit
20d61bd
·
verified ·
1 Parent(s): b92dc05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -17
app.py CHANGED
@@ -1,4 +1,9 @@
1
-
 
 
 
 
 
2
  from typing import List
3
  from concurrent.futures import ThreadPoolExecutor, as_completed
4
  import hashlib
@@ -51,7 +56,7 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
51
  text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
52
  for i, page in enumerate(pdf.pages[3:max_pages], start=4):
53
  page_text = page.extract_text() or ""
54
- if any(re.search(rf'\\b{kw}\\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
55
  text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
56
  return "\n\n".join(text_chunks)
57
  except Exception as e:
@@ -126,6 +131,30 @@ def init_agent():
126
  print("✅ Agent Ready")
127
  return agent
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  def create_ui(agent):
130
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
131
  gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
@@ -163,8 +192,6 @@ Medical Records:
163
 
164
  try:
165
  response = ""
166
- finish_detected = False
167
-
168
  for chunk in agent.run_gradio_chat(
169
  message=prompt,
170
  history=[],
@@ -179,18 +206,22 @@ Medical Records:
179
  if isinstance(chunk, str):
180
  response += chunk
181
  elif isinstance(chunk, list):
182
- chunk_str = "".join([c.content for c in chunk if hasattr(c, "content") and c.content])
183
- response += chunk_str
184
- if '"name": "Finish"' in chunk_str:
185
- finish_detected = True
186
-
187
- clean_response = response.rsplit("[TOOL_CALLS]", 1)[0].strip()
188
- print("🔎 Final Cleaned Response:", repr(clean_response))
189
- if not clean_response:
190
- clean_response = "⚠️ No clear oversights identified or model output was invalid."
191
-
192
- history[-1] = {"role": "assistant", "content": clean_response}
193
- report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
 
 
 
 
194
  yield history, report_path if report_path and os.path.exists(report_path) else None
195
 
196
  except Exception as e:
@@ -212,4 +243,4 @@ if __name__ == "__main__":
212
  show_error=True,
213
  allowed_paths=[report_dir],
214
  share=False
215
- )
 
1
+ import sys
2
+ import os
3
+ import pandas as pd
4
+ import pdfplumber
5
+ import json
6
+ import gradio as gr
7
  from typing import List
8
  from concurrent.futures import ThreadPoolExecutor, as_completed
9
  import hashlib
 
56
  text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
57
  for i, page in enumerate(pdf.pages[3:max_pages], start=4):
58
  page_text = page.extract_text() or ""
59
+ if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
60
  text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
61
  return "\n\n".join(text_chunks)
62
  except Exception as e:
 
131
  print("✅ Agent Ready")
132
  return agent
133
 
134
+ def clean_response(response: str) -> str:
135
+ """Clean the response by removing tool calls and duplicate content."""
136
+ # Remove all tool call blocks
137
+ response = re.sub(r'\[TOOL_CALLS\].*?$', '', response, flags=re.DOTALL)
138
+
139
+ # Remove duplicate sentences (simple approach)
140
+ sentences = [s.strip() for s in response.split('.') if s.strip()]
141
+ unique_sentences = []
142
+ seen_sentences = set()
143
+
144
+ for sentence in sentences:
145
+ if sentence not in seen_sentences:
146
+ seen_sentences.add(sentence)
147
+ unique_sentences.append(sentence)
148
+
149
+ # Reconstruct the response
150
+ cleaned = '. '.join(unique_sentences) + '.' if unique_sentences else response
151
+
152
+ # Remove any remaining JSON-like artifacts
153
+ cleaned = re.sub(r'\{.*?\}', '', cleaned)
154
+ cleaned = re.sub(r'\[.*?\]', '', cleaned)
155
+
156
+ return cleaned.strip()
157
+
158
  def create_ui(agent):
159
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
160
  gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
 
192
 
193
  try:
194
  response = ""
 
 
195
  for chunk in agent.run_gradio_chat(
196
  message=prompt,
197
  history=[],
 
206
  if isinstance(chunk, str):
207
  response += chunk
208
  elif isinstance(chunk, list):
209
+ response += "".join([c.content for c in chunk if hasattr(c, "content") and c.content])
210
+
211
+ # Clean the response before displaying
212
+ clean_response_text = clean_response(response)
213
+
214
+ if not clean_response_text:
215
+ clean_response_text = "⚠️ No clear oversights identified or model output was invalid."
216
+
217
+ # Save the full report
218
+ report_path = None
219
+ if file_hash_value:
220
+ report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
221
+ with open(report_path, "w", encoding="utf-8") as f:
222
+ f.write(clean_response_text)
223
+
224
+ history[-1] = {"role": "assistant", "content": clean_response_text}
225
  yield history, report_path if report_path and os.path.exists(report_path) else None
226
 
227
  except Exception as e:
 
243
  show_error=True,
244
  allowed_paths=[report_dir],
245
  share=False
246
+ )