Ali2206 committed on
Commit
63d0c23
·
verified ·
1 Parent(s): 87babf2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -4
app.py CHANGED
@@ -55,6 +55,7 @@ TARGET_CHUNK_TOKENS = 1200
55
  PROMPT_RESERVE = 100
56
  MEDICAL_SECTION_HEADER = "=== MEDICAL SECTION ==="
57
 
 
58
  def log_system_usage(tag=""):
59
  try:
60
  cpu = psutil.cpu_percent(interval=1)
@@ -70,17 +71,21 @@ def log_system_usage(tag=""):
70
  except Exception as e:
71
  print(f"[{tag}] GPU/CPU monitor failed: {e}")
72
 
 
73
  def sanitize_utf8(text: str) -> str:
74
  return text.encode("utf-8", "ignore").decode("utf-8")
75
 
 
76
  def file_hash(path: str) -> str:
77
  with open(path, "rb") as f:
78
  return hashlib.md5(f.read()).hexdigest()
79
 
 
80
  def count_tokens(text: str) -> int:
81
  encoding = tiktoken.get_encoding(TOKENIZER)
82
  return len(encoding.encode(text))
83
 
 
84
  def extract_all_pages_with_token_count(file_path: str) -> Tuple[str, int, int]:
85
  try:
86
  text_chunks = []
@@ -100,6 +105,7 @@ def extract_all_pages_with_token_count(file_path: str) -> Tuple[str, int, int]:
100
  except Exception as e:
101
  return f"PDF processing error: {str(e)}", 0, 0
102
 
 
103
  def convert_file_to_json(file_path: str, file_type: str) -> str:
104
  try:
105
  h = file_hash(file_path)
@@ -133,7 +139,7 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
133
  df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
134
  except:
135
  df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
136
- content = df.fillna("").astype(str).values.tolist()
137
  result = json.dumps({
138
  "filename": os.path.basename(file_path),
139
  "rows": content,
@@ -147,6 +153,7 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
147
  except Exception as e:
148
  return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
149
 
 
150
  def clean_response(text: str) -> str:
151
  text = sanitize_utf8(text)
152
  patterns = [
@@ -159,6 +166,7 @@ def clean_response(text: str) -> str:
159
  text = re.sub(pat, "", text, flags=re.DOTALL)
160
  return re.sub(r"\n{3,}", "\n\n", text).strip()
161
 
 
162
  def format_final_report(analysis_results: List[str], filename: str) -> str:
163
  report = [
164
  "COMPREHENSIVE CLINICAL OVERSIGHT ANALYSIS",
@@ -173,7 +181,11 @@ def format_final_report(analysis_results: List[str], filename: str) -> str:
173
  for res in analysis_results:
174
  for sec in sections:
175
  m = re.search(
176
- rf"{re.escape(sec)}:?\s*\n(.+?)(?=\n\*|\n\n|$)",
 
 
 
 
177
  res, re.IGNORECASE | re.DOTALL
178
  )
179
  if m:
@@ -193,6 +205,7 @@ def format_final_report(analysis_results: List[str], filename: str) -> str:
193
  report.append("END OF REPORT")
194
  return "\n".join(report)
195
 
 
196
  def split_content_by_tokens(content: str, max_tokens: int) -> List[str]:
197
  paragraphs = re.split(r"\n\s*\n", content)
198
  chunks, current, curr_toks = [], [], 0
@@ -217,6 +230,7 @@ def split_content_by_tokens(content: str, max_tokens: int) -> List[str]:
217
  chunks.append("\n\n".join(current))
218
  return chunks
219
 
 
220
  def init_agent():
221
  print("🔁 Initializing model...")
222
  log_system_usage("Before Load")
@@ -239,6 +253,7 @@ def init_agent():
239
  print("✅ Agent Ready")
240
  return agent
241
 
 
242
  def analyze_complete_document(content: str, filename: str, agent: TxAgent, temperature: float = 0.3) -> str:
243
  base_prompt = (
244
  "Analyze for:\n1. Critical\n2. Missed DX\n3. Med issues\n4. Gaps\n5. Follow-up\n\nContent:\n"
@@ -272,6 +287,7 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent, tempe
272
  print(f"Error processing chunk {i}: {e}")
273
  return format_final_report(results, filename)
274
 
 
275
  def create_ui(agent):
276
  with gr.Blocks(title="Clinical Oversight Assistant") as demo:
277
  gr.Markdown("""
@@ -307,7 +323,7 @@ def create_ui(agent):
307
  report = analyze_complete_document(combined, "+".join([os.path.basename(f.name) for f in files]), agent, temp)
308
  file_hash_val = hashlib.md5(combined.encode()).hexdigest()
309
  path = os.path.join(report_dir, f"{file_hash_val}_report.txt")
310
- with open(path, "w") as rd:
311
  rd.write(report)
312
  yield report, path, "✅ Analysis complete!", previews
313
  send_btn.click(analyze, [file_upload, msg_input, temperature], [report_output, download_output, status, data_preview])
@@ -326,5 +342,6 @@ if __name__ == "__main__":
326
  server_name="0.0.0.0",
327
  server_port=7860,
328
  show_error=True,
329
- share=False
 
330
  )
 
55
  PROMPT_RESERVE = 100
56
  MEDICAL_SECTION_HEADER = "=== MEDICAL SECTION ==="
57
 
58
+
59
  def log_system_usage(tag=""):
60
  try:
61
  cpu = psutil.cpu_percent(interval=1)
 
71
  except Exception as e:
72
  print(f"[{tag}] GPU/CPU monitor failed: {e}")
73
 
74
+
75
  def sanitize_utf8(text: str) -> str:
76
  return text.encode("utf-8", "ignore").decode("utf-8")
77
 
78
+
79
  def file_hash(path: str) -> str:
80
  with open(path, "rb") as f:
81
  return hashlib.md5(f.read()).hexdigest()
82
 
83
+
84
  def count_tokens(text: str) -> int:
85
  encoding = tiktoken.get_encoding(TOKENIZER)
86
  return len(encoding.encode(text))
87
 
88
+
89
  def extract_all_pages_with_token_count(file_path: str) -> Tuple[str, int, int]:
90
  try:
91
  text_chunks = []
 
105
  except Exception as e:
106
  return f"PDF processing error: {str(e)}", 0, 0
107
 
108
+
109
  def convert_file_to_json(file_path: str, file_type: str) -> str:
110
  try:
111
  h = file_hash(file_path)
 
139
  df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
140
  except:
141
  df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
142
+ content = df.fillna("" ).astype(str).values.tolist()
143
  result = json.dumps({
144
  "filename": os.path.basename(file_path),
145
  "rows": content,
 
153
  except Exception as e:
154
  return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
155
 
156
+
157
  def clean_response(text: str) -> str:
158
  text = sanitize_utf8(text)
159
  patterns = [
 
166
  text = re.sub(pat, "", text, flags=re.DOTALL)
167
  return re.sub(r"\n{3,}", "\n\n", text).strip()
168
 
169
+
170
  def format_final_report(analysis_results: List[str], filename: str) -> str:
171
  report = [
172
  "COMPREHENSIVE CLINICAL OVERSIGHT ANALYSIS",
 
181
  for res in analysis_results:
182
  for sec in sections:
183
  m = re.search(
184
+ rf"{re.escape(sec)}:?\s*
185
+ (.+?)(?=
186
+ \*|
187
+
188
+ |$)",
189
  res, re.IGNORECASE | re.DOTALL
190
  )
191
  if m:
 
205
  report.append("END OF REPORT")
206
  return "\n".join(report)
207
 
208
+
209
  def split_content_by_tokens(content: str, max_tokens: int) -> List[str]:
210
  paragraphs = re.split(r"\n\s*\n", content)
211
  chunks, current, curr_toks = [], [], 0
 
230
  chunks.append("\n\n".join(current))
231
  return chunks
232
 
233
+
234
  def init_agent():
235
  print("🔁 Initializing model...")
236
  log_system_usage("Before Load")
 
253
  print("✅ Agent Ready")
254
  return agent
255
 
256
+
257
  def analyze_complete_document(content: str, filename: str, agent: TxAgent, temperature: float = 0.3) -> str:
258
  base_prompt = (
259
  "Analyze for:\n1. Critical\n2. Missed DX\n3. Med issues\n4. Gaps\n5. Follow-up\n\nContent:\n"
 
287
  print(f"Error processing chunk {i}: {e}")
288
  return format_final_report(results, filename)
289
 
290
+
291
  def create_ui(agent):
292
  with gr.Blocks(title="Clinical Oversight Assistant") as demo:
293
  gr.Markdown("""
 
323
  report = analyze_complete_document(combined, "+".join([os.path.basename(f.name) for f in files]), agent, temp)
324
  file_hash_val = hashlib.md5(combined.encode()).hexdigest()
325
  path = os.path.join(report_dir, f"{file_hash_val}_report.txt")
326
+ with open(path, "w", encoding="utf-8") as rd:
327
  rd.write(report)
328
  yield report, path, "✅ Analysis complete!", previews
329
  send_btn.click(analyze, [file_upload, msg_input, temperature], [report_output, download_output, status, data_preview])
 
342
  server_name="0.0.0.0",
343
  server_port=7860,
344
  show_error=True,
345
+ share=False,
346
+ allowed_paths=[report_dir]
347
  )