Update app.py
app.py
CHANGED
@@ -306,7 +306,7 @@ Patient Record Excerpt (Chunk {0} of {1}):
 
     with gr.Row():
         with gr.Column(scale=3):
-            chatbot = gr.Chatbot(label="Analysis Summary", height=600)
+            chatbot = gr.Chatbot(label="Analysis Summary", height=600, value=[])
             msg_input = gr.Textbox(placeholder="Ask about potential oversights...")
             send_btn = gr.Button("Analyze", variant="primary")
             file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
@@ -316,8 +316,14 @@ Patient Record Excerpt (Chunk {0} of {1}):
             download_output = gr.File(label="Download Detailed Report")
             progress_bar = gr.Progress()
 
-    def analyze(message: str, history: List[…
-        …
+    def analyze(message: str, history: List[List[str]], files: List, progress=gr.Progress()):
+        """Optimized analysis pipeline with correct message formatting"""
+        # Initialize with empty history if None
+        if history is None:
+            history = []
+
+        # Append user message
+        history.append([message, None])
         yield history, None, ""
 
         extracted = []
@@ -329,33 +335,44 @@ Patient Record Excerpt (Chunk {0} of {1}):
                 cache_key = f"{file_hash(f.name)}_{file_type}"
 
                 if cache_key in cache:
-                    …
-                    …
-                    …
-                    …
-                    result = process_file_cached(f.name, file_type)
-                    if result and not (len(result) == 1 and "error" in result[0]):
-                        cache[cache_key] = result
-                        extracted.extend(result)
-                        history.append({"role": "assistant", "content": f"Processed {os.path.basename(f.name)}"})
+                    cached_data = cache[cache_key]
+                    if isinstance(cached_data, list) and len(cached_data) > 0:
+                        extracted.extend(cached_data)
+                        history[-1][1] = f"✅ Using cached data for {os.path.basename(f.name)}"
                         yield history, None, ""
                     else:
-                        …
-                        history…
-                        …
+                        history[-1][1] = f"⚠️ Cached data empty for {os.path.basename(f.name)}. Reprocessing..."
+                        yield history, None, ""
+                else:
+                    try:
+                        result = process_file_cached(f.name, file_type)
+                        if result and not (len(result) == 1 and "error" in result[0]):
+                            cache[cache_key] = result
+                            extracted.extend(result)
+                            history[-1][1] = f"✅ Processed {os.path.basename(f.name)}"
+                            yield history, None, ""
+                        else:
+                            error_msg = result[0]["error"] if result else "Unknown error"
+                            history[-1][1] = f"❌ Failed to process {os.path.basename(f.name)}: {error_msg}"
+                            yield history, None, error_msg
+                            return
+                    except Exception as e:
+                        logger.error(f"File processing error: {e}")
+                        history[-1][1] = f"❌ Error processing {os.path.basename(f.name)}: {str(e)}"
+                        yield history, None, str(e)
                         return
 
             file_hash_value = file_hash(files[0].name) if files else ""
-            …
+
             if not extracted:
-                history…
-                yield history, None, "No valid content extracted"
+                history[-1][1] = "❌ No valid content extracted. Please upload a supported file."
+                yield history, None, "No valid content extracted."
                 return
 
             chunks = [item["content"] for item in extracted if "content" in item]
             if not chunks:
-                history…
-                yield history, None, "No processable content found"
+                history[-1][1] = "❌ No processable content found in the file."
+                yield history, None, "No processable content found."
                 return
 
             combined_response = ""
@@ -365,15 +382,6 @@ Patient Record Excerpt (Chunk {0} of {1}):
             for batch_idx in range(0, len(chunks), BATCH_SIZE):
                 batch_chunks = chunks[batch_idx:batch_idx + BATCH_SIZE]
 
-                batch_prompts = [
-                    PROMPT_TEMPLATE.format(
-                        batch_idx + i + 1,
-                        len(chunks),
-                        chunk=chunk[:1800]
-                    )
-                    for i, chunk in enumerate(batch_chunks)
-                ]
-
                 progress(batch_idx / len(chunks),
                          desc=f"Processing batch {(batch_idx // BATCH_SIZE) + 1}/{(len(chunks) + BATCH_SIZE - 1) // BATCH_SIZE}")
 
@@ -391,12 +399,12 @@ Patient Record Excerpt (Chunk {0} of {1}):
                     try:
                         response = clean_response(future.result())
                         if response:
-                            combined_response += f"--- Analysis for Chunk {batch_idx + chunk_idx + 1} ---\n{response}\n"
-                            history[-1] = …
+                            combined_response += f"\n--- Analysis for Chunk {batch_idx + chunk_idx + 1} ---\n{response}\n"
+                            history[-1][1] = combined_response.strip()
                         yield history, None, ""
                     except Exception as e:
                         logger.error(f"Chunk processing error: {e}")
-                        history[-1] = …
+                        history[-1][1] = f"Error processing chunk: {str(e)}"
                         yield history, None, ""
                     finally:
                         del future
@@ -404,12 +412,14 @@ Patient Record Excerpt (Chunk {0} of {1}):
                         gc.collect()
 
             summary = "Analysis complete. " + ("Download full report below." if report_path and os.path.exists(report_path) else "")
-            history.append(…
+            history.append(["Analysis completed", None])
+            history[-1][1] = summary
             yield history, report_path, summary
 
         except Exception as e:
            logger.error(f"Analysis error: {e}")
-            history.append(…
+            history.append(["Analysis failed", None])
+            history[-1][1] = f"❌ Error occurred: {str(e)}"
             yield history, None, f"Error occurred: {str(e)}"
         finally:
             torch.cuda.empty_cache()
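Note: this commit switches the chat history from role/content dicts to pairs-format lists ([user, assistant]), the layout gr.Chatbot expects in tuples mode, which is why the updates above write to history[-1][1]. Below is a minimal wiring sketch for the new analyze generator, assuming the component names from this diff; the status textbox and the click hookup are illustrative assumptions, not part of the commit.

import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="Analysis Summary", height=600, value=[])
    msg_input = gr.Textbox(placeholder="Ask about potential oversights...")
    send_btn = gr.Button("Analyze", variant="primary")
    file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
    download_output = gr.File(label="Download Detailed Report")
    status_box = gr.Textbox(label="Status")  # hypothetical target for the third yielded value

    # analyze is a generator: each `yield history, report_path, summary`
    # streams an update to the three outputs, in order.
    send_btn.click(
        fn=analyze,  # the function defined in this commit
        inputs=[msg_input, chatbot, file_upload],
        outputs=[chatbot, download_output, status_box],
    )

demo.queue()   # queuing lets generator callbacks stream partial results
demo.launch()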