Update app.py
app.py CHANGED
@@ -9,6 +9,7 @@ import hashlib
 import re
 import psutil
 import subprocess
+from collections import defaultdict
 
 # Persistent directory
 persistent_dir = "/data/hf_cache"
@@ -45,7 +46,7 @@ def extract_all_pages(file_path: str) -> str:
     try:
         text_chunks = []
         with pdfplumber.open(file_path) as pdf:
-            for
+            for page in pdf.pages:
                 page_text = page.extract_text() or ""
                 text_chunks.append(page_text.strip())
         return "\n".join(text_chunks)
@@ -88,15 +89,49 @@ def log_system_usage(tag=""):
 
 def clean_response(text: str) -> str:
     text = sanitize_utf8(text)
-
-    text = re.sub(r"\[
+    # Remove all tool-related and reasoning text
+    text = re.sub(r"\[TOOL_CALLS\].*|(?:get_|tool\s|retrieve\s).*?\n", "", text, flags=re.DOTALL | re.IGNORECASE)
     text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
-    text = re.sub(r"(?i)(to analyze
+    text = re.sub(r"(?i)(to address|analyze the|will (start|look|use|focus)|since the|no (drug|clinical|information)|none|previous|attempt|involve|check for|explore|manually).*?\n", "", text, flags=re.DOTALL)
     text = re.sub(r"\n{3,}", "\n\n", text).strip()
-
+    # Only keep text under specific headings
+    if not re.search(r"^(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text, re.MULTILINE | re.IGNORECASE):
         return ""
     return text
 
+def consolidate_findings(responses: List[str]) -> str:
+    # Aggregate findings under each heading, removing duplicates
+    findings = defaultdict(set)
+    headings = ["Missed Diagnoses", "Medication Conflicts", "Incomplete Assessments", "Urgent Follow-up"]
+
+    for response in responses:
+        if not response:
+            continue
+        # Split response into sections by heading
+        current_heading = None
+        current_points = []
+        for line in response.split("\n"):
+            line = line.strip()
+            if not line:
+                continue
+            if any(line.lower().startswith(h.lower()) for h in headings):
+                if current_heading and current_points:
+                    findings[current_heading].update(current_points)
+                current_heading = next(h for h in headings if line.lower().startswith(h.lower()))
+                current_points = []
+            elif current_heading and line.startswith("-"):
+                current_points.append(line)
+        if current_heading and current_points:
+            findings[current_heading].update(current_points)
+
+    # Format consolidated output
+    output = []
+    for heading in headings:
+        if findings[heading]:
+            output.append(f"**{heading}**:")
+            output.extend(sorted(findings[heading]))
+    return "\n".join(output).strip() if output else "No oversights identified."
+
 def init_agent():
     print("🔁 Initializing model...")
     log_system_usage("Before Load")
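The heading gate added to clean_response is strict: a chunk answer that never opens a line with one of the four section headings is discarded wholesale. A self-contained sketch of just that gate (the sample strings are invented here, not from the commit):

```python
import re

# Same pattern the new guard uses to decide whether any heading is present
HEADINGS = r"^(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)"

kept = "Missed Diagnoses:\n- No follow-up on elevated A1c"
dropped = "To analyze the records, I will start by checking the labs."

for text in (kept, dropped):
    # clean_response returns "" whenever this search fails
    print(bool(re.search(HEADINGS, text, re.MULTILINE | re.IGNORECASE)))
# True
# False
```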
@@ -124,6 +159,7 @@ def create_ui(agent):
 
     def analyze(message: str, history: List[dict], files: List):
         history.append({"role": "user", "content": message})
+        history.append({"role": "assistant", "content": "🔄 Analyzing..."})
         yield history, None
 
         extracted = ""
@@ -135,28 +171,26 @@ def create_ui(agent):
             extracted = "\n".join(results)
         file_hash_value = file_hash(files[0].name) if files else ""
 
-        # Split into small chunks of
-        chunk_size =
+        # Split into small chunks of 1,500 characters
+        chunk_size = 1500
         chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
-
+        chunk_responses = []
 
-        prompt_template =
-List doctor oversights
+        prompt_template = """
+List doctor oversights under these headings only, with one brief point each. No tools or reasoning steps.
 
-**Missed Diagnoses**:
-**Medication Conflicts**:
-**Incomplete Assessments**:
-**Urgent Follow-up**:
+**Missed Diagnoses**:
+**Medication Conflicts**:
+**Incomplete Assessments**:
+**Urgent Follow-up**:
 
 Records:
-{
+{chunk}
 """
 
         try:
-
-
-
-            for chunk_idx, chunk in enumerate(chunks, 1):
+            # Process all chunks, collecting responses
+            for chunk in chunks:
                 prompt = prompt_template.format(chunk=chunk)
                 chunk_response = ""
                 for output in agent.run_gradio_chat(
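For scale, the fixed-width slicing above yields ceil(len/1500) pieces; e.g. a 4,000-character extraction (length invented for illustration) splits 1500/1500/1000:

```python
extracted = "x" * 4000
chunk_size = 1500
chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
print([len(c) for c in chunks])  # [1500, 1500, 1000]
```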
@@ -176,27 +210,23 @@ Records:
                             cleaned = clean_response(m.content)
                             if cleaned:
                                 chunk_response += cleaned + "\n"
-                                history[-1]["content"] = combined_response + chunk_response.strip()
-                                yield history, None
                     elif isinstance(output, str) and output.strip():
                         cleaned = clean_response(output)
                         if cleaned:
                             chunk_response += cleaned + "\n"
-                            history[-1]["content"] = combined_response + chunk_response.strip()
-                            yield history, None
-
                 if chunk_response:
-
+                    chunk_responses.append(chunk_response)
 
-
-
-
-
+            # Consolidate all responses into one final output
+            final_response = consolidate_findings(chunk_responses)
+            history[-1]["content"] = final_response
+            yield history, None
 
+            # Generate report file
             report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
-            if report_path and
+            if report_path and final_response != "No oversights identified.":
                 with open(report_path, "w", encoding="utf-8") as f:
-                    f.write(
+                    f.write(final_response)
             yield history, report_path if report_path and os.path.exists(report_path) else None
 
         except Exception as e:
|