Update app.py
app.py CHANGED
@@ -51,9 +51,11 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
     try:
         text_chunks = []
         with pdfplumber.open(file_path) as pdf:
+            # Always include the first three pages
             for i, page in enumerate(pdf.pages[:3]):
                 text = page.extract_text() or ""
                 text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
+            # Then include pages that mention one or more medical keywords
             for i, page in enumerate(pdf.pages[3:max_pages], start=4):
                 page_text = page.extract_text() or ""
                 if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
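The two added comments document the page-selection strategy: the first three pages are taken unconditionally, and later pages are kept only if they match a keyword. As a quick illustration of that whole-word filter, here is a minimal sketch; the real MEDICAL_KEYWORDS list is defined elsewhere in app.py and is not visible in this diff, so the three keywords below are placeholders:

    import re

    # Placeholder values; app.py defines the real MEDICAL_KEYWORDS list elsewhere.
    MEDICAL_KEYWORDS = ["diagnosis", "medication", "allergy"]

    def page_matches(page_text: str) -> bool:
        # Whole-word match, the same test the second loop above applies
        # to pages 4 through max_pages.
        return any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS)

    print(page_matches("Current medication: lisinopril 10 mg"))  # True
    print(page_matches("This page intentionally left blank"))    # False

Note that the pattern interpolates each keyword directly into the regex, so a keyword containing regex metacharacters would change the pattern's meaning; wrapping it as re.escape(kw) would be safer.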
@@ -132,24 +134,15 @@ def init_agent():
     return agent

 def clean_response(response: str) -> str:
-    """
-
-
-
-
-
-
-
-    if analysis_match:
-        cleaned = analysis_match.group(1).strip()
-    else:
-        # Fallback if pattern not found
-        cleaned = re.sub(r'\[TOOL_CALLS\].*?$', '', response, flags=re.DOTALL).strip()
-
-    # Remove any remaining JSON artifacts
+    """
+    Updated cleaner that removes the [TOOL_CALLS] tag and any JSON artifacts
+    while preserving the full analysis so that all identified oversights are displayed.
+    """
+    # Remove everything starting from the first [TOOL_CALLS] occurrence
+    cleaned = response.split("[TOOL_CALLS]")[0].strip()
+    # Remove any remaining JSON artifacts in case they appear
     cleaned = re.sub(r'\{.*?\}', '', cleaned)
     cleaned = re.sub(r'\[.*?\]', '', cleaned)
-
     return cleaned

 def create_ui(agent):
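The rewritten cleaner no longer depends on the old analysis_match regex (whose pattern was deleted along with the rest of the removed block); it simply truncates at the first [TOOL_CALLS] marker and strips brace/bracket fragments. A self-contained sketch of the new behavior, with the function body copied from the hunk above:

    import re

    def clean_response(response: str) -> str:
        cleaned = response.split("[TOOL_CALLS]")[0].strip()
        cleaned = re.sub(r'\{.*?\}', '', cleaned)
        cleaned = re.sub(r'\[.*?\]', '', cleaned)
        return cleaned

    raw = 'Oversight: anticoagulant not restarted post-op.\n[TOOL_CALLS] {"name": "lookup"}'
    print(clean_response(raw))  # -> Oversight: anticoagulant not restarted post-op.

One caveat: r'\[.*?\]' also deletes legitimate bracketed text in the analysis itself, not just JSON-ish artifacts, so any square brackets the model emits in its findings will be removed too.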
@@ -217,13 +210,13 @@ Medical Records:
                 history[-1] = {"role": "assistant", "content": current_cleaned}
                 yield history, None

-            # Final processing
+            # Final processing of the complete response
             final_cleaned = clean_response(full_response)

             if not final_cleaned:
                 final_cleaned = "⚠️ No clear oversights identified or model output was invalid."

-            # Save report if a file was processed
+            # Save report if a file was processed
             report_path = None
             if file_hash_value:
                 report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
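For context on the report step: the filename is derived from a hash of the uploaded file, so repeated analyses of the same document reuse one report path. The hashing scheme and report_dir are defined elsewhere in app.py and are not visible in this diff; this sketch only assumes an MD5-style hex digest:

    import hashlib
    import os

    report_dir = "reports"  # assumption: app.py's actual report_dir may differ
    file_hash_value = hashlib.md5(b"uploaded file bytes").hexdigest()  # hypothetical hash source
    report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
    print(report_path)  # reports/<32-char hex digest>_report.txt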
@@ -252,4 +245,4 @@ if __name__ == "__main__":
         show_error=True,
         allowed_paths=[report_dir],
         share=False
-    )
\ No newline at end of file
+    )
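The launch arguments matter for the report download: files saved outside Gradio's own temp cache generally must be listed in allowed_paths before the UI will serve them, which is why report_dir appears there. A minimal sketch, assuming a Blocks app named demo (the surrounding UI code is not shown in this diff):

    import os
    import gradio as gr

    report_dir = os.path.join(os.getcwd(), "reports")  # assumption: mirrors app.py's report_dir
    os.makedirs(report_dir, exist_ok=True)

    with gr.Blocks() as demo:
        gr.Markdown("placeholder UI")

    if __name__ == "__main__":
        demo.launch(
            show_error=True,              # surface Python errors in the browser
            allowed_paths=[report_dir],   # permit serving files saved under report_dir
            share=False,                  # no public tunnel
        )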