Ali2206 committed on
Commit
f5365bc
·
verified ·
1 Parent(s): fa0b058

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -10
app.py CHANGED
@@ -19,6 +19,7 @@ import time
19
  from transformers import AutoTokenizer
20
  from functools import lru_cache
21
  import numpy as np
 
22
 
23
  # Configure logging
24
  logging.basicConfig(level=logging.INFO)
@@ -224,38 +225,53 @@ def log_system_usage(tag=""):
224
  logger.error(f"[{tag}] Monitor failed: {e}")
225
 
226
  def clean_response(text: str) -> str:
227
- """Enhanced response cleaning with aggressive artifact removal"""
228
  if not text:
229
  return ""
230
 
231
  # Pre-compiled regex patterns for cleaning
232
  patterns = [
233
  (re.compile(r"\[.*?\]|\bNone\b", re.IGNORECASE), ""),
 
234
  (re.compile(r"To (analyze|proceed).*?medications\.", re.IGNORECASE), ""),
235
  (re.compile(r"Since the previous attempts.*?\.", re.IGNORECASE), ""),
236
  (re.compile(r"I need to.*?results\.", re.IGNORECASE), ""),
237
- (re.compile(r"(Therefore, )?I will start by retrieving.*?\.", re.IGNORECASE), ""),
238
- (re.compile(r"(Therefore, )?Retrieving tools.*?\.", re.IGNORECASE), ""),
239
  (re.compile(r"This requires reviewing.*?\.", re.IGNORECASE), ""),
240
  (re.compile(r"Given the context, it is important to review.*?\.", re.IGNORECASE), ""),
 
241
  (re.compile(r"\s+"), " "),
242
  (re.compile(r"[^\w\s\.\,\(\)\-]"), ""),
243
- (re.compile(r"(No missed diagnoses identified\.)\s*\1+", re.IGNORECASE), r"\1"), # Deduplicate
244
  ]
245
 
246
  for pattern, repl in patterns:
247
  text = pattern.sub(repl, text)
248
 
249
- # Deduplicate identical sentences
250
  sentences = text.split(". ")
 
251
  seen = set()
252
- unique_sentences = [s for s in sentences if s and not (s in seen or seen.add(s))]
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  text = ". ".join(unique_sentences).strip()
254
 
255
  return text if text else "No missed diagnoses identified."
256
 
257
  def summarize_findings(combined_response: str) -> str:
258
- """Enhanced findings summarization for a single, detailed paragraph"""
259
  if not combined_response:
260
  return "No missed diagnoses were identified in the provided records."
261
 
@@ -352,7 +368,7 @@ def init_agent():
352
  def create_ui(agent):
353
  """Optimized UI creation with pre-compiled templates"""
354
  PROMPT_TEMPLATE = """
355
- Analyze the patient record excerpt for missed diagnoses, focusing only on clinical findings such as symptoms, medications, or evaluation results provided in the excerpt. Provide a concise, evidence-based summary in one paragraph without headings or bullet points. Include specific findings (e.g., 'elevated blood pressure (160/95) on page 10'), their implications (e.g., 'may indicate untreated hypertension'), and recommend urgent review. Do NOT use external tools or retrieve additional data. If no missed diagnoses are found, state 'No missed diagnoses identified' in one sentence. Ignore non-clinical data (e.g., name, date of birth) and other oversight categories (e.g., medication conflicts).
356
  Patient Record Excerpt (Chunk {0} of {1}):
357
  {chunk}
358
  """
@@ -390,7 +406,7 @@ Patient Record Excerpt (Chunk {0} of {1}):
390
  extracted.extend(cache[cache_key])
391
  else:
392
  result = process_file_cached(f.name, file_type)
393
- cache[cache_key] = result
394
  extracted.extend(result)
395
 
396
  file_hash_value = file_hash(files[0].name) if files else ""
@@ -409,6 +425,7 @@ Patient Record Excerpt (Chunk {0} of {1}):
409
 
410
  combined_response = ""
411
  report_path = None
 
412
 
413
  try:
414
  # Process in optimized batches
@@ -456,7 +473,15 @@ Patient Record Excerpt (Chunk {0} of {1}):
456
  content = clean_response(chunk_output)
457
 
458
  if content and content != "No missed diagnoses identified.":
459
- chunk_response += content + " "
 
 
 
 
 
 
 
 
460
 
461
  if chunk_response:
462
  combined_response += f"--- Analysis for Chunk {batch_idx + chunk_idx + 1} ---\n{chunk_response.strip()}\n"
 
19
  from transformers import AutoTokenizer
20
  from functools import lru_cache
21
  import numpy as np
22
+ from difflib import SequenceMatcher
23
 
24
  # Configure logging
25
  logging.basicConfig(level=logging.INFO)
 
225
  logger.error(f"[{tag}] Monitor failed: {e}")
226
 
227
  def clean_response(text: str) -> str:
228
+ """Enhanced response cleaning with aggressive deduplication"""
229
  if not text:
230
  return ""
231
 
232
  # Pre-compiled regex patterns for cleaning
233
  patterns = [
234
  (re.compile(r"\[.*?\]|\bNone\b", re.IGNORECASE), ""),
235
+ (re.compile(r"(The patient record excerpt provides|Patient record excerpt contains).*?(John Doe|general information).*?\.", re.IGNORECASE), ""),
236
  (re.compile(r"To (analyze|proceed).*?medications\.", re.IGNORECASE), ""),
237
  (re.compile(r"Since the previous attempts.*?\.", re.IGNORECASE), ""),
238
  (re.compile(r"I need to.*?results\.", re.IGNORECASE), ""),
239
+ (re.compile(r"(Therefore, )?(Retrieving|I will start by retrieving) tools.*?\.", re.IGNORECASE), ""),
 
240
  (re.compile(r"This requires reviewing.*?\.", re.IGNORECASE), ""),
241
  (re.compile(r"Given the context, it is important to review.*?\.", re.IGNORECASE), ""),
242
+ (re.compile(r"Final Analysis\s*", re.IGNORECASE), ""),
243
  (re.compile(r"\s+"), " "),
244
  (re.compile(r"[^\w\s\.\,\(\)\-]"), ""),
245
+ (re.compile(r"(No missed diagnoses identified\.)\s*\1+", re.IGNORECASE), r"\1"),
246
  ]
247
 
248
  for pattern, repl in patterns:
249
  text = pattern.sub(repl, text)
250
 
251
+ # Deduplicate near-identical sentences using similarity threshold
252
  sentences = text.split(". ")
253
+ unique_sentences = []
254
  seen = set()
255
+
256
+ for s in sentences:
257
+ if not s:
258
+ continue
259
+ # Check similarity with existing sentences
260
+ is_unique = True
261
+ for seen_s in seen:
262
+ if SequenceMatcher(None, s.lower(), seen_s.lower()).ratio() > 0.9:
263
+ is_unique = False
264
+ break
265
+ if is_unique:
266
+ unique_sentences.append(s)
267
+ seen.add(s)
268
+
269
  text = ". ".join(unique_sentences).strip()
270
 
271
  return text if text else "No missed diagnoses identified."
272
 
273
  def summarize_findings(combined_response: str) -> str:
274
+ """Enhanced findings summarization for a single, concise paragraph"""
275
  if not combined_response:
276
  return "No missed diagnoses were identified in the provided records."
277
 
 
368
  def create_ui(agent):
369
  """Optimized UI creation with pre-compiled templates"""
370
  PROMPT_TEMPLATE = """
371
+ Analyze the patient record excerpt for missed diagnoses, focusing ONLY on clinical findings such as symptoms, medications, or evaluation results provided in the excerpt. Provide a concise, evidence-based summary in ONE paragraph without headings, bullet points, or repeating non-clinical data (e.g., name, date of birth, allergies). Include specific findings (e.g., 'elevated blood pressure (160/95)'), their implications (e.g., 'may indicate untreated hypertension'), and recommend urgent review. Treat medications or psychiatric evaluations as potential missed diagnoses. Do NOT use external tools, retrieve additional data, or summarize non-clinical information. If no clinical findings are present, state 'No missed diagnoses identified' in ONE sentence. Ignore other oversight categories (e.g., medication conflicts).
372
  Patient Record Excerpt (Chunk {0} of {1}):
373
  {chunk}
374
  """
 
406
  extracted.extend(cache[cache_key])
407
  else:
408
  result = process_file_cached(f.name, file_type)
409
+ cache[cache_key] = result
410
  extracted.extend(result)
411
 
412
  file_hash_value = file_hash(files[0].name) if files else ""
 
425
 
426
  combined_response = ""
427
  report_path = None
428
+ seen_responses = set() # Track unique responses to avoid repetition
429
 
430
  try:
431
  # Process in optimized batches
 
473
  content = clean_response(chunk_output)
474
 
475
  if content and content != "No missed diagnoses identified.":
476
+ # Check for near-duplicate responses
477
+ is_unique = True
478
+ for seen_response in seen_responses:
479
+ if SequenceMatcher(None, content.lower(), seen_response.lower()).ratio() > 0.9:
480
+ is_unique = False
481
+ break
482
+ if is_unique:
483
+ chunk_response += content + " "
484
+ seen_responses.add(content)
485
 
486
  if chunk_response:
487
  combined_response += f"--- Analysis for Chunk {batch_idx + chunk_idx + 1} ---\n{chunk_response.strip()}\n"