CPS-Test-Mobile

Paused

App Files Community

Ali2206 committed on Apr 16

Commit

2e43581

verified ·

1 Parent(s): 1ba0100

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -57

app.py CHANGED Viewed

@@ -34,9 +34,6 @@ sys.path.insert(0, src_path)
 from txagent.txagent import TxAgent
-MEDICAL_KEYWORDS = {'diagnosis', 'assessment', 'plan', 'results', 'medications',
-                    'allergies', 'summary', 'impression', 'findings', 'recommendations'}
 def sanitize_utf8(text: str) -> str:
     return text.encode("utf-8", "ignore").decode("utf-8")
@@ -44,23 +41,14 @@ def file_hash(path: str) -> str:
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
-def extract_priority_pages(file_path: str, max_chars: int = 6000) -> str:
     try:
         text_chunks = []
-        total_chars = 0
         with pdfplumber.open(file_path) as pdf:
             for i, page in enumerate(pdf.pages):
                 page_text = page.extract_text() or ""
-                if i < 3 or any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
-                    page_chunk = f"=== Page {i+1} ===\n{page_text.strip()}\n"
-                    if total_chars + len(page_chunk) <= max_chars:
-                        text_chunks.append(page_chunk)
-                        total_chars += len(page_chunk)
-                    else:
-                        remaining = max_chars - total_chars
-                        text_chunks.append(page_chunk[:remaining])
-                        break
-        return "".join(text_chunks).strip()
     except Exception as e:
         return f"PDF processing error: {str(e)}"
@@ -73,7 +61,7 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
                 return f.read()
         if file_type == "pdf":
-            text = extract_priority_pages(file_path)
             result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
         else:
             result = json.dumps({"error": f"Unsupported file type: {file_type}"})
@@ -103,7 +91,7 @@ def clean_response(text: str) -> str:
     text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)
     text = re.sub(r"\['get_[^\]]+\']\n?", "", text)
     text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
-    text = re.sub(r"(?i)(to analyze|based on|will start|no (drug|clinical|information)).*?\n", "", text, flags=re.DOTALL)
     text = re.sub(r"\n{3,}", "\n\n", text).strip()
     if not re.search(r"(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text, re.IGNORECASE):
         return ""
@@ -147,59 +135,68 @@ def create_ui(agent):
                     extracted = "\n".join(results)
                     file_hash_value = file_hash(files[0].name) if files else ""
-            prompt = f"""
-Analyze the medical records and list potential doctor oversights under these headings only, with brief details:
-**Missed Diagnoses**: Inconsistencies or unaddressed conditions.
-**Medication Conflicts**: Contraindications or risky prescriptions.
-**Incomplete Assessments**: Missing or shallow evaluations.
-**Urgent Follow-up**: Issues needing immediate attention.
-Records:
-{extracted[:6000]}
-Respond concisely.
 """
             try:
                 history.append({"role": "assistant", "content": "🔄 Analyzing..."})
                 yield history, None
-                response = ""
-                for output in agent.run_gradio_chat(
-                    message=prompt,
-                    history=[],
-                    temperature=0.1,
-                    max_new_tokens=512,
-                    max_token=4096,
-                    call_agent=False,
-                    conversation=[],
-                ):
-                    if output is None:
-                        continue
-                    if isinstance(output, list):
-                        for m in output:
-                            if hasattr(m, 'content') and m.content:
-                                cleaned = clean_response(m.content)
-                                if cleaned:
-                                    response += cleaned + "\n"
-                                    history[-1]["content"] = response.strip()
-                                    yield history, None
-                    elif isinstance(output, str) and output.strip():
-                        cleaned = clean_response(output)
-                        if cleaned:
-                            response += cleaned + "\n"
-                            history[-1]["content"] = response.strip()
-                            yield history, None
-                if not response:
                     history[-1]["content"] = "No oversights identified."
-                yield history, None
                 report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
-                if report_path and response:
                     with open(report_path, "w", encoding="utf-8") as f:
-                        f.write(response.strip())
                 yield history, report_path if report_path and os.path.exists(report_path) else None
             except Exception as e:

 from txagent.txagent import TxAgent
 def sanitize_utf8(text: str) -> str:
     return text.encode("utf-8", "ignore").decode("utf-8")
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
+def extract_all_pages(file_path: str) -> str:
     try:
         text_chunks = []
         with pdfplumber.open(file_path) as pdf:
             for i, page in enumerate(pdf.pages):
                 page_text = page.extract_text() or ""
+                text_chunks.append(page_text.strip())
+        return "\n".join(text_chunks)
     except Exception as e:
         return f"PDF processing error: {str(e)}"
                 return f.read()
         if file_type == "pdf":
+            text = extract_all_pages(file_path)
             result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
         else:
             result = json.dumps({"error": f"Unsupported file type: {file_type}"})
     text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)
     text = re.sub(r"\['get_[^\]]+\']\n?", "", text)
     text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
+    text = re.sub(r"(?i)(to analyze|based on|will start|no (drug|clinical|information)|none).*?\n", "", text, flags=re.DOTALL)
     text = re.sub(r"\n{3,}", "\n\n", text).strip()
     if not re.search(r"(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text, re.IGNORECASE):
         return ""
                     extracted = "\n".join(results)
                     file_hash_value = file_hash(files[0].name) if files else ""
+            # Split into small chunks of 2,000 characters
+            chunk_size = 2000
+            chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
+            combined_response = ""
+            prompt_template = f"""
+List doctor oversights in the medical records under these headings with brief details:
+**Missed Diagnoses**: Unaddressed conditions or inconsistencies.
+**Medication Conflicts**: Risky prescriptions.
+**Incomplete Assessments**: Missing evaluations.
+**Urgent Follow-up**: Issues needing attention.
+Records:
+{{chunk}}
 """
             try:
                 history.append({"role": "assistant", "content": "🔄 Analyzing..."})
                 yield history, None
+                for chunk_idx, chunk in enumerate(chunks, 1):
+                    prompt = prompt_template.format(chunk=chunk)
+                    chunk_response = ""
+                    for output in agent.run_gradio_chat(
+                        message=prompt,
+                        history=[],
+                        temperature=0.1,
+                        max_new_tokens=256,
+                        max_token=4096,
+                        call_agent=False,
+                        conversation=[],
+                    ):
+                        if output is None:
+                            continue
+                        if isinstance(output, list):
+                            for m in output:
+                                if hasattr(m, 'content') and m.content:
+                                    cleaned = clean_response(m.content)
+                                    if cleaned:
+                                        chunk_response += cleaned + "\n"
+                                        history[-1]["content"] = combined_response + chunk_response.strip()
+                                        yield history, None
+                        elif isinstance(output, str) and output.strip():
+                            cleaned = clean_response(output)
+                            if cleaned:
+                                chunk_response += cleaned + "\n"
+                                history[-1]["content"] = combined_response + chunk_response.strip()
+                                yield history, None
+                    if chunk_response:
+                        combined_response += chunk_response
+                if not combined_response:
                     history[-1]["content"] = "No oversights identified."
+                else:
+                    history[-1]["content"] = combined_response.strip()
                 report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
+                if report_path and combined_response:
                     with open(report_path, "w", encoding="utf-8") as f:
+                        f.write(combined_response)
                 yield history, report_path if report_path and os.path.exists(report_path) else None
             except Exception as e: