Ali2206 committed (verified)
Commit 26668b6 · 1 Parent(s): 96347cc

Update app.py

Files changed (1):
  1. app.py +50 -38

app.py CHANGED
@@ -47,17 +47,15 @@ def file_hash(path: str) -> str:
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
 
-def extract_priority_pages(file_path: str, max_pages: int = 10) -> str:
+def extract_priority_pages(file_path: str) -> str:
     try:
         text_chunks = []
         with pdfplumber.open(file_path) as pdf:
-            for i, page in enumerate(pdf.pages[:3]):
-                text = page.extract_text() or ""
-                text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
-            for i, page in enumerate(pdf.pages[3:max_pages], start=4):
+            for i, page in enumerate(pdf.pages):
                 page_text = page.extract_text() or ""
-                if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
-                    text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
+                # Include first 3 pages or pages with medical keywords
+                if i < 3 or any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
+                    text_chunks.append(f"=== Page {i+1} ===\n{page_text.strip()}")
         return "\n\n".join(text_chunks)
     except Exception as e:
         return f"PDF processing error: {str(e)}"
@@ -160,7 +158,12 @@ def create_ui(agent):
             extracted = "\n".join(results)
             file_hash_value = file_hash(files[0].name) if files else ""
 
-            prompt = f"""
+            # Split extracted text into chunks of ~6,000 characters
+            chunk_size = 6000
+            chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
+            combined_response = ""
+
+            prompt_template = """
 Analyze the medical records for clinical oversights. Provide a concise, evidence-based summary under these headings:
 
 1. **Missed Diagnoses**:
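This replaces the old single 8,000-character truncation with non-overlapping ~6,000-character slices, so text past the former cap is analyzed instead of discarded. The slicing itself is plain list-comprehension striding, for example:

    extracted = "x" * 15000  # stand-in for the concatenated page text
    chunk_size = 6000
    chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
    print([len(c) for c in chunks])  # [6000, 6000, 3000], nothing is dropped

One trade-off: fixed-width slices can split a sentence or lab value across two chunks; splitting on the "=== Page N ===" markers instead would keep pages intact at the cost of uneven chunk sizes.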
@@ -178,8 +181,8 @@ Analyze the medical records for clinical oversights. Provide a concise, evidence
 4. **Urgent Follow-up**:
    - Flag abnormal lab results, imaging, behaviors, or legal history needing immediate reassessment or referral.
 
-Medical Records (Truncated to 8k chars):
-{extracted[:8000]}
+Medical Records (Chunk {chunk_idx}):
+{chunk}
 
 Begin analysis:
 """
@@ -188,35 +191,44 @@ Begin analysis:
             if history and history[-1]["content"].startswith("⏳"):
                 history.pop()
 
-            for chunk in agent.run_gradio_chat(
-                message=prompt,
-                history=[],
-                temperature=0.2,
-                max_new_tokens=1024,
-                max_token=4096,
-                call_agent=False,
-                conversation=[],
-            ):
-                if chunk is None:
-                    continue
-
-                if isinstance(chunk, list):
-                    for m in chunk:
-                        if hasattr(m, 'content') and m.content:
-                            cleaned = clean_response(m.content)
-                            if cleaned:
-                                history.append({"role": m.role, "content": cleaned})
-                                yield history, None
-                elif isinstance(chunk, str) and chunk.strip():
-                    cleaned = clean_response(chunk)
-                    if cleaned:
-                        if history and history[-1]["role"] == "assistant":
-                            history[-1]["content"] += cleaned
-                        else:
-                            history.append({"role": "assistant", "content": cleaned})
-                        yield history, None
-
+            # Process each chunk sequentially
+            for chunk_idx, chunk in enumerate(chunks, 1):
+                prompt = prompt_template.format(chunk_idx=chunk_idx, chunk=chunk)
+                chunk_response = ""
+                for chunk_output in agent.run_gradio_chat(
+                    message=prompt,
+                    history=[],
+                    temperature=0.2,
+                    max_new_tokens=1024,
+                    max_token=4096,
+                    call_agent=False,
+                    conversation=[],
+                ):
+                    if chunk_output is None:
+                        continue
+                    if isinstance(chunk_output, list):
+                        for m in chunk_output:
+                            if hasattr(m, 'content') and m.content:
+                                cleaned = clean_response(m.content)
+                                if cleaned:
+                                    chunk_response += cleaned + "\n"
+                    elif isinstance(chunk_output, str) and chunk_output.strip():
+                        cleaned = clean_response(chunk_output)
+                        if cleaned:
+                            chunk_response += cleaned + "\n"
+                combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
+
+            # Update history with combined response
+            if combined_response:
+                history.append({"role": "assistant", "content": combined_response.strip()})
+            else:
+                history.append({"role": "assistant", "content": "No oversights identified."})
+
+            # Generate report file
             report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
+            if report_path:
+                with open(report_path, "w", encoding="utf-8") as f:
+                    f.write(combined_response)
             yield history, report_path if report_path and os.path.exists(report_path) else None
 
         except Exception as e:
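The largest behavioral change is how output reaches the UI: the old loop streamed cleaned fragments into history as they arrived, while the new loop buffers each chunk's stream into chunk_response, stitches the per-chunk sections into combined_response, and appends a single assistant message at the end (also writing it to the report file). A minimal sketch of that buffering pattern, using a stub generator in place of agent.run_gradio_chat (whose real call signature appears in the diff above):

    def fake_stream():
        # Stand-in for agent.run_gradio_chat: may yield None or text fragments.
        yield None
        yield "Possible missed diagnosis: "
        yield "iron-deficiency anemia."

    combined_response = ""
    for chunk_idx in (1, 2):
        chunk_response = ""
        for out in fake_stream():
            if out is None:
                continue                      # skip keep-alive ticks
            if isinstance(out, str) and out.strip():
                chunk_response += out + "\n"  # buffer instead of yielding to the UI
        combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
    print(combined_response)

One side effect worth noting: because nothing is yielded until every chunk finishes, the chat window no longer shows incremental progress during generation.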
 