CPS-Test-Mobile

Paused

App Files Community

Ali2206 committed on Apr 17

Commit

a58b5f7

verified ·

1 Parent(s): 51aebc3

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -97

app.py CHANGED Viewed

@@ -47,6 +47,9 @@ def file_hash(path: str) -> str:
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
 def extract_page_range(file_path: str, start_page: int, end_page: int) -> str:
     """Extract text from a range of PDF pages."""
     try:
@@ -68,17 +71,14 @@ def extract_all_pages(file_path: str, progress_callback=None) -> str:
         if total_pages == 0:
             return ""
-        # Use 6 processes (adjust based on CPU cores)
         num_processes = min(6, multiprocessing.cpu_count())
         pages_per_process = max(1, total_pages // num_processes)
-        # Create page ranges for parallel processing
         ranges = [(i * pages_per_process, min((i + 1) * pages_per_process, total_pages))
                   for i in range(num_processes)]
         if ranges[-1][1] != total_pages:
             ranges[-1] = (ranges[-1][0], total_pages)
-        # Process page ranges in parallel
         with multiprocessing.Pool(processes=num_processes) as pool:
             extract_func = partial(extract_page_range, file_path)
             results = []
@@ -141,22 +141,17 @@ def log_system_usage(tag=""):
 def clean_response(text: str) -> str:
     """Clean TxAgent response to group findings under tool-derived headings."""
     text = sanitize_utf8(text)
-    # Remove tool call artifacts, None, and reasoning
     text = re.sub(r"\[.*?\]|\bNone\b|To analyze the patient record excerpt.*?medications\.|Since the previous attempts.*?\.|I need to.*?medications\.|Retrieving tools.*?\.", "", text, flags=re.DOTALL)
-    # Remove extra whitespace and non-markdown content
     text = re.sub(r"\n{3,}", "\n\n", text)
-    text = re.sub(r"[^\n#\-\*\w\s\.\,\:\(\)]+", "", text)  # Keep markdown-relevant characters
-    # Define tool-to-heading mapping
     tool_to_heading = {
         "get_abuse_info_by_drug_name": "Drugs",
         "get_dependence_info_by_drug_name": "Drugs",
         "get_abuse_types_and_related_adverse_reactions_and_controlled_substance_status_by_drug_name": "Drugs",
         "get_info_for_patients_by_drug_name": "Drugs",
-        # Add other tools from new_tool.json if applicable
     }
-    # Parse sections and findings
     sections = {}
     current_section = None
     current_tool = None
@@ -165,22 +160,18 @@ def clean_response(text: str) -> str:
         line = line.strip()
         if not line:
             continue
-        # Detect tool tag
         tool_match = re.match(r"\[TOOL:\s*(\w+)\]", line)
         if tool_match:
             current_tool = tool_match.group(1)
             continue
-        # Detect section heading
-        section_match = re.match(r"###\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line)
         if section_match:
             current_section = section_match.group(1)
             if current_section not in sections:
                 sections[current_section] = []
             continue
-        # Detect finding
         finding_match = re.match(r"-\s*.+", line)
         if finding_match and current_section and not re.match(r"-\s*No issues identified", line):
-            # Assign to tool-derived heading if tool is specified
             if current_tool and current_tool in tool_to_heading:
                 heading = tool_to_heading[current_tool]
                 if heading not in sections:
@@ -189,15 +180,14 @@ def clean_response(text: str) -> str:
             else:
                 sections[current_section].append(line)
-    # Combine non-empty sections
     cleaned = []
     for heading, findings in sections.items():
-        if findings:  # Only include sections with findings
             cleaned.append(f"### {heading}\n" + "\n".join(findings))
     text = "\n\n".join(cleaned).strip()
     if not text:
-        text = ""  # Return empty string if no valid findings
     return text
 def init_agent():
@@ -214,7 +204,7 @@ def init_agent():
         tool_files_dict={"new_tool": target_tool_path},
         force_finish=True,
         enable_checker=True,
-        step_rag_num=4,
         seed=100,
         additional_default_tools=[],
     )
@@ -223,16 +213,77 @@ def init_agent():
     print("✅ Agent Ready")
     return agent
 def create_ui(agent):
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
         chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
         file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
         msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
         send_btn = gr.Button("Analyze", variant="primary")
         download_output = gr.File(label="Download Full Report")
-        def analyze(message: str, history: List[dict], files: List):
             history.append({"role": "user", "content": message})
             history.append({"role": "assistant", "content": "⏳ Extracting text from files..."})
             yield history, None
@@ -240,7 +291,6 @@ def create_ui(agent):
             extracted = ""
             file_hash_value = ""
             if files:
-                # Progress callback for extraction
                 total_pages = 0
                 processed_pages = 0
                 def update_extraction_progress(current, total):
@@ -257,94 +307,46 @@ def create_ui(agent):
                     extracted = "\n".join(results)
                     file_hash_value = file_hash(files[0].name) if files else ""
-            history.pop()  # Remove extraction message
             history.append({"role": "assistant", "content": "✅ Text extraction complete."})
             yield history, None
-            # Split extracted text into chunks of ~6,000 characters
-            chunk_size = 6000
             chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
             combined_response = ""
-            prompt_template = """
-You are a medical analysis assistant. Analyze the following patient record excerpt for clinical oversights and provide a concise, evidence-based summary in markdown format. Group findings under appropriate headings based on the tool used (e.g., drug-related findings under 'Drugs'). For each finding, include:
-- Clinical context (why the issue was missed or relevant details from the record).
-- Potential risks if unaddressed (e.g., disease progression, adverse events).
-- Actionable recommendations (e.g., tests, referrals, medication adjustments).
-Output ONLY the markdown-formatted findings, with bullet points under each heading. Precede each finding with a tool tag (e.g., [TOOL: get_abuse_info_by_drug_name]) to indicate the tool used. Do NOT include reasoning, tool calls, or intermediate steps. If no issues are found for a tool or category, state "No issues identified" for that section. Ensure the output is specific to the provided text and avoids generic responses.
-Example Output:
-### Drugs
-[TOOL: get_abuse_info_by_drug_name]
-- Opioid use disorder not addressed. Missed due to lack of screening. Risks: overdose. Recommend: addiction specialist referral.
-### Missed Diagnoses
-- Elevated BP noted without diagnosis. Missed due to inconsistent visits. Risks: stroke. Recommend: BP monitoring, antihypertensives.
-### Incomplete Assessments
-- Chest pain not evaluated. Time constraints likely cause. Risks: cardiac issues. Recommend: ECG, stress test.
-### Urgent Follow-up
-- Abnormal creatinine not addressed. Delayed lab review. Risks: renal failure. Recommend: nephrology referral.
-Patient Record Excerpt (Chunk {0} of {1}):
-{chunk}
-"""
             try:
-                # Process each chunk and stream results in real-time
-                for chunk_idx, chunk in enumerate(chunks, 1):
-                    # Update UI with chunk progress
-                    animation = ["🔍", "📊", "🧠", "🔎"][(int(time.time() * 2) % 4)]
-                    history.append({"role": "assistant", "content": f"Analyzing records... {animation} Chunk {chunk_idx}/{len(chunks)}"})
-                    yield history, None
-                    prompt = prompt_template.format(chunk_idx, len(chunks), chunk=chunk[:4000])  # Truncate to avoid token limits
-                    chunk_response = ""
-                    for chunk_output in agent.run_gradio_chat(
-                        message=prompt,
-                        history=[],
-                        temperature=0.2,
-                        max_new_tokens=1024,
-                        max_token=4096,
-                        call_agent=False,
-                        conversation=[],
-                    ):
-                        if chunk_output is None:
-                            continue
-                        if isinstance(chunk_output, list):
-                            for m in chunk_output:
-                                if hasattr(m, 'content') and m.content:
-                                    cleaned = clean_response(m.content)
-                                    if cleaned and re.search(r"###\s*\w+", cleaned):
-                                        chunk_response += cleaned + "\n\n"
-                                        # Update UI with partial response
-                                        if history[-1]["content"].startswith("Analyzing"):
-                                            history[-1] = {"role": "assistant", "content": f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"}
-                                        else:
-                                            history[-1]["content"] = f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"
-                                        yield history, None
-                        elif isinstance(chunk_output, str) and chunk_output.strip():
-                            cleaned = clean_response(chunk_output)
-                            if cleaned and re.search(r"###\s*\w+", cleaned):
-                                chunk_response += cleaned + "\n\n"
-                                # Update UI with partial response
-                                if history[-1]["content"].startswith("Analyzing"):
-                                    history[-1] = {"role": "assistant", "content": f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"}
-                                else:
-                                    history[-1]["content"] = f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"
-                                yield history, None
-                    # Append completed chunk response to combined response
-                    if chunk_response:
-                        combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
-                    else:
-                        combined_response += f"--- Analysis for Chunk {chunk_idx} ---\nNo oversights identified for this chunk.\n\n"
-                # Finalize UI with complete response
                 if combined_response.strip() and not all("No oversights identified" in chunk for chunk in combined_response.split("--- Analysis for Chunk")):
                     history[-1]["content"] = combined_response.strip()
                 else:
                     history.append({"role": "assistant", "content": "No oversights identified in the provided records."})
-                # Generate report file
                 report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
                 if report_path:
                     with open(report_path, "w", encoding="utf-8") as f:
@@ -356,8 +358,8 @@ Patient Record Excerpt (Chunk {0} of {1}):
                 history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
                 yield history, None
-        send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
-        msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
     return demo
 if __name__ == "__main__":

     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
+def chunk_hash(chunk: str) -> str:
+    return hashlib.md5(chunk.encode("utf-8")).hexdigest()
 def extract_page_range(file_path: str, start_page: int, end_page: int) -> str:
     """Extract text from a range of PDF pages."""
     try:
         if total_pages == 0:
             return ""
         num_processes = min(6, multiprocessing.cpu_count())
         pages_per_process = max(1, total_pages // num_processes)
         ranges = [(i * pages_per_process, min((i + 1) * pages_per_process, total_pages))
                   for i in range(num_processes)]
         if ranges[-1][1] != total_pages:
             ranges[-1] = (ranges[-1][0], total_pages)
         with multiprocessing.Pool(processes=num_processes) as pool:
             extract_func = partial(extract_page_range, file_path)
             results = []
 def clean_response(text: str) -> str:
     """Clean TxAgent response to group findings under tool-derived headings."""
     text = sanitize_utf8(text)
     text = re.sub(r"\[.*?\]|\bNone\b|To analyze the patient record excerpt.*?medications\.|Since the previous attempts.*?\.|I need to.*?medications\.|Retrieving tools.*?\.", "", text, flags=re.DOTALL)
     text = re.sub(r"\n{3,}", "\n\n", text)
+    text = re.sub(r"[^\n#\-\*\w\s\.\,\:\(\)]+", "", text)
     tool_to_heading = {
         "get_abuse_info_by_drug_name": "Drugs",
         "get_dependence_info_by_drug_name": "Drugs",
         "get_abuse_types_and_related_adverse_reactions_and_controlled_substance_status_by_drug_name": "Drugs",
         "get_info_for_patients_by_drug_name": "Drugs",
     }
     sections = {}
     current_section = None
     current_tool = None
         line = line.strip()
         if not line:
             continue
         tool_match = re.match(r"\[TOOL:\s*(\w+)\]", line)
         if tool_match:
             current_tool = tool_match.group(1)
             continue
+        section_match = re.match(r"###\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up|Drugs)", line)
         if section_match:
             current_section = section_match.group(1)
             if current_section not in sections:
                 sections[current_section] = []
             continue
         finding_match = re.match(r"-\s*.+", line)
         if finding_match and current_section and not re.match(r"-\s*No issues identified", line):
             if current_tool and current_tool in tool_to_heading:
                 heading = tool_to_heading[current_tool]
                 if heading not in sections:
             else:
                 sections[current_section].append(line)
     cleaned = []
     for heading, findings in sections.items():
+        if findings:
             cleaned.append(f"### {heading}\n" + "\n".join(findings))
     text = "\n\n".join(cleaned).strip()
     if not text:
+        text = ""
     return text
 def init_agent():
         tool_files_dict={"new_tool": target_tool_path},
         force_finish=True,
         enable_checker=True,
+        step_rag_num=2,  # Reduced for speed
         seed=100,
         additional_default_tools=[],
     )
     print("✅ Agent Ready")
     return agent
+def process_chunk(agent, chunk: str, chunk_idx: int, total_chunks: int, cache_path: str) -> str:
+    """Process a single chunk and cache the result."""
+    chunk_id = chunk_hash(chunk)
+    chunk_cache_path = os.path.join(file_cache_dir, f"chunk_{chunk_id}.txt")
+    if os.path.exists(chunk_cache_path):
+        with open(chunk_cache_path, "r", encoding="utf-8") as f:
+            return f.read()
+    prompt_template = """
+You are a medical analysis assistant. Analyze the following patient record excerpt for clinical oversights and provide a concise, evidence-based summary in markdown format. Group findings under appropriate headings based on the tool used (e.g., drug-related findings under 'Drugs'). For each finding, include:
+- Clinical context (why the issue was missed or relevant details from the record).
+- Potential risks if unaddressed (e.g., disease progression, adverse events).
+- Actionable recommendations (e.g., tests, referrals, medication adjustments).
+Output ONLY the markdown-formatted findings, with bullet points under each heading. Precede each finding with a tool tag (e.g., [TOOL: get_abuse_info_by_drug_name]) to indicate the tool used. Do NOT include reasoning, tool calls, or intermediate steps. If no issues are found for a tool or category, state "No issues identified" for that section. Ensure the output is specific to the provided text and avoids generic responses.
+Example Output:
+### Drugs
+[TOOL: get_abuse_info_by_drug_name]
+- [Finding placeholder for drug-related issue]
+### Missed Diagnoses
+- [Finding placeholder for missed diagnosis]
+### Incomplete Assessments
+- [Finding placeholder for incomplete assessment]
+### Urgent Follow-up
+- [Finding placeholder for urgent follow-up]
+Patient Record Excerpt (Chunk {0} of {1}):
+{chunk}
+"""
+    prompt = prompt_template.format(chunk_idx, total_chunks, chunk=chunk[:2000])  # Truncate to avoid token limits
+    chunk_response = ""
+    for chunk_output in agent.run_gradio_chat(
+        message=prompt,
+        history=[],
+        temperature=0.2,
+        max_new_tokens=512,  # Reduced for speed
+        max_token=2048,      # Reduced for speed
+        call_agent=False,
+        conversation=[],
+    ):
+        if chunk_output is None:
+            continue
+        if isinstance(chunk_output, list):
+            for m in chunk_output:
+                if hasattr(m, 'content') and m.content:
+                    cleaned = clean_response(m.content)
+                    if cleaned and re.search(r"###\s*\w+", cleaned):
+                        chunk_response += cleaned + "\n\n"
+        elif isinstance(chunk_output, str) and chunk_output.strip():
+            cleaned = clean_response(chunk_output)
+            if cleaned and re.search(r"###\s*\w+", cleaned):
+                chunk_response += cleaned + "\n\n"
+    if chunk_response:
+        with open(chunk_cache_path, "w", encoding="utf-8") as f:
+            f.write(chunk_response)
+    return chunk_response
 def create_ui(agent):
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
         chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
         file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
+        max_chunks_input = gr.Slider(minimum=1, maximum=50, value=10, step=1, label="Max Chunks to Analyze")
         msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
         send_btn = gr.Button("Analyze", variant="primary")
         download_output = gr.File(label="Download Full Report")
+        def analyze(message: str, history: List[dict], files: List, max_chunks: int):
             history.append({"role": "user", "content": message})
             history.append({"role": "assistant", "content": "⏳ Extracting text from files..."})
             yield history, None
             extracted = ""
             file_hash_value = ""
             if files:
                 total_pages = 0
                 processed_pages = 0
                 def update_extraction_progress(current, total):
                     extracted = "\n".join(results)
                     file_hash_value = file_hash(files[0].name) if files else ""
+            history.pop()
             history.append({"role": "assistant", "content": "✅ Text extraction complete."})
             yield history, None
+            chunk_size = 2000  # Reduced for speed
             chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
+            chunks = chunks[:max_chunks]  # Limit to max_chunks
+            total_chunks = len(chunks)
             combined_response = ""
+            if not chunks:
+                history.append({"role": "assistant", "content": "No content to analyze."})
+                yield history, None
+                return
             try:
+                with ThreadPoolExecutor(max_workers=4) as executor:  # Parallel processing
+                    futures = []
+                    for chunk_idx, chunk in enumerate(chunks, 1):
+                        futures.append(executor.submit(process_chunk, agent, chunk, chunk_idx, total_chunks, file_cache_dir))
+                    for idx, future in enumerate(as_completed(futures)):
+                        chunk_response = future.result()
+                        animation = ["🔍", "📊", "🧠", "🔎"][(int(time.time() * 2) % 4)]
+                        history.append({"role": "assistant", "content": f"Analyzing chunks... {animation} {idx + 1}/{total_chunks}"})
+                        yield history, None
+                        if chunk_response:
+                            combined_response += f"--- Analysis for Chunk {idx + 1} ---\n{chunk_response}\n"
+                        else:
+                            combined_response += f"--- Analysis for Chunk {idx + 1} ---\nNo oversights identified for this chunk.\n\n"
+                        history[-1] = {"role": "assistant", "content": combined_response.strip()}
+                        yield history, None
                 if combined_response.strip() and not all("No oversights identified" in chunk for chunk in combined_response.split("--- Analysis for Chunk")):
                     history[-1]["content"] = combined_response.strip()
                 else:
                     history.append({"role": "assistant", "content": "No oversights identified in the provided records."})
                 report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
                 if report_path:
                     with open(report_path, "w", encoding="utf-8") as f:
                 history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
                 yield history, None
+        send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload, max_chunks_input], outputs=[chatbot, download_output])
+        msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload, max_chunks_input], outputs=[chatbot, download_output])
     return demo
 if __name__ == "__main__":