Ali2206 committed
Commit 28928c8 · verified · 1 Parent(s): 67dd49b

Update app.py

Files changed (1): app.py (+69, -95)
app.py CHANGED
@@ -52,8 +52,8 @@ def file_hash(path: str) -> str:
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
 
-def chunk_hash(chunk: str, prompt: str) -> str:
-    return hashlib.md5((chunk + prompt).encode("utf-8")).hexdigest()
+def batch_hash(chunks: List[str], prompt: str) -> str:
+    return hashlib.md5(("".join(chunks) + prompt).encode("utf-8")).hexdigest()
 
 def extract_page_range(file_path: str, start_page: int, end_page: int) -> str:
     """Extract text from a range of PDF pages."""
@@ -147,32 +147,21 @@ def log_system_usage(tag=""):
         logger.error(f"[{tag}] GPU/CPU monitor failed: {e}")
 
 def clean_response(text: str) -> str:
-    """Clean TxAgent response to group findings under tool-derived headings."""
+    """Clean TxAgent response to group findings by section without tool names."""
     text = sanitize_utf8(text)
-    text = re.sub(r"\[.*?\]|\bNone\b|To analyze the patient record excerpt.*?medications\.|Since the previous attempts.*?\.|I need to.*?medications\.|Retrieving tools.*?\.", "", text, flags=re.DOTALL)
+    # Remove tool tags, None, and reasoning
+    text = re.sub(r"\[TOOL:[^\]]+\]|\bNone\b|To analyze the patient record excerpt.*?medications\.|Since the previous attempts.*?\.|I need to.*?medications\.|Retrieving tools.*?\.", "", text, flags=re.DOTALL)
     text = re.sub(r"\n{3,}", "\n\n", text)
     text = re.sub(r"[^\n#\-\*\w\s\.\,\:\(\)]+", "", text)
-
-    tool_to_heading = {
-        "get_abuse_info_by_drug_name": "Drugs",
-        "get_dependence_info_by_drug_name": "Drugs",
-        "get_abuse_types_and_related_adverse_reactions_and_controlled_substance_status_by_drug_name": "Drugs",
-        "get_info_for_patients_by_drug_name": "Drugs",
-    }
-
+
     sections = {}
     current_section = None
-    current_tool = None
     lines = text.splitlines()
     for line in lines:
         line = line.strip()
         if not line:
             continue
-        tool_match = re.match(r"\[TOOL:\s*(\w+)\]", line)
-        if tool_match:
-            current_tool = tool_match.group(1)
-            continue
-        section_match = re.match(r"###\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up|Drugs)", line)
+        section_match = re.match(r"###\s*(Drugs|Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line)
         if section_match:
             current_section = section_match.group(1)
             if current_section not in sections:
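
The tightened cleanup pattern now strips only explicit [TOOL: ...] tags instead of every bracketed span, which the old r"\[.*?\]" alternation did. A quick standalone check (illustrative, not part of the commit):

    import re

    s = "[TOOL: get_abuse_info_by_drug_name] - Opioid misuse [see note]"
    print(re.sub(r"\[TOOL:[^\]]+\]", "", s))
    # -> " - Opioid misuse [see note]"  (non-tool brackets survive)
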
@@ -180,13 +169,7 @@ def clean_response(text: str) -> str:
             continue
         finding_match = re.match(r"-\s*.+", line)
         if finding_match and current_section and not re.match(r"-\s*No issues identified", line):
-            if current_tool and current_tool in tool_to_heading:
-                heading = tool_to_heading[current_tool]
-                if heading not in sections:
-                    sections[heading] = []
-                sections[heading].append(line)
-            else:
-                sections[current_section].append(line)
+            sections[current_section].append(line)
 
     cleaned = []
     for heading, findings in sections.items():
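
With the tool-routing branch gone, findings are grouped purely by the five whitelisted headings; any other heading is ignored, and "No issues identified" bullets are dropped. A standalone sketch of the heading match (illustrative, not part of the commit):

    import re

    SECTION_RE = r"###\s*(Drugs|Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)"
    for line in ["### Drugs", "### Labs", "- finding"]:
        m = re.match(SECTION_RE, line)
        print(line, "->", m.group(1) if m else None)
    # ### Drugs -> Drugs, ### Labs -> None, - finding -> None
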
@@ -212,97 +195,99 @@ def init_agent():
         tool_files_dict={"new_tool": target_tool_path},
         force_finish=True,
         enable_checker=True,
-        step_rag_num=2,
+        step_rag_num=1,  # Reduced for speed
         seed=100,
         additional_default_tools=[],
+        num_engine_threads=1,  # Limit VLLM threads for stability
     )
     agent.init_model()
     log_system_usage("After Load")
     logger.info("Agent Ready")
     return agent
 
-def process_chunk(agent, chunk: str, chunk_idx: int, total_chunks: int, cache_path: str, prompt_template: str) -> tuple:
-    """Process a single chunk with error handling and caching."""
-    if not chunk.strip():
-        logger.warning(f"Chunk {chunk_idx} is empty, skipping...")
-        return chunk_idx, f"--- Analysis for Chunk {chunk_idx} ---\nNo oversights identified for this chunk.\n\n"
-
-    chunk_id = chunk_hash(chunk, prompt_template)
-    chunk_cache_path = os.path.join(cache_path, f"chunk_{chunk_id}.txt")
-
-    if os.path.exists(chunk_cache_path):
-        with open(chunk_cache_path, "r", encoding="utf-8") as f:
-            logger.info(f"Cache hit for chunk {chunk_idx}")
-            return chunk_idx, f.read()
-
-    prompt = prompt_template.format(chunk_idx, total_chunks, chunk=chunk[:1000])  # Truncate to avoid token limits
-    chunk_response = ""
+def process_batch(agent, chunks: List[str], cache_path: str, prompt_template: str) -> str:
+    """Process a batch of chunks in a single prompt."""
+    if not any(chunk.strip() for chunk in chunks):
+        logger.warning("All chunks are empty, skipping analysis...")
+        return "No oversights identified in the provided records."
+
+    batch_id = batch_hash(chunks, prompt_template)
+    batch_cache_path = os.path.join(cache_path, f"batch_{batch_id}.txt")
+
+    if os.path.exists(batch_cache_path):
+        with open(batch_cache_path, "r", encoding="utf-8") as f:
+            logger.info("Cache hit for batch")
+            return f.read()
+
+    # Combine chunks into one prompt
+    chunk_texts = [f"Chunk {i+1}:\n{chunk[:500]}" for i, chunk in enumerate(chunks) if chunk.strip()]
+    combined_text = "\n\n".join(chunk_texts)
+    prompt = prompt_template.format(chunks=combined_text)
+    response = ""
+
     try:
-        for chunk_output in agent.run_gradio_chat(
+        for output in agent.run_gradio_chat(
             message=prompt,
             history=[],
             temperature=0.2,
-            max_new_tokens=512,
-            max_token=2048,
+            max_new_tokens=256,  # Reduced for speed
+            max_token=1024,  # Reduced for speed
             call_agent=False,
             conversation=[],
         ):
-            if chunk_output is None:
+            if output is None:
                 continue
-            if isinstance(chunk_output, list):
-                for m in chunk_output:
+            if isinstance(output, list):
+                for m in output:
                     if hasattr(m, 'content') and m.content:
                         cleaned = clean_response(m.content)
                         if cleaned and re.search(r"###\s*\w+", cleaned):
-                            chunk_response += cleaned + "\n\n"
-            elif isinstance(chunk_output, str) and chunk_output.strip():
-                cleaned = clean_response(chunk_output)
+                            response += cleaned + "\n\n"
+            elif isinstance(output, str) and output.strip():
+                cleaned = clean_response(output)
                 if cleaned and re.search(r"###\s*\w+", cleaned):
-                    chunk_response += cleaned + "\n\n"
+                    response += cleaned + "\n\n"
     except Exception as e:
-        logger.error(f"Error processing chunk {chunk_idx}: {e}")
-        return chunk_idx, f"--- Analysis for Chunk {chunk_idx} ---\nError occurred: {str(e)}\n\n"
-
-    if chunk_response:
-        with open(chunk_cache_path, "w", encoding="utf-8") as f:
-            f.write(chunk_response)
-        return chunk_idx, f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
-    return chunk_idx, f"--- Analysis for Chunk {chunk_idx} ---\nNo oversights identified for this chunk.\n\n"
+        logger.error(f"Error processing batch: {e}")
+        return f"Error occurred: {str(e)}"
+
+    if response:
+        with open(batch_cache_path, "w", encoding="utf-8") as f:
+            f.write(response)
+        return response
+    return "No oversights identified in the provided records."
 
 def create_ui(agent):
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
         chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
         file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
-        max_chunks_input = gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Max Chunks to Analyze")
         msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
         send_btn = gr.Button("Analyze", variant="primary")
         download_output = gr.File(label="Download Full Report")
 
         prompt_template = """
-You are a medical analysis assistant. Analyze the following patient record excerpt for clinical oversights and provide a concise, evidence-based summary in markdown format. Group findings under appropriate headings based on the tool used (e.g., drug-related findings under 'Drugs'). For each finding, include:
+You are a medical analysis assistant. Analyze the following patient record excerpts for clinical oversights and provide a concise, evidence-based summary in markdown format. Group findings under the following headings: 'Drugs', 'Missed Diagnoses', 'Medication Conflicts', 'Incomplete Assessments', 'Urgent Follow-up'. For each finding, include:
 - Clinical context (why the issue was missed or relevant details from the record).
 - Potential risks if unaddressed (e.g., disease progression, adverse events).
 - Actionable recommendations (e.g., tests, referrals, medication adjustments).
-Output ONLY the markdown-formatted findings, with bullet points under each heading. Precede each finding with a tool tag (e.g., [TOOL: get_abuse_info_by_drug_name]) to indicate the tool used. Do NOT include reasoning, tool calls, or intermediate steps. If no issues are found for a tool or category, state "No issues identified" for that section. Ensure the output is specific to the provided text and avoids generic responses.
+Output ONLY the markdown-formatted findings, with bullet points under each heading. Do NOT include tool references, reasoning, or intermediate steps. If no issues are found for a section, omit that section. Ensure the output is specific to the provided text and avoids generic responses.
 
 Example Output:
 ### Drugs
-[TOOL: get_abuse_info_by_drug_name]
-- [Finding placeholder for drug-related issue]
+- Opioid use disorder not addressed. Missed due to lack of screening. Risks: overdose. Recommend: addiction specialist referral.
 ### Missed Diagnoses
-- [Finding placeholder for missed diagnosis]
+- Elevated BP noted without diagnosis. Missed due to inconsistent visits. Risks: stroke. Recommend: BP monitoring, antihypertensives.
 ### Incomplete Assessments
-- [Finding placeholder for incomplete assessment]
+- Chest pain not evaluated. Time constraints likely cause. Risks: cardiac issues. Recommend: ECG, stress test.
 ### Urgent Follow-up
-- [Finding placeholder for urgent follow-up]
+- Abnormal creatinine not addressed. Delayed lab review. Risks: renal failure. Recommend: nephrology referral.
 
-Patient Record Excerpt (Chunk {0} of {1}):
-{chunk}
+Patient Record Excerpts:
+{chunks}
 """
 
-        def analyze(message: str, history: List[dict], files: List, max_chunks: int):
+        def analyze(message: str, history: List[dict], files: List):
             history.append({"role": "user", "content": message})
             history.append({"role": "assistant", "content": "⏳ Extracting text from files..."})
             yield history, None
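
How process_batch assembles the single prompt, sketched standalone (illustrative, not part of the commit): empty chunks are filtered out, but labels keep the original indices, so numbering can skip.

    chunks = ["text a", "", "text b"]
    chunk_texts = [f"Chunk {i+1}:\n{chunk[:500]}" for i, chunk in enumerate(chunks) if chunk.strip()]
    print("\n\n".join(chunk_texts))
    # Chunk 1:
    # text a
    #
    # Chunk 3:
    # text b
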
@@ -330,39 +315,28 @@ Patient Record Excerpt (Chunk {0} of {1}):
             history.append({"role": "assistant", "content": "✅ Text extraction complete."})
             yield history, None
 
-            chunk_size = 1000  # Reduced for speed
+            chunk_size = 500  # Fixed for speed
+            max_chunks = 5  # Fixed for speed
             chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
-            chunks = chunks[:max_chunks]  # Limit to max_chunks
-            total_chunks = len(chunks)
-            combined_response = ""
-
+            chunks = chunks[:max_chunks]  # Limit to 5 chunks
             if not chunks:
                 history.append({"role": "assistant", "content": "No content to analyze."})
                 yield history, None
                 return
 
             try:
-                # Sequential processing to avoid VLLM error
-                for chunk_idx, chunk in enumerate(chunks, 1):
-                    animation = ["🔍", "📊", "🧠", "🔎"][(int(time.time() * 2) % 4)]
-                    history.append({"role": "assistant", "content": f"Analyzing chunk {chunk_idx}/{total_chunks}... {animation}"})
-                    yield history, None
-
-                    _, chunk_response = process_chunk(agent, chunk, chunk_idx, total_chunks, file_cache_dir, prompt_template)
-                    combined_response += chunk_response
-
-                    history[-1] = {"role": "assistant", "content": combined_response.strip()}
-                    yield history, None
+                animation = ["🔍", "📊", "🧠", "🔎"][(int(time.time() * 2) % 4)]
+                history.append({"role": "assistant", "content": f"Analyzing chunks 1-5... {animation}"})
+                yield history, None
 
-                if combined_response.strip() and not all("No oversights identified" in chunk for chunk in combined_response.split("--- Analysis for Chunk")):
-                    history[-1]["content"] = combined_response.strip()
-                else:
-                    history.append({"role": "assistant", "content": "No oversights identified in the provided records."})
+                response = process_batch(agent, chunks, file_cache_dir, prompt_template)
+                history[-1] = {"role": "assistant", "content": response.strip()}
+                yield history, None
 
                 report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
-                if report_path:
+                if report_path and response.strip() and "No oversights identified" not in response and "Error occurred" not in response:
                     with open(report_path, "w", encoding="utf-8") as f:
-                        f.write(combined_response)
+                        f.write(response)
                 yield history, report_path if report_path and os.path.exists(report_path) else None
 
             except Exception as e:
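
The report file is now written only when the batch produced real findings. The new gate, restated as a standalone predicate (hypothetical helper, not in app.py):

    def should_write_report(response: str) -> bool:
        return (bool(response.strip())
                and "No oversights identified" not in response
                and "Error occurred" not in response)

    assert should_write_report("### Drugs\n- finding")
    assert not should_write_report("Error occurred: timeout")
    assert not should_write_report("No oversights identified in the provided records.")
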
@@ -370,8 +344,8 @@ Patient Record Excerpt (Chunk {0} of {1}):
                 history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
                 yield history, None
 
-        send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload, max_chunks_input], outputs=[chatbot, download_output])
-        msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload, max_chunks_input], outputs=[chatbot, download_output])
+        send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
+        msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
         return demo
 
 if __name__ == "__main__":
 