Update src/txagent/txagent.py

src/txagent/txagent.py CHANGED (+109 −87)
@@ -14,6 +14,8 @@ from .toolrag import ToolRAGModel
 import torch
 import logging
 from difflib import SequenceMatcher
+import asyncio
+import threading
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -102,7 +104,6 @@ class TxAgent:
 
     def initialize_tools_prompt(self, call_agent, call_agent_level, message):
         picked_tools_prompt = []
-        # Only add Finish tool unless prompt explicitly requires Tool_RAG or CallAgent
         if "use external tools" not in message.lower():
             picked_tools_prompt = self.add_special_tools(picked_tools_prompt, call_agent=False)
         else:
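Note that the gate this hunk touches is a plain substring test, so any message containing the phrase, even negated, takes the tool branch. A standalone sketch of that behavior, not code from the repo:

```python
# Illustration of the substring gate in initialize_tools_prompt; standalone.
def wants_tools(message: str) -> bool:
    return "use external tools" in message.lower()

print(wants_tools("Summarize this record."))                # False -> Finish tool only
print(wants_tools("Please use external tools if needed."))  # True  -> special tools added
print(wants_tools("Do NOT use external tools."))            # True, despite the negation
```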
@@ -319,7 +320,6 @@ class TxAgent:
         if self.enable_checker:
             checker = ReasoningTraceChecker(message, conversation)
 
-        # Check if message contains clinical findings
         clinical_keywords = ['medication', 'symptom', 'evaluation', 'diagnosis']
         has_clinical_data = any(keyword in message.lower() for keyword in clinical_keywords)
 
@@ -355,7 +355,6 @@ class TxAgent:
                     logger.warning("Checker error: %s", wrong_info)
                     break
 
-            # Skip tool calls if clinical data is present
             tools = [] if has_clinical_data else picked_tools_prompt
             last_outputs = []
             last_outputs_str, token_overflow = self.llm_infer(
@@ -382,7 +381,6 @@ class TxAgent:
                 m['content'] for m in messages[-3:] if m['role'] == 'assistant'
             ][:2]
             forbidden_ids = [tokenizer.encode(msg, add_special_tokens=False) for msg in assistant_messages]
-            # Enhance deduplication with similarity check
             unique_sentences = []
             for msg in assistant_messages:
                 sentences = msg.split('. ')
@@ -397,7 +395,7 @@ class TxAgent:
                     if is_unique:
                         unique_sentences.append(s)
             forbidden_ids = [tokenizer.encode(s, add_special_tokens=False) for s in unique_sentences]
-            return [NoRepeatSentenceProcessor(forbidden_ids,
+            return [NoRepeatSentenceProcessor(forbidden_ids, 15)]  # Increased penalty
         return None
 
     def llm_infer(self, messages, temperature=0.1, tools=None, output_begin_string=None,
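The diff elides the body of the dedup loop (old lines 388–397), which computes `is_unique` for each sentence before it feeds `NoRepeatSentenceProcessor`. A minimal sketch of one plausible shape, using the `SequenceMatcher` import this commit's file already has; the 0.9 threshold is an assumption, not the repo's value:

```python
# Plausible sketch of the similarity-based sentence dedup; threshold assumed.
from difflib import SequenceMatcher

def dedup_sentences(messages, threshold=0.9):
    unique_sentences = []
    for msg in messages:
        for s in msg.split('. '):
            is_unique = all(
                SequenceMatcher(None, s, u).ratio() < threshold
                for u in unique_sentences
            )
            if is_unique:
                unique_sentences.append(s)
    return unique_sentences

print(dedup_sentences(["The dose is 5 mg. Review urgently.",
                       "The dose is 5mg. Review urgently."]))
```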
@@ -435,6 +433,28 @@ class TxAgent:
         return output, False
         return output
 
+    def run_quick_summary(self, message: str, temperature: float = 0.1, max_new_tokens: int = 256, max_token: int = 1024):
+        """Generate a fast, concise summary of potential missed diagnoses without tool calls"""
+        logger.debug("Starting quick summary for message: %s", message[:100])
+        prompt = """
+        Analyze the patient record excerpt for missed diagnoses, focusing ONLY on clinical findings such as symptoms, medications, or evaluation results. Provide a concise summary in ONE paragraph without headings or bullet points. ALWAYS treat medications or psychiatric evaluations as potential missed diagnoses, specifying their implications (e.g., 'use of Seroquel may indicate untreated psychosis'). Recommend urgent review for identified findings. Do NOT use external tools or repeat non-clinical data (e.g., name, date of birth). If no clinical findings are present, state 'No missed diagnoses identified' in ONE sentence.
+        Patient Record Excerpt:
+        {chunk}
+        """
+        conversation = self.set_system_prompt([], prompt.format(chunk=message))
+        conversation.append({"role": "user", "content": message})
+        output = self.llm_infer(
+            messages=conversation,
+            temperature=temperature,
+            max_new_tokens=max_new_tokens,
+            max_token=max_token,
+            tools=[]  # No tools
+        )
+        if '[FinalAnswer]' in output:
+            output = output.split('[FinalAnswer]')[-1].strip()
+        logger.debug("Quick summary output: %s", output[:100])
+        return output
+
     def run_self_agent(self, message: str, temperature: float, max_new_tokens: int, max_token: int):
         logger.debug("Starting self agent")
         conversation = self.set_system_prompt([], self.self_prompt)
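The new `run_quick_summary` makes a single tool-free `llm_infer` pass and strips everything before the last `[FinalAnswer]` marker. A hypothetical call site, assuming an already-constructed `TxAgent` instance named `agent`; the argument values simply mirror the defaults added above:

```python
# Hypothetical usage; `agent` is an initialized TxAgent instance.
summary = agent.run_quick_summary(
    "Patient on Seroquel 50 mg nightly; no psychiatric diagnosis on file.",
    temperature=0.1, max_new_tokens=256, max_token=1024)
print(summary)  # one paragraph, or "No missed diagnoses identified"
```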
@@ -565,30 +585,19 @@ Summarize the function responses in one sentence with all necessary information.
         logger.debug("Updated parameters: %s", updated_attributes)
         return updated_attributes
 
-    def
-        logger.debug("
-        if message.startswith("[\U0001f9f0 Tool_RAG") or message.startswith("⚒️"):
-            return
-
-        # Check if message contains clinical findings
-        clinical_keywords = ['medication', 'symptom', 'evaluation', 'diagnosis']
-        has_clinical_data = any(keyword in message.lower() for keyword in clinical_keywords)
-        call_agent = call_agent and not has_clinical_data  # Disable CallAgent for clinical data
-
+    async def run_background_report(self, message: str, history: list, temperature: float,
+                                    max_new_tokens: int, max_token: int, call_agent: bool,
+                                    conversation: gr.State, max_round: int, seed: int,
+                                    call_agent_level: int, report_path: str):
+        """Run detailed report generation in the background and save to file"""
+        logger.debug("Starting background report for message: %s", message[:100])
+        combined_response = ""
+        history_copy = history.copy()
+
         picked_tools_prompt, call_agent_level = self.initialize_tools_prompt(
             call_agent, call_agent_level, message)
-        conversation = self.initialize_conversation(
-        history = []
+        conversation = self.initialize_conversation(message, conversation, history_copy)
+
         next_round = True
         current_round = 0
         enable_summary = False
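The background coroutine works on `history.copy()` so the chat history rendered by Gradio is not mutated from the worker thread, and it writes its output to `report_path`. How that path is produced is outside this diff; a hypothetical helper, with the naming scheme assumed for illustration:

```python
# Hypothetical report_path construction; not part of this commit.
import os
import time

def make_report_path(base_dir: str = "reports") -> str:
    os.makedirs(base_dir, exist_ok=True)
    return os.path.join(base_dir, f"detailed_report_{int(time.time())}.txt")
```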
@@ -603,24 +612,17 @@ Summarize the function responses in one sentence with all necessary information.
                 current_round += 1
                 last_outputs = []
                 if last_outputs:
-                    function_call_messages, picked_tools_prompt, special_tool_call,
+                    function_call_messages, picked_tools_prompt, special_tool_call, _ = yield from self.run_function_call_stream(
                         last_outputs, return_message=True, existing_tools_prompt=picked_tools_prompt,
                         message_for_call_agent=message, call_agent=call_agent,
-                        call_agent_level=call_agent_level, temperature=temperature
+                        call_agent_level=call_agent_level, temperature=temperature,
+                        return_gradio_history=False)
+
                     if special_tool_call == 'Finish':
-                        yield history
                         next_round = False
                         conversation.extend(function_call_messages)
-
-                    if special_tool_call in ['RequireClarification', 'DirectResponse']:
-                        last_msg = history[-1] if history else ChatMessage(role="assistant", content="Response needed.")
-                        history.append(ChatMessage(role="assistant", content=last_msg.content))
-                        yield history
-                        next_round = False
-                        return last_msg.content
+                        combined_response += function_call_messages[0]['content'] + "\n"
+                        break
 
                 if (self.enable_summary or token_overflow) and not call_agent:
                     enable_summary = True
@@ -629,10 +631,11 @@ Summarize the function responses in one sentence with all necessary information.
 
                 if function_call_messages:
                     conversation.extend(function_call_messages)
-
+                    combined_response += tool_result_format(function_call_messages) + "\n"
                 else:
                     next_round = False
-
+                    combined_response += ''.join(last_outputs).replace("</s>", "") + "\n"
+                    break
 
                 if self.enable_checker:
                     good_status, wrong_info = checker.check_conversation()
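`tool_result_format` is imported from the package's utilities and only called here. The stand-in below sketches the shape of what gets appended to `combined_response`; it is an assumption for illustration, not the repo's implementation:

```python
# Assumed stand-in for the imported tool_result_format; illustrative only.
def tool_result_format(function_call_messages: list) -> str:
    blocks = [m.get("content", "") for m in function_call_messages
              if m.get("role") == "tool"]
    return "\n".join(blocks)
```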
@@ -640,8 +643,7 @@ Summarize the function responses in one sentence with all necessary information.
                         logger.warning("Checker error: %s", wrong_info)
                         break
 
-
-                tools = [] if has_clinical_data else picked_tools_prompt
+                tools = picked_tools_prompt
                 last_outputs_str, token_overflow = self.llm_infer(
                     messages=conversation, temperature=temperature, tools=tools,
                     max_new_tokens=max_new_tokens, max_token=max_token, seed=seed, check_token_status=True)
@@ -650,54 +652,74 @@ Summarize the function responses in one sentence with all necessary information.
                     if self.force_finish:
                         last_outputs_str = self.get_answer_based_on_unfinished_reasoning(
                             conversation, temperature, max_new_tokens, max_token)
-                        history.append(ChatMessage(role="assistant", content=error_msg))
-                        yield history
-                        return error_msg
-
-                last_thought = last_outputs_str.split("[TOOL_CALLS]")[0]
-                for msg in history:
-                    if msg.metadata:
-                        msg.metadata['status'] = 'done'
-
-                if '[FinalAnswer]' in last_thought:
-                    parts = last_thought.split('[FinalAnswer]', 1)
-                    final_thought, final_answer = parts if len(parts) == 2 else (last_thought, "")
-                    history.append(ChatMessage(role="assistant", content=final_thought.strip()))
-                    yield history
-                    history.append(ChatMessage(role="assistant", content="**🧠 Final Analysis:**\n" + final_answer.strip()))
-                    yield history
-                else:
-                    history.append(ChatMessage(role="assistant", content=last_thought))
-                    yield history
+                        combined_response += last_outputs_str + "\n"
+                        break
+                    combined_response += "Token limit exceeded.\n"
+                    break
 
+                combined_response += last_outputs_str + "\n"
                 last_outputs.append(last_outputs_str)
 
             if next_round and self.force_finish:
                 last_outputs_str = self.get_answer_based_on_unfinished_reasoning(
                     conversation, temperature, max_new_tokens, max_token)
+                combined_response += last_outputs_str + "\n"
+
+            # Save report
+            try:
+                with open(report_path, "w", encoding="utf-8") as f:
+                    f.write(combined_response)
+                logger.info("Detailed report saved to %s", report_path)
+            except Exception as e:
+                logger.error("Failed to save report: %s", e)
 
         except Exception as e:
-            logger.error("
+            logger.error("Background report error: %s", e)
+            combined_response += f"Error: {e}\n"
+            with open(report_path, "w", encoding="utf-8") as f:
+                f.write(combined_response)
+
+        finally:
+            torch.cuda.empty_cache()
+            gc.collect()
+
+    def run_gradio_chat(self, message: str, history: list, temperature: float,
+                        max_new_tokens: int, max_token: int, call_agent: bool,
+                        conversation: gr.State, max_round: int = 3, seed: int = None,
+                        call_agent_level: int = 0, sub_agent_task: str = None,
+                        uploaded_files: list = None, report_path: str = None):
+        logger.debug("Chat started, message: %s", message[:100])
+        if not message or len(message.strip()) < 5:
+            yield "Please provide a valid message or upload files to analyze."
+            return
+
+        if message.startswith("[\U0001f9f0 Tool_RAG") or message.startswith("⚒️"):
+            return
+
+        clinical_keywords = ['medication', 'symptom', 'evaluation', 'diagnosis']
+        has_clinical_data = any(keyword in message.lower() for keyword in clinical_keywords)
+        call_agent = call_agent and not has_clinical_data
+
+        # Generate quick summary
+        quick_summary = self.run_quick_summary(
+            message, temperature=temperature, max_new_tokens=256, max_token=1024)
+        history.append(ChatMessage(role="assistant", content=f"**Quick Summary:**\n{quick_summary}"))
+        yield history
+
+        # Start background report generation
+        if report_path:
+            loop = asyncio.get_event_loop()
+            threading.Thread(
+                target=lambda: loop.run_until_complete(
+                    self.run_background_report(
+                        message, history, temperature, max_new_tokens, max_token, call_agent,
                        conversation, max_round, seed, call_agent_level, report_path
+                    )
+                ),
+                daemon=True
+            ).start()
+            history.append(ChatMessage(
+                role="assistant",
+                content="Generating detailed report in the background. Download will be available when ready."
+            ))
+            yield history
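Two caveats about the launch pattern added at the end of this hunk. First, as committed, `run_background_report` is declared `async def` but uses `yield from` (new line 615), which Python rejects inside async functions, so one of the two would need to change for the module to import. Second, `asyncio.get_event_loop()` plus `loop.run_until_complete` from a worker thread raises `RuntimeError` if that loop is already running in the main thread, which is typical under Gradio. A sketch of a launch that avoids sharing the caller's loop, assuming `run_background_report` is a genuine coroutine with the signature added above:

```python
# Sketch: give the worker thread its own event loop instead of reusing the
# caller's. asyncio.run creates a fresh loop, runs the coroutine, and closes
# the loop when it finishes.
import asyncio
import threading

def start_background_report(agent, *args):
    threading.Thread(
        target=lambda: asyncio.run(agent.run_background_report(*args)),
        daemon=True,
    ).start()
```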