Update src/txagent/txagent.py

src/txagent/txagent.py  (+52 −139)  CHANGED
@@ -23,11 +23,11 @@ class TxAgent:
     def __init__(self, model_name,
                  rag_model_name,
                  tool_files_dict=None,
-                 enable_finish=True,
-                 enable_rag=True,
+                 enable_finish=False,  # MODIFIED: Default to False
+                 enable_rag=False,
                  enable_summary=False,
                  init_rag_num=0,
-                 step_rag_num=10,
+                 step_rag_num=0,
                  summary_mode='step',
                  summary_skip_last_k=0,
                  summary_context_length=None,
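
Note (outside the diff): with these defaults flipped off, tool-driven finishing and RAG retrieval become opt-in. A hypothetical construction sketch; the argument names come from the diff above, while the two checkpoint ids are assumed to be the public TxAgent weights on Hugging Face:

    # Hypothetical usage sketch; other constructor arguments keep the defaults in this file.
    agent = TxAgent(
        model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",        # assumed checkpoint id
        rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",  # assumed checkpoint id
        enable_finish=True,   # opt back in; the new default is False
        enable_rag=True,      # opt back in; the new default is False
        step_rag_num=10,      # restore per-step tool retrieval (the new default 0 disables it)
    )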
@@ -79,7 +79,7 @@ class TxAgent:
         if model_name == self.model_name:
             return f"The model {model_name} is already loaded."
         self.model_name = model_name
-        self.model = LLM(model=self.model_name, enforce_eager=True)
+        self.model = LLM(model=self.model_name, enforce_eager=True, max_model_len=4096)  # MODIFIED: Reduce KV cache
         self.chat_template = Template(self.model.get_tokenizer().chat_template)
         self.tokenizer = self.model.get_tokenizer()
         return f"Model {model_name} loaded successfully."
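
Note (outside the diff): max_model_len caps the sequence length vLLM provisions for, which bounds the KV cache it pre-allocates at startup; that is what the "Reduce KV cache" comment refers to. A minimal standalone sketch, assuming vLLM is installed and using the same (assumed) checkpoint id as above:

    from vllm import LLM, SamplingParams

    # KV-cache memory grows with the maximum sequence length, so capping it at 4096
    # lets the same model load on a smaller GPU.
    llm = LLM(
        model="mims-harvard/TxAgent-T1-Llama-3.1-8B",  # assumed checkpoint id
        enforce_eager=True,   # skip CUDA-graph capture: lower memory, some speed cost
        max_model_len=4096,   # bound on prompt + generated tokens per request
    )
    out = llm.generate(["Hello"], SamplingParams(temperature=0.0, max_tokens=16))
    print(out[0].outputs[0].text)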
@@ -165,16 +165,16 @@ class TxAgent:
     def add_special_tools(self, tools, call_agent=False):
         if not self.enable_rag and not self.enable_finish:
             return tools
-        if self.enable_finish and self.tooluniverse:
+        if self.enable_finish and self.tooluniverse:
             tools.append(self.tooluniverse.get_one_tool_by_one_name('Finish', return_prompt=True))
             logger.info("Finish tool is added")
-        if call_agent and self.tooluniverse:
+        if call_agent and self.tooluniverse:
             tools.append(self.tooluniverse.get_one_tool_by_one_name('CallAgent', return_prompt=True))
             logger.info("CallAgent tool is added")
-        elif self.enable_rag and self.tooluniverse:
+        elif self.enable_rag and self.tooluniverse:
             tools.append(self.tooluniverse.get_one_tool_by_one_name('Tool_RAG', return_prompt=True))
             logger.info("Tool_RAG tool is added")
-        if self.additional_default_tools is not None and self.tooluniverse:
+        if self.additional_default_tools is not None and self.tooluniverse:
             for each_tool_name in self.additional_default_tools:
                 tool_prompt = self.tooluniverse.get_one_tool_by_one_name(each_tool_name, return_prompt=True)
                 if tool_prompt is not None:
@@ -183,7 +183,7 @@ class TxAgent:
         return tools

     def add_finish_tools(self, tools):
-        if not self.enable_finish or not self.tooluniverse:
+        if not self.enable_finish or not self.tooluniverse:
             return tools
         tools.append(self.tooluniverse.get_one_tool_by_one_name('Finish', return_prompt=True))
         logger.info("Finish tool is added")
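
Note (outside the diff): a consequence of these guards is that both helpers degrade to no-ops when no ToolUniverse is attached, so callers need no None checks of their own. A hypothetical check, assuming an already-constructed agent:

    agent.tooluniverse = None                  # e.g. tool files were never loaded
    assert agent.add_special_tools([]) == []   # no Finish/CallAgent/Tool_RAG appended
    assert agent.add_finish_tools([]) == []    # list returned unchanged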
@@ -346,10 +346,9 @@ class TxAgent:
     def get_answer_based_on_unfinished_reasoning(self, conversation, temperature, max_new_tokens, max_token, outputs=None):
         if conversation[-1]['role'] == 'assistant':
             conversation.append({'role': 'tool', 'content': 'Errors occurred, provide final answer.'})
-        finish_tools_prompt = self.add_finish_tools([]) if self.enable_finish else []
         last_outputs_str = self.llm_infer(messages=conversation,
                                           temperature=temperature,
-                                          tools=finish_tools_prompt,
+                                          tools=[],
                                           output_begin_string='[FinalAnswer]',
                                           skip_special_tokens=True,
                                           max_new_tokens=max_new_tokens, max_token=max_token)
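
Note (outside the diff): passing tools=[] strips the tool schemas from the prompt, while output_begin_string='[FinalAnswer]' seeds the generation so the model must answer instead of planning another tool call. A sketch of that seeding idea under stated assumptions (llm_infer's internals are not shown in this diff; force_final_answer is a hypothetical helper):

    from jinja2 import Template
    from vllm import LLM, SamplingParams

    def force_final_answer(llm: LLM, chat_template: Template, conversation: list,
                           temperature: float, max_new_tokens: int) -> str:
        # Render the chat without any tool schemas, then append the marker so the
        # model continues from '[FinalAnswer]' rather than emitting a tool call.
        prompt = chat_template.render(messages=conversation, tools=[], add_generation_prompt=True)
        prompt += "[FinalAnswer]"
        params = SamplingParams(temperature=temperature, max_tokens=max_new_tokens)
        text = llm.generate([prompt], params)[0].outputs[0].text
        return "[FinalAnswer]" + text  # re-attach the marker for downstream parsing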
@@ -654,131 +653,45 @@ Generate one summarized sentence about "function calls' responses" with necessary
             return "Invalid input."
         if message.startswith("[\U0001f9f0 Tool_RAG") or message.startswith("⚒️"):
             return ""
-        ...
-                    conversation, status=last_status, enable_summary=enable_summary)
-                if function_call_messages:
-                    conversation.extend(function_call_messages)
-                    yield history
-                else:
-                    next_round = False
-                    conversation.append({"role": "assistant", "content": ''.join(last_outputs)})
-                    return ''.join(last_outputs).replace("</s>", "")
-                if self.enable_checker:
-                    good_status, wrong_info = checker.check_conversation()
-                    if not good_status:
-                        logger.warning(f"Checker flagged reasoning error: {wrong_info}")
-                        break
-                last_outputs = []
-                last_outputs_str, token_overflow = self.llm_infer(
-                    messages=conversation,
-                    temperature=temperature,
-                    tools=picked_tools_prompt,
-                    skip_special_tokens=False,
-                    max_new_tokens=max_new_tokens,
-                    max_token=max_token,
-                    seed=seed,
-                    check_token_status=True)
-                logger.debug(f"llm_infer output: {last_outputs_str[:100] if last_outputs_str else None}")
-                if last_outputs_str is None:
-                    logger.warning("Token overflow")
-                    if self.force_finish:
-                        last_outputs_str = self.get_answer_based_on_unfinished_reasoning(
-                            conversation, temperature, max_new_tokens, max_token)
-                        history.append(ChatMessage(role="assistant", content=last_outputs_str.strip()))
-                        yield history
-                        return last_outputs_str
-                    error_msg = "Token limit exceeded."
-                    history.append(ChatMessage(role="assistant", content=error_msg))
-                    yield history
-                    return error_msg
-                last_thought = last_outputs_str.split("[TOOL_CALLS]")[0]
-                for msg in history:
-                    if msg.metadata is not None:
-                        msg.metadata['status'] = 'done'
-                if '[FinalAnswer]' in last_thought:
-                    parts = last_thought.split('[FinalAnswer]', 1)
-                    final_thought, final_answer = parts if len(parts) == 2 else (last_thought, "")
-                    history.append(ChatMessage(role="assistant", content=final_thought.strip()))
-                    yield history
-                    history.append(ChatMessage(role="assistant", content="**🧠 Final Analysis:**\n" + final_answer.strip()))
-                    yield history
-                else:
-                    history.append(ChatMessage(role="assistant", content=last_thought))
-                    yield history
-                last_outputs.append(last_outputs_str)
-            if next_round:
-                if self.force_finish:
-                    last_outputs_str = self.get_answer_based_on_unfinished_reasoning(
-                        conversation, temperature, max_new_tokens, max_token)
-                    if '[FinalAnswer]' in last_outputs_str:
-                        parts = last_outputs_str.split('[FinalAnswer]', 1)
-                        final_thought, final_answer = parts if len(parts) == 2 else (last_outputs_str, "")
-                        history.append(ChatMessage(role="assistant", content=final_thought.strip()))
-                        yield history
-                        history.append(ChatMessage(role="assistant", content="**🧠 Final Analysis:**\n" + final_answer.strip()))
-                        yield history
-                    else:
-                        history.append(ChatMessage(role="assistant", content=last_outputs_str.strip()))
-                        yield history
-                else:
-                    yield "Reasoning rounds exceeded."
-        except Exception as e:
-            logger.error(f"Exception in run_gradio_chat: {e}")
-            error_msg = f"An error occurred: {e}"
-            history.append(ChatMessage(role="assistant", content=error_msg))
-            yield history
-            if self.force_finish:
-                last_outputs_str = self.get_answer_based_on_unfinished_reasoning(
-                    conversation, temperature, max_new_tokens, max_token)
-                if '[FinalAnswer]' in last_outputs_str:
-                    parts = last_outputs_str.split('[FinalAnswer]', 1)
-                    final_thought, final_answer = parts if len(parts) == 2 else (last_outputs_str, "")
-                    history.append(ChatMessage(role="assistant", content=final_thought.strip()))
-                    yield history
-                    history.append(ChatMessage(role="assistant", content="**🧠 Final Analysis:**\n" + final_answer.strip()))
-                    yield history
-                else:
-                    history.append(ChatMessage(role="assistant", content=last_outputs_str.strip()))
-                    yield history
-            return error_msg
+        conversation = self.initialize_conversation(message, conversation, history=[])
+        sampling_params = SamplingParams(
+            temperature=temperature,
+            max_tokens=max_new_tokens,
+            seed=seed if seed is not None else self.seed,
+        )
+        prompt = self.chat_template.render(messages=conversation, tools=[], add_generation_prompt=True)
+        output = self.model.generate([prompt], sampling_params)[0].outputs[0].text  # MODIFIED: Direct inference
+        cleaned = clean_response(output)  # MODIFIED: Use clean_response
+        if '[FinalAnswer]' in cleaned:
+            parts = cleaned.split('[FinalAnswer]', 1)
+            final_answer = parts[1] if len(parts) > 1 else cleaned
+            history.append(ChatMessage(role="assistant", content=final_answer.strip()))
+        else:
+            history.append(ChatMessage(role="assistant", content=cleaned.strip()))
+        yield history
+        return cleaned
+
+
+def clean_response(text: str) -> str:  # MODIFIED: Add clean_response for compatibility
+    text = sanitize_utf8(text)
+    text = re.sub(r"\[TOOL_CALLS\].*?\n|\[.*?\].*?\n|(?:get_|tool\s|retrieve\s|use\s|rag\s).*?\n", "", text, flags=re.DOTALL | re.IGNORECASE)
+    text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
+    text = re.sub(
+        r"(?i)(to\s|analyze|will\s|since\s|no\s|none|previous|attempt|involve|check\s|explore|manually|"
+        r"start|look|use|focus|retrieve|tool|based\s|overall|indicate|mention|consider|ensure|need\s|"
+        r"provide|review|assess|identify|potential|records|patient|history|symptoms|medication|"
+        r"conflict|assessment|follow-up|issue|reasoning|step|prompt|address|rag|thought|try|john\sdoe|nkma).*?\n",
+        "", text, flags=re.DOTALL
+    )
+    text = re.sub(r"\n{2,}", "\n", text).strip()
+    lines = []
+    valid_heading = False
+    for line in text.split("\n"):
+        line = line.strip()
+        if line.lower() in ["missed diagnoses:", "medication conflicts:", "incomplete assessments:", "urgent follow-up:"]:
+            valid_heading = True
+            lines.append(f"**{line[:-1]}**:")
+        elif valid_heading and line.startswith("-"):
+            lines.append(line)
+        else:
+            valid_heading = False
+    return "\n".join(lines).strip()
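
Note (outside the diff): a quick illustration of what the new clean_response keeps and drops, assuming the module-level re and sanitize_utf8 imports are in place; the input text is invented:

    sample = (
        "Analyze the labs first.\n"
        "Missed Diagnoses:\n"
        "- Elevated TSH of 6.2 suggests hypothyroidism\n"
    )
    print(clean_response(sample))
    # The reasoning line is stripped and the recognized heading is bolded:
    # **Missed Diagnoses**:
    # - Elevated TSH of 6.2 suggests hypothyroidism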