Update src/txagent/txagent.py
src/txagent/txagent.py  +39 −75
@@ -12,18 +12,17 @@ from tooluniverse import ToolUniverse
 from gradio import ChatMessage
 from .toolrag import ToolRAGModel
 import torch
-# near the top of txagent.py
 import logging
+
 logger = logging.getLogger(__name__)
-logging.basicConfig(level=logging.
+logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 
 from .utils import NoRepeatSentenceProcessor, ReasoningTraceChecker, tool_result_format
 
-
 class TxAgent:
     def __init__(self, model_name,
                  rag_model_name,
-                 tool_files_dict=None,
+                 tool_files_dict=None,
                  enable_finish=True,
                  enable_rag=True,
                  enable_summary=False,
@@ -47,10 +46,11 @@ class TxAgent:
         self.model = None
         self.rag_model = ToolRAGModel(rag_model_name)
         self.tooluniverse = None
-
-
+        self.prompt_multi_step = ("You are a helpful assistant that solves problems through detailed, step-by-step reasoning "
+                                  "and actions based on your reasoning. Provide comprehensive and clinically precise responses, "
+                                  "including specific diagnoses, tools, and actionable recommendations when analyzing medical data.")
         self.self_prompt = "Strictly follow the instruction."
-        self.chat_prompt = "You are helpful assistant to chat with the user."
+        self.chat_prompt = "You are a helpful assistant to chat with the user."
         self.enable_finish = enable_finish
         self.enable_rag = enable_rag
         self.enable_summary = enable_summary
@@ -145,7 +145,7 @@ class TxAgent:
                  existing_tools_prompt=[],
                  rag_num=5,
                  return_call_result=False):
-        extra_factor = 30
+        extra_factor = 30
         if picked_tool_names is None:
             assert picked_tool_names is not None or message is not None
             picked_tool_names = self.rag_infer(
@@ -270,7 +270,6 @@ class TxAgent:
                 "tool_calls": json.dumps(function_call_json)
             }] + call_results
 
-        # Yield the final result.
         return revised_messages, existing_tools_prompt, special_tool_call
 
     def run_function_call_stream(self, fcall_str,
@@ -364,11 +363,10 @@ class TxAgent:
         else:
             return revised_messages, existing_tools_prompt, special_tool_call
 
-
     def get_answer_based_on_unfinished_reasoning(self, conversation, temperature, max_new_tokens, max_token, outputs=None):
-        if conversation[-1]['role'] == '
+        if conversation[-1]['role'] == 'assistant':
             conversation.append(
-                {'role': 'tool', 'content': 'Errors
+                {'role': 'tool', 'content': 'Errors happened during the function call, please come up with the final answer with the current information.'})
         finish_tools_prompt = self.add_finish_tools([])
 
         last_outputs_str = self.llm_infer(messages=conversation,
@@ -387,15 +385,6 @@ class TxAgent:
                         max_round: int = 20,
                         call_agent=False,
                         call_agent_level=0) -> str:
-        """
-        Generate a streaming response using the llama3-8b model.
-        Args:
-            message (str): The input message.
-            temperature (float): The temperature for generating the response.
-            max_new_tokens (int): The maximum number of new tokens to generate.
-        Returns:
-            str: The generated response.
-        """
         print("\033[1;32;40mstart\033[0m")
         picked_tools_prompt, call_agent_level = self.initialize_tools_prompt(
             call_agent, call_agent_level, message)
@@ -454,7 +443,6 @@ class TxAgent:
                 if self.enable_checker:
                     good_status, wrong_info = checker.check_conversation()
                     if not good_status:
-                        next_round = False
                         print(
                             "Internal error in reasoning: " + wrong_info)
                         break
@@ -489,7 +477,6 @@ class TxAgent:
             return None
 
     def build_logits_processor(self, messages, llm):
-        # Use the tokenizer from the LLM instance.
         tokenizer = llm.get_tokenizer()
         if self.avoid_repeat and len(messages) > 2:
             assistant_messages = []
@@ -516,7 +503,6 @@ class TxAgent:
         sampling_params = SamplingParams(
             temperature=temperature,
             max_tokens=max_new_tokens,
-
             seed=seed if seed is not None else self.seed,
         )
 
@@ -527,18 +513,23 @@ class TxAgent:
 
         if check_token_status and max_token is not None:
             token_overflow = False
-
-
-            if
-
+            input_tokens = self.tokenizer.encode(prompt, return_tensors="pt")[0]
+            num_input_tokens = len(input_tokens)
+            if num_input_tokens > max_token:
+                logger.info(f"Number of input tokens before inference: {num_input_tokens}")
+                logger.info("The number of tokens exceeds the maximum limit!!!!")
+                max_prompt_tokens = max_token - max_new_tokens - 100
+                if max_prompt_tokens > 0:
+                    truncated_input = self.tokenizer.decode(input_tokens[:max_prompt_tokens])
+                    prompt = truncated_input
+                    logger.info(f"Prompt truncated to {len(self.tokenizer.encode(prompt, return_tensors='pt')[0])} tokens")
+                    token_overflow = True
+                else:
+                    logger.warning("Max prompt tokens too small, cannot truncate effectively")
                 torch.cuda.empty_cache()
                 gc.collect()
-                print("Number of input tokens before inference:",
-                      num_input_tokens)
-                logger.info(
-                    "The number of tokens exceeds the maximum limit!!!!")
-                token_overflow = True
                 return None, token_overflow
+
         output = model.generate(
             prompt,
             sampling_params=sampling_params,
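The guard added to `llm_infer` above counts the prompt's tokens and, on overflow, decodes only the first `max_token - max_new_tokens - 100` tokens back into text before the branch ends with `return None, token_overflow`. The counting-and-truncation step can be read as a small standalone helper; the sketch below is illustrative only, with hypothetical names (`truncate_prompt`, `reserve`) and any Hugging Face tokenizer assumed.

```python
def truncate_prompt(prompt, tokenizer, max_token, max_new_tokens, reserve=100):
    """Hypothetical helper mirroring the overflow guard sketched in the hunk above.

    Returns (possibly truncated prompt, overflow flag).
    """
    input_ids = tokenizer.encode(prompt, return_tensors="pt")[0]
    if len(input_ids) <= max_token:
        return prompt, False

    # Leave room for the generation budget plus a small safety margin, as in the diff.
    max_prompt_tokens = max_token - max_new_tokens - reserve
    if max_prompt_tokens <= 0:
        # Nothing sensible can be kept; the caller has to handle this case.
        return prompt, True

    truncated = tokenizer.decode(input_ids[:max_prompt_tokens], skip_special_tokens=True)
    return truncated, True

# Usage (tokenizer name is illustrative only):
#   from transformers import AutoTokenizer
#   tok = AutoTokenizer.from_pretrained("gpt2")
#   prompt, overflow = truncate_prompt(long_prompt, tok, max_token=2048, max_new_tokens=512)
```

Note that in the hunk itself the overflow branch still returns `None, token_overflow` after assigning the truncated text back to `prompt`, so generation does not proceed from the shortened prompt within this call.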
@@ -641,17 +632,6 @@ Generate **one summarized sentence** about "function calls' responses" with nece
         return output
 
     def function_result_summary(self, input_list, status, enable_summary):
-        """
-        Processes the input list, extracting information from sequences of 'user', 'tool', 'assistant' roles.
-        Supports 'length' and 'step' modes, and skips the last 'k' groups.
-        Parameters:
-            input_list (list): A list of dictionaries containing role and other information.
-            summary_skip_last_k (int): Number of groups to skip from the end. Defaults to 0.
-            summary_context_length (int): The context length threshold for the 'length' mode.
-            last_processed_index (tuple or int): The last processed index.
-        Returns:
-            list: A list of extracted information from valid sequences.
-        """
         if 'tool_call_step' not in status:
             status['tool_call_step'] = 0
 
@@ -748,15 +728,11 @@ Generate **one summarized sentence** about "function calls' responses" with nece
 
         return status
 
-    # Following are Gradio related functions
-
-    # General update method that accepts any new arguments through kwargs
     def update_parameters(self, **kwargs):
         for key, value in kwargs.items():
             if hasattr(self, key):
                 setattr(self, key, value)
 
-        # Return the updated attributes
         updated_attributes = {key: value for key,
                               value in kwargs.items() if hasattr(self, key)}
         return updated_attributes
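`update_parameters` above applies keyword arguments only to attributes that already exist on the instance and returns the subset that matched. A toy usage sketch (the class and values are illustrative, not part of the diff):

```python
class Configurable:
    """Stand-in for the update_parameters pattern kept in the hunk above."""

    def __init__(self):
        self.temperature = 0.3
        self.enable_rag = True

    def update_parameters(self, **kwargs):
        for key, value in kwargs.items():
            if hasattr(self, key):
                setattr(self, key, value)
        # Report only the keys that matched existing attributes.
        return {key: value for key, value in kwargs.items() if hasattr(self, key)}


cfg = Configurable()
print(cfg.update_parameters(temperature=0.7, unknown_flag=True))
# {'temperature': 0.7} -- unknown_flag is silently ignored
```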
@@ -795,7 +771,6 @@ Generate **one summarized sentence** about "function calls' responses" with nece
             return ""
 
         outputs = []
-        outputs_str = ''
         last_outputs = []
 
         picked_tools_prompt, call_agent_level = self.initialize_tools_prompt(
@@ -867,7 +842,7 @@ Generate **one summarized sentence** about "function calls' responses" with nece
                 if self.enable_checker:
                     good_status, wrong_info = checker.check_conversation()
                     if not good_status:
-
+                        logger.warning(f"Checker flagged reasoning error: {wrong_info}")
                         break
 
                 last_outputs = []
@@ -884,18 +859,11 @@ Generate **one summarized sentence** about "function calls' responses" with nece
                 logger.debug(f"llm_infer output: {last_outputs_str[:100] if last_outputs_str else None}, token_overflow: {token_overflow}")
 
                 if last_outputs_str is None:
-                    logger.warning("llm_infer returned None due to token overflow")
-
-
-
-
-                        yield history
-                        return last_outputs_str
-                    else:
-                        error_msg = "Token limit exceeded. Please reduce input size or increase max_token."
-                        history.append(ChatMessage(role="assistant", content=error_msg))
-                        yield history
-                        return error_msg
+                    logger.warning("llm_infer returned None, likely due to token overflow")
+                    error_msg = "Error: Unable to generate response due to token limit. Please reduce input size."
+                    history.append(ChatMessage(role="assistant", content=error_msg))
+                    yield history
+                    return error_msg
 
                 last_thought = last_outputs_str.split("[TOOL_CALLS]")[0]
 
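The replacement error path above follows the generator protocol used throughout `run_gradio_chat`: append an assistant message to `history`, `yield` the updated history so the UI can refresh, then `return`, which ends the generator (the returned string only surfaces as `StopIteration.value`). A stripped-down sketch without Gradio, using plain dicts in place of `ChatMessage` and a hypothetical word count standing in for the token check:

```python
def chat_stream(prompt, word_limit=10):
    history = []
    if len(prompt.split()) > word_limit:    # stand-in for "last_outputs_str is None"
        error_msg = "Error: Unable to generate response due to token limit. Please reduce input size."
        history.append({"role": "assistant", "content": error_msg})
        yield history                        # the UI sees the error message
        return error_msg                     # ends the stream for the caller
    history.append({"role": "assistant", "content": f"Echo: {prompt}"})
    yield history


for h in chat_stream("word " * 50):
    print(h[-1]["content"])
# prints the token-limit error once; iteration then stops
```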
@@ -905,14 +873,12 @@ Generate **one summarized sentence** about "function calls' responses" with nece
 
                 if '[FinalAnswer]' in last_thought:
                     parts = last_thought.split('[FinalAnswer]', 1)
-                    if len(parts) == 2
-                        final_thought, final_answer = parts
-                    else:
-                        final_thought, final_answer = last_thought, ""
+                    final_thought, final_answer = parts if len(parts) == 2 else (last_thought, "")
                     history.append(ChatMessage(role="assistant", content=final_thought.strip()))
                     yield history
                     history.append(ChatMessage(role="assistant", content="**🧠 Final Analysis:**\n" + final_answer.strip()))
                     yield history
+                    next_round = False
                 else:
                     history.append(ChatMessage(role="assistant", content=last_thought))
                     yield history
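The one-liner introduced above is a defensive parse of the `[FinalAnswer]` marker: `split('[FinalAnswer]', 1)` yields two parts when the marker is present, and the conditional expression falls back to treating the whole text as the thought otherwise. A small illustration with made-up strings:

```python
def split_final_answer(text):
    """Split model output on the [FinalAnswer] marker, tolerating its absence."""
    parts = text.split('[FinalAnswer]', 1)
    final_thought, final_answer = parts if len(parts) == 2 else (text, "")
    return final_thought.strip(), final_answer.strip()


print(split_final_answer("Weighing the options... [FinalAnswer] Start with 5 mg daily."))
# ('Weighing the options...', 'Start with 5 mg daily.')
print(split_final_answer("No marker in this output"))
# ('No marker in this output', '')
```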
@@ -920,15 +886,13 @@ Generate **one summarized sentence** about "function calls' responses" with nece
                 last_outputs.append(last_outputs_str)
 
             if next_round:
+                logger.info("Max rounds reached, forcing finish")
                 if self.force_finish:
                     last_outputs_str = self.get_answer_based_on_unfinished_reasoning(
                         conversation, temperature, max_new_tokens, max_token)
                     if '[FinalAnswer]' in last_outputs_str:
                         parts = last_outputs_str.split('[FinalAnswer]', 1)
-                        if len(parts) == 2
-                            final_thought, final_answer = parts
-                        else:
-                            final_thought, final_answer = last_outputs_str, ""
+                        final_thought, final_answer = parts if len(parts) == 2 else (last_outputs_str, "")
                         history.append(ChatMessage(role="assistant", content=final_thought.strip()))
                         yield history
                         history.append(ChatMessage(role="assistant", content="**🧠 Final Analysis:**\n" + final_answer.strip()))
@@ -937,7 +901,10 @@ Generate **one summarized sentence** about "function calls' responses" with nece
                         history.append(ChatMessage(role="assistant", content=last_outputs_str.strip()))
                         yield history
                 else:
-
+                    error_msg = "The number of reasoning rounds exceeded the limit."
+                    history.append(ChatMessage(role="assistant", content=error_msg))
+                    yield history
+                    return error_msg
 
             except Exception as e:
                 logger.error(f"Exception in run_gradio_chat: {e}", exc_info=True)
@@ -949,10 +916,7 @@ Generate **one summarized sentence** about "function calls' responses" with nece
                     conversation, temperature, max_new_tokens, max_token)
                 if '[FinalAnswer]' in last_outputs_str:
                     parts = last_outputs_str.split('[FinalAnswer]', 1)
-                    if len(parts) == 2
-                        final_thought, final_answer = parts
-                    else:
-                        final_thought, final_answer = last_outputs_str, ""
+                    final_thought, final_answer = parts if len(parts) == 2 else (last_outputs_str, "")
                     history.append(ChatMessage(role="assistant", content=final_thought.strip()))
                     yield history
                     history.append(ChatMessage(role="assistant", content="**🧠 Final Analysis:**\n" + final_answer.strip()))