Update src/txagent/txagent.py

src/txagent/txagent.py  (+100 −379)  CHANGED
@@ -5,7 +5,7 @@ import gc
 import numpy as np
 from vllm import LLM, SamplingParams
 from jinja2 import Template
-from typing import List, Dict, Optional, Union, Generator
 import types
 from tooluniverse import ToolUniverse
 from .toolrag import ToolRAGModel
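The old typing import is dropped here; its replacement on the new side of the diff (shown near the end) adds Tuple, which the updated return annotations rely on. A minimal standalone illustration of why Tuple is needed; the function name below is invented for illustration only:

```python
from typing import Dict, List, Tuple

# Same style of annotation as run_function_call(...) -> Tuple[List[Dict], List, str]
def example_result() -> Tuple[List[Dict], List, str]:
    # (revised_messages, tools_prompt, special_tool_call)
    return ([{"role": "assistant", "content": "ok"}], [], "")

messages, tools_prompt, special_tool_call = example_result()
```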
@@ -40,25 +40,6 @@ class TxAgent:
                  additional_default_tools: Optional[List] = None):
         """
         Initialize the TxAgent with specified configuration.
-
-        Args:
-            model_name: Name of the main LLM model
-            rag_model_name: Name of the RAG model
-            tool_files_dict: Dictionary of tool files
-            enable_finish: Whether to enable the Finish tool
-            enable_rag: Whether to enable RAG functionality
-            enable_summary: Whether to enable summarization
-            init_rag_num: Initial number of RAG tools to retrieve
-            step_rag_num: Number of RAG tools to retrieve per step
-            summary_mode: Mode for summarization ('step' or 'length')
-            summary_skip_last_k: Number of last steps to skip in summarization
-            summary_context_length: Context length threshold for summarization
-            force_finish: Whether to force finish when max rounds reached
-            avoid_repeat: Whether to avoid repeating similar responses
-            seed: Random seed for reproducibility
-            enable_checker: Whether to enable reasoning trace checker
-            enable_chat: Whether to enable chat mode
-            additional_default_tools: Additional tools to include by default
         """
         self.model_name = model_name
         self.tokenizer = None
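For orientation, a hedged construction sketch built from the parameter names in the docstring that this hunk removes; the import path, model identifiers, and default values are assumptions, not taken from the diff:

```python
from txagent import TxAgent  # import path assumed

agent = TxAgent(
    model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",        # model id assumed
    rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",  # RAG model id assumed
    tool_files_dict=None,     # dictionary of tool files
    enable_finish=True,       # expose the Finish tool
    enable_rag=True,          # enable RAG tool retrieval
    enable_summary=False,     # summarize long tool results
    step_rag_num=10,          # tools retrieved per step (value assumed)
    force_finish=True,        # force an answer when max rounds are reached
    seed=42,                  # random seed for reproducibility
)
print(agent.load_models())    # returns a status message string
```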
@@ -94,12 +75,6 @@ class TxAgent:
     def load_models(self, model_name: Optional[str] = None) -> str:
         """
         Load the specified model or the default model if none specified.
-
-        Args:
-            model_name: Name of the model to load
-
-        Returns:
-            Status message indicating success or failure
         """
         if model_name is not None:
             if model_name == self.model_name:
@@ -140,123 +115,97 @@ class TxAgent:
         logger.error("Failed to load tools: %s", str(e))
         raise RuntimeError(f"Failed to load tools: {str(e)}")
 
-    def
-
-
-
-
-
-
-
-            self.rag_model.save_embeddings(cache_path)
-            logger.info("Tool description embeddings loaded successfully")
-        except Exception as e:
-            logger.error("Failed to load tool embeddings: %s", str(e))
-            raise RuntimeError(f"Failed to load tool embeddings: {str(e)}")
-
-    def rag_infer(self, query: str, top_k: int = 5) -> List[str]:
-        """
-        Perform RAG inference to retrieve relevant tools.
-
-        Args:
-            query: The query to search for
-            top_k: Number of top results to return
-
-        Returns:
-            List of relevant tool names
-        """
-        if not self.enable_rag:
-            return []
-        return self.rag_model.rag_infer(query, top_k)
-
-    def initialize_conversation(self,
-                                message: str,
-                                conversation: Optional[List[Dict]] = None,
-                                history: Optional[List[Dict]] = None) -> List[Dict]:
         """
-
-
-        Args:
-            message: The new message to add
-            conversation: Existing conversation to extend
-            history: Chat history to incorporate
-
-        Returns:
-            Updated conversation list
         """
-
-
 
         conversation = self.set_system_prompt(conversation, self.prompt_multi_step)
-        if history:
-            for msg in history:
-                if msg['role'] == 'user':
-                    conversation.append({"role": "user", "content": msg['content']})
-                elif msg['role'] == 'assistant':
-                    conversation.append({"role": "assistant", "content": msg['content']})
         conversation.append({"role": "user", "content": message})
-        logger.debug("Conversation initialized with %d messages", len(conversation))
-        return conversation
-
-    def tool_RAG(self,
-                 message: Optional[str] = None,
-                 picked_tool_names: Optional[List[str]] = None,
-                 existing_tools_prompt: List = [],
-                 rag_num: int = 0,
-                 return_call_result: bool = False) -> Union[List, Tuple[List, List]]:
-        """
-        Retrieve relevant tools using RAG.
 
-
-
-
-
-
-
-
-        Returns:
-            List of tool prompts or tuple with tool names if return_call_result is True
-        """
-        if not self.enable_rag:
-            return [] if not return_call_result else ([], [])
 
-
-
-        if
-
-
 
-
-
-
-
-
 
-
-
-
-
-        if return_call_result:
-            return picked_tools_prompt, picked_tool_names
-        return picked_tools_prompt
 
-
-
-
-
-
-
-
-
-
 
-
-
-        if
-
-
-
-        return conversation
 
     def run_function_call(self,
                           fcall_str: str,
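The context lines above show the conversation being seeded with a system prompt and the user message; a minimal sketch of the message shapes that later turns append, pieced together from run_function_call below (all field values are illustrative):

```python
import json

conversation = [
    {"role": "system", "content": "<multi-step system prompt>"},
    {"role": "user", "content": "What drugs interact with warfarin?"},
    # One reasoning turn: assistant text plus the serialized tool calls it made,
    # followed by one "tool" message per result.
    {"role": "assistant",
     "content": "I will look this up.",
     "tool_calls": json.dumps([{"name": "SomeTool", "arguments": {}, "call_id": "1"}])},
    {"role": "tool", "content": json.dumps({"content": "tool output", "call_id": "1"})},
]
```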
@@ -268,18 +217,6 @@ class TxAgent:
                           temperature: Optional[float] = None) -> Tuple[List[Dict], List, str]:
         """
         Execute function calls from the model's output.
-
-        Args:
-            fcall_str: The function call string from the model
-            return_message: Whether to return the message part
-            existing_tools_prompt: Existing tools to consider
-            message_for_call_agent: Original message for CallAgent
-            call_agent: Whether CallAgent is enabled
-            call_agent_level: Current CallAgent level
-            temperature: Temperature for sub-agent calls
-
-        Returns:
-            Tuple of (revised_messages, tools_prompt, special_tool_call)
         """
         try:
             function_call_json, message = self.tooluniverse.extract_function_call_json(
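A hedged sketch of how the three-part return value of run_function_call is consumed; it mirrors the call site in run_multistep_agent further down, with placeholder inputs:

```python
# Sketch: `agent` is a TxAgent instance; `last_outputs` is raw model text that may
# contain tool-call markup. Argument names follow the call in run_multistep_agent.
message = "original user question"
picked_tools_prompt = []
last_outputs = "<model output containing a tool call>"

function_call_messages, picked_tools_prompt, special_tool_call = agent.run_function_call(
    last_outputs,
    return_message=True,
    existing_tools_prompt=picked_tools_prompt,
    message_for_call_agent=message,
    call_agent=False,
    call_agent_level=0,
    temperature=0.3,
)
if special_tool_call == 'Finish':
    answer = function_call_messages[0]['content'].split('[FinalAnswer]')[-1]
```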
@@ -304,11 +241,15 @@ class TxAgent:
                     full_message = (
                         (message_for_call_agent or "") +
                         "\nYou must follow the following plan to answer the question: " +
-                        str(solution_plan)
                     call_result = self.run_multistep_agent(
-                        full_message,
-
-
                     if call_result is None:
                         call_result = "⚠️ No content returned from sub-agent."
                     else:
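For context, a hedged sketch of the kind of tool call that reaches this CallAgent branch; the tool name and the solution_plan argument key are inferred from the surrounding code, not confirmed by the diff:

```python
# Assumed shape of a CallAgent-style call extracted by ToolUniverse.
function_call = {
    "name": "CallAgent",
    "arguments": {
        "solution_plan": "1) Retrieve the label. 2) Check interactions. 3) Summarize.",
    },
}
solution_plan = function_call["arguments"]["solution_plan"]
full_message = (
    "original user question"
    + "\nYou must follow the following plan to answer the question: "
    + str(solution_plan)
)
```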
@@ -317,6 +258,7 @@ class TxAgent:
                     call_result = "Error: CallAgent disabled."
             else:
                 call_result = self.tooluniverse.run_one_function(function_call_json[i])
             call_id = self.tooluniverse.call_id_gen()
             function_call_json[i]["call_id"] = call_id
             logger.info("Tool Call Result: %s", call_result)
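Ahead of the llm_infer changes in the next hunk, a short usage sketch of the rewritten signature; with check_token_status=True the method returns a (text, overflow) pair, and text can be None when the budget is exceeded. Values are illustrative:

```python
# Sketch: `agent` is a loaded TxAgent; keyword names follow the new llm_infer signature.
conversation = [{"role": "user", "content": "..."}]
text, token_overflow = agent.llm_infer(
    messages=conversation,
    temperature=0.3,
    tools=[],                  # picked tool prompts, if any
    skip_special_tokens=False,
    max_new_tokens=2048,
    max_token=131072,
    check_token_status=True,
)
if text is None:
    # token budget exceeded; callers fall back to a forced final answer
    pass
```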
@@ -328,45 +270,27 @@ class TxAgent:
                         "call_id": call_id
                     })
                 })
-            else:
-                call_results.append({
-                    "role": "tool",
-                    "content": json.dumps({"content": "Invalid or no function call detected."})
-                })
 
         revised_messages = [{
             "role": "assistant",
             "content": message.strip(),
             "tool_calls": json.dumps(function_call_json)
         }] + call_results
         return revised_messages, existing_tools_prompt or [], special_tool_call
 
     def llm_infer(self,
-
-
-
-
-
-
-
-
-
         """
         Perform inference using the LLM.
-
-        Args:
-            messages: Conversation history
-            temperature: Sampling temperature
-            tools: List of tools to include
-            output_begin_string: Prefix for output
-            max_new_tokens: Maximum new tokens to generate
-            max_token: Maximum total tokens allowed
-            skip_special_tokens: Whether to skip special tokens
-            model: Optional custom model to use
-            check_token_status: Whether to check token limits
-
-        Returns:
-            Generated text or tuple with text and overflow flag if check_token_status
         """
         model = model or self.model
         tokenizer = self.tokenizer

@@ -409,209 +333,6 @@ class TxAgent:
             logger.error("Inference failed: %s", str(e))
             raise RuntimeError(f"Inference failed: {str(e)}")
 
-    def run_multistep_agent(self,
-                            message: str,
-                            temperature: float,
-                            max_new_tokens: int,
-                            max_token: int,
-                            max_round: int = 5,
-                            call_agent: bool = False,
-                            call_agent_level: int = 0) -> Optional[str]:
-        """
-        Run multi-step reasoning with the agent.
-
-        Args:
-            message: Input message
-            temperature: Sampling temperature
-            max_new_tokens: Max new tokens per step
-            max_token: Max total tokens
-            max_round: Maximum reasoning rounds
-            call_agent: Whether to enable CallAgent
-            call_agent_level: Current CallAgent level
-
-        Returns:
-            Final answer or None if failed
-        """
-        logger.info("Starting multistep agent for message: %s", message[:100])
-        picked_tools_prompt, call_agent_level = self.initialize_tools_prompt(
-            call_agent, call_agent_level, message)
-        conversation = self.initialize_conversation(message)
-        outputs = []
-        last_outputs = []
-        next_round = True
-        current_round = 0
-        token_overflow = False
-        enable_summary = False
-        last_status = {}
-
-        while next_round and current_round < max_round:
-            current_round += 1
-            if len(outputs) > 0:
-                function_call_messages, picked_tools_prompt, special_tool_call = self.run_function_call(
-                    last_outputs, return_message=True,
-                    existing_tools_prompt=picked_tools_prompt,
-                    message_for_call_agent=message,
-                    call_agent=call_agent,
-                    call_agent_level=call_agent_level,
-                    temperature=temperature)
-
-                if special_tool_call == 'Finish':
-                    next_round = False
-                    conversation.extend(function_call_messages)
-                    content = function_call_messages[0]['content']
-                    if content is None:
-                        return "❌ No content returned after Finish tool call."
-                    return content.split('[FinalAnswer]')[-1]
-
-                if (self.enable_summary or token_overflow) and not call_agent:
-                    enable_summary = True
-                last_status = self.function_result_summary(
-                    conversation, status=last_status, enable_summary=enable_summary)
-
-                if function_call_messages:
-                    conversation.extend(function_call_messages)
-                    outputs.append(tool_result_format(function_call_messages))
-                else:
-                    next_round = False
-                    conversation.extend([{"role": "assistant", "content": ''.join(last_outputs)}])
-                    return ''.join(last_outputs).replace("</s>", "")
-
-            last_outputs = []
-            outputs.append("### TxAgent:\n")
-            last_outputs_str, token_overflow = self.llm_infer(
-                messages=conversation,
-                temperature=temperature,
-                tools=picked_tools_prompt,
-                skip_special_tokens=False,
-                max_new_tokens=2048,
-                max_token=131072,
-                check_token_status=True)
-            if last_outputs_str is None:
-                logger.warning("Token limit exceeded")
-                if self.force_finish:
-                    return self.get_answer_based_on_unfinished_reasoning(
-                        conversation, temperature, max_new_tokens, max_token)
-                return "❌ Token limit exceeded."
-            last_outputs.append(last_outputs_str)
-
-        if max_round == current_round:
-            logger.warning("Max rounds exceeded")
-            if self.force_finish:
-                return self.get_answer_based_on_unfinished_reasoning(
-                    conversation, temperature, max_new_tokens, max_token)
-        return None
-
-    def analyze_document(self,
-                         file_path: str,
-                         temperature: float = 0.1,
-                         max_new_tokens: int = 2048,
-                         max_token: int = 131072) -> Dict[str, Union[str, List]]:
-        """
-        Analyze a document and return structured results.
-
-        Args:
-            file_path: Path to the document
-            temperature: Sampling temperature
-            max_new_tokens: Max new tokens per step
-            max_token: Max total tokens
-
-        Returns:
-            Dictionary with analysis results
-        """
-        logger.info("Starting document analysis for: %s", file_path)
-        start_time = time.time()
-
-        try:
-            extracted_text = self.extract_text(file_path)
-            if not extracted_text:
-                raise ValueError("Could not extract text from document")
-
-            chunks = self.split_text(extracted_text)
-            batches = self.batch_chunks(chunks, batch_size=1)
-            batch_results = []
-
-            for batch in batches:
-                prompt = "\n\n".join(self.build_prompt(chunk) for chunk in batch)
-                response = self.run_multistep_agent(
-                    prompt,
-                    temperature=temperature,
-                    max_new_tokens=max_new_tokens,
-                    max_token=max_token,
-                    call_agent=False
-                )
-                batch_results.append(self.clean_response(response or "No response"))
-
-            combined = "\n\n".join([res for res in batch_results if not res.startswith("❌")])
-            if not combined:
-                raise ValueError("No valid batch responses generated")
-
-            final_summary = self.generate_final_summary(self, combined)
-
-            return {
-                "status": "success",
-                "summary": final_summary,
-                "batch_results": batch_results,
-                "processing_time": time.time() - start_time
-            }
-
-        except Exception as e:
-            logger.error("Document analysis failed: %s", str(e))
-            return {
-                "status": "error",
-                "message": str(e),
-                "processing_time": time.time() - start_time
-            }
-
-    def get_answer_based_on_unfinished_reasoning(self,
-                                                 conversation: List[Dict],
-                                                 temperature: float,
-                                                 max_new_tokens: int,
-                                                 max_token: int) -> str:
-        """
-        Generate a final answer when reasoning is incomplete.
-
-        Args:
-            conversation: Current conversation history
-            temperature: Sampling temperature
-            max_new_tokens: Max new tokens
-            max_token: Max total tokens
-
-        Returns:
-            Final answer string
-        """
-        if conversation[-1]['role'] == 'assistant':
-            conversation.append(
-                {'role': 'tool', 'content': 'Errors occurred during function call; provide final answer with current information.'})
-        finish_tools_prompt = self.add_finish_tools([])
-        last_outputs_str = self.llm_infer(
-            messages=conversation,
-            temperature=temperature,
-            tools=finish_tools_prompt,
-            output_begin_string='[FinalAnswer]',
-            skip_special_tokens=True,
-            max_new_tokens=max_new_tokens,
-            max_token=max_token)
-        logger.info("Unfinished reasoning answer: %s", last_outputs_str[:100])
-        return last_outputs_str
-
-    def update_parameters(self, **kwargs) -> Dict:
-        """
-        Update agent parameters dynamically.
-
-        Args:
-            kwargs: Parameter names and values to update
-
-        Returns:
-            Dictionary of updated parameters
-        """
-        updated_attributes = {}
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
-                updated_attributes[key] = value
-        logger.info("Updated parameters: %s", updated_attributes)
-        return updated_attributes
-
     def cleanup(self) -> None:
         """Clean up resources and clear memory."""
         if hasattr(self, 'model'):

src/txagent/txagent.py after the change:

 import numpy as np
 from vllm import LLM, SamplingParams
 from jinja2 import Template
+from typing import List, Dict, Optional, Union, Tuple, Generator
 import types
 from tooluniverse import ToolUniverse
 from .toolrag import ToolRAGModel

                  additional_default_tools: Optional[List] = None):
         """
         Initialize the TxAgent with specified configuration.
         """
         self.model_name = model_name
         self.tokenizer = None

     def load_models(self, model_name: Optional[str] = None) -> str:
         """
         Load the specified model or the default model if none specified.
         """
         if model_name is not None:
             if model_name == self.model_name:

         logger.error("Failed to load tools: %s", str(e))
         raise RuntimeError(f"Failed to load tools: {str(e)}")
 
+    def run_multistep_agent(self,
+                            message: str,
+                            temperature: float,
+                            max_new_tokens: int,
+                            max_token: int,
+                            max_round: int = 5,
+                            call_agent: bool = False,
+                            call_agent_level: int = 0) -> Optional[str]:
         """
+        Run multi-step reasoning with the agent.
         """
+        logger.info("Starting multistep agent for message: %s", message[:100])
+        picked_tools_prompt = []
+        call_agent_level = 0
+        if call_agent:
+            call_agent_level += 1
+            if call_agent_level >= 2:
+                call_agent = False
 
+        conversation = []
         conversation = self.set_system_prompt(conversation, self.prompt_multi_step)
         conversation.append({"role": "user", "content": message})
 
+        outputs = []
+        last_outputs = []
+        next_round = True
+        current_round = 0
+        token_overflow = False
+        enable_summary = False
+        last_status = {}
 
+        while next_round and current_round < max_round:
+            current_round += 1
+            if len(outputs) > 0:
+                function_call_messages, picked_tools_prompt, special_tool_call = self.run_function_call(
+                    last_outputs,
+                    return_message=True,
+                    existing_tools_prompt=picked_tools_prompt,
+                    message_for_call_agent=message,
+                    call_agent=call_agent,
+                    call_agent_level=call_agent_level,
+                    temperature=temperature
+                )
 
+                if special_tool_call == 'Finish':
+                    next_round = False
+                    conversation.extend(function_call_messages)
+                    content = function_call_messages[0]['content']
+                    if content is None:
+                        return "❌ No content returned after Finish tool call."
+                    return content.split('[FinalAnswer]')[-1]
 
+                if (self.enable_summary or token_overflow) and not call_agent:
+                    enable_summary = True
+                last_status = self.function_result_summary(
+                    conversation, status=last_status, enable_summary=enable_summary)
 
+                if function_call_messages:
+                    conversation.extend(function_call_messages)
+                    outputs.append(tool_result_format(function_call_messages))
+                else:
+                    next_round = False
+                    conversation.extend([{"role": "assistant", "content": ''.join(last_outputs)}])
+                    return ''.join(last_outputs).replace("</s>", "")
+
+            last_outputs = []
+            outputs.append("### TxAgent:\n")
+            last_outputs_str, token_overflow = self.llm_infer(
+                messages=conversation,
+                temperature=temperature,
+                tools=picked_tools_prompt,
+                skip_special_tokens=False,
+                max_new_tokens=2048,
+                max_token=131072,
+                check_token_status=True)
+
+            if last_outputs_str is None:
+                logger.warning("Token limit exceeded")
+                if self.force_finish:
+                    return self.get_answer_based_on_unfinished_reasoning(
+                        conversation, temperature, max_new_tokens, max_token)
+                return "❌ Token limit exceeded."
+
+            last_outputs.append(last_outputs_str)
 
+        if max_round == current_round:
+            logger.warning("Max rounds exceeded")
+            if self.force_finish:
+                return self.get_answer_based_on_unfinished_reasoning(
+                    conversation, temperature, max_new_tokens, max_token)
+        return None
 
     def run_function_call(self,
                           fcall_str: str,

                           temperature: Optional[float] = None) -> Tuple[List[Dict], List, str]:
         """
         Execute function calls from the model's output.
         """
         try:
             function_call_json, message = self.tooluniverse.extract_function_call_json(

                     full_message = (
                         (message_for_call_agent or "") +
                         "\nYou must follow the following plan to answer the question: " +
+                        str(solution_plan))
                     call_result = self.run_multistep_agent(
+                        full_message,
+                        temperature=temperature,
+                        max_new_tokens=512,
+                        max_token=131072,
+                        call_agent=False,
+                        call_agent_level=call_agent_level
+                    )
                     if call_result is None:
                         call_result = "⚠️ No content returned from sub-agent."
                     else:

                     call_result = "Error: CallAgent disabled."
             else:
                 call_result = self.tooluniverse.run_one_function(function_call_json[i])
+
             call_id = self.tooluniverse.call_id_gen()
             function_call_json[i]["call_id"] = call_id
             logger.info("Tool Call Result: %s", call_result)

                         "call_id": call_id
                     })
                 })
 
         revised_messages = [{
             "role": "assistant",
             "content": message.strip(),
             "tool_calls": json.dumps(function_call_json)
         }] + call_results
+
         return revised_messages, existing_tools_prompt or [], special_tool_call
 
     def llm_infer(self,
+                  messages: List[Dict],
+                  temperature: float = 0.1,
+                  tools: Optional[List] = None,
+                  output_begin_string: Optional[str] = None,
+                  max_new_tokens: int = 512,
+                  max_token: int = 131072,
+                  skip_special_tokens: bool = True,
+                  model: Optional[LLM] = None,
+                  check_token_status: bool = False) -> Union[str, Tuple[str, bool]]:
         """
         Perform inference using the LLM.
         """
         model = model or self.model
         tokenizer = self.tokenizer

             logger.error("Inference failed: %s", str(e))
             raise RuntimeError(f"Inference failed: {str(e)}")
 
     def cleanup(self) -> None:
         """Clean up resources and clear memory."""
         if hasattr(self, 'model'):
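With analyze_document, update_parameters and get_answer_based_on_unfinished_reasoning removed by this commit, callers drive the remaining surface directly. A hedged end-to-end sketch based on the removed implementation; extract_text, split_text, batch_chunks, build_prompt and clean_response are assumed to still exist as helpers, and all values are illustrative:

```python
# Sketch only: mirrors the removed analyze_document loop on top of run_multistep_agent.
def analyze_document_externally(agent, file_path: str) -> str:
    text = agent.extract_text(file_path)                      # assumed helper
    chunks = agent.split_text(text)                           # assumed helper
    results = []
    for batch in agent.batch_chunks(chunks, batch_size=1):    # assumed helper
        prompt = "\n\n".join(agent.build_prompt(c) for c in batch)
        response = agent.run_multistep_agent(
            prompt,
            temperature=0.1,
            max_new_tokens=2048,
            max_token=131072,
            call_agent=False,
        )
        results.append(agent.clean_response(response or "No response"))
    return "\n\n".join(r for r in results if not r.startswith("❌"))

# Single-question use stays a direct call:
# answer = agent.run_multistep_agent("Is warfarin contraindicated with aspirin?",
#                                    temperature=0.3, max_new_tokens=1024, max_token=131072)
```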