Update src/txagent/txagent.py

src/txagent/txagent.py  CHANGED  (+76 / -304)

@@ -1,348 +1,120 @@
Before (old file, 348 lines shown; removed lines are marked with "-"; lines cut off in the diff view are left truncated):

  import os
- import sys
  import json
- import gc
- import numpy as np
- from vllm import LLM, SamplingParams
- from jinja2 import Template
- from typing import List, Dict, Optional, Union, Tuple, Generator
- import types
- from tooluniverse import ToolUniverse
- from .toolrag import ToolRAGModel
- import torch
  import logging
-

- # Configure logging
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  logger = logging.getLogger("TxAgent")

- from .utils import NoRepeatSentenceProcessor, ReasoningTraceChecker, tool_result_format
-
  class TxAgent:
      def __init__(self,
                   model_name: str,
                   rag_model_name: str,
                   tool_files_dict: Optional[Dict] = None,
-
-                  enable_rag: bool = False,
-                  enable_summary: bool = False,
-                  init_rag_num: int = 0,
-                  step_rag_num: int = 0,
-                  summary_mode: str = 'step',
-                  summary_skip_last_k: int = 0,
-                  summary_context_length: Optional[int] = None,
                   force_finish: bool = True,
-
-
-
-
-                  additional_default_tools: Optional[List] = None):
-         """
-         Initialize the TxAgent with specified configuration.
-         """
          self.model_name = model_name
-         self.tokenizer = None
          self.rag_model_name = rag_model_name
          self.tool_files_dict = tool_files_dict or {}
-         self.
-         self.rag_model = ToolRAGModel(rag_model_name)
-         self.tooluniverse = None
-         self.prompt_multi_step = "You are a helpful assistant that solves problems through step-by-step reasoning."
-         self.self_prompt = "Strictly follow the instruction."
-         self.chat_prompt = "You are a helpful assistant for user chat."
-         self.enable_finish = enable_finish
-         self.enable_rag = enable_rag
-         self.enable_summary = enable_summary
-         self.summary_mode = summary_mode
-         self.summary_skip_last_k = summary_skip_last_k
-         self.summary_context_length = summary_context_length
-         self.init_rag_num = init_rag_num
-         self.step_rag_num = step_rag_num
          self.force_finish = force_finish
-         self.avoid_repeat = avoid_repeat
-         self.seed = seed
          self.enable_checker = enable_checker
-         self.
-

-     def init_model(self)
-         """Initialize both the main model and
          self.load_models()
-         self.
-         logger.info("Model
-
-     def load_models(self, model_name: Optional[str] = None) -> str:
-         """
-         Load the specified model or the default model if none specified.
-         """
-         if model_name is not None:
-             if model_name == self.model_name:
-                 return f"The model {model_name} is already loaded."
-             self.model_name = model_name

          try:
-
-
-
-
-
-                 max_num_seqs=512,
-                 gpu_memory_utilization=0.95,
-                 trust_remote_code=True,
              )
-
-             self.
-
-
-
              )
-
          except Exception as e:
-             logger.error("Failed to load model:
              raise RuntimeError(f"Failed to load model: {str(e)}")

-     def
-         """Load
          try:
-
-             self.
-
-             self.
-
-             logger.info("
          except Exception as e:
-             logger.error("Failed to load
-             raise RuntimeError(f"Failed to load
-
-     def run_multistep_agent(self,
-                             message: str,
-                             temperature: float,
-                             max_new_tokens: int,
-                             max_token: int,
-                             max_round: int = 5,
-                             call_agent: bool = False,
-                             call_agent_level: int = 0) -> Optional[str]:
-         """
-         Run multi-step reasoning with the agent.
-         """
-         logger.info("Starting multistep agent for message: %s", message[:100])
-         picked_tools_prompt = []
-         call_agent_level = 0
-         if call_agent:
-             call_agent_level += 1
-             if call_agent_level >= 2:
-                 call_agent = False

-
-
-
-
-
-         last_outputs = []
-         next_round = True
-         current_round = 0
-         token_overflow = False
-         enable_summary = False
-         last_status = {}
-
-         while next_round and current_round < max_round:
-             current_round += 1
-             if len(outputs) > 0:
-                 function_call_messages, picked_tools_prompt, special_tool_call = self.run_function_call(
-                     last_outputs,
-                     return_message=True,
-                     existing_tools_prompt=picked_tools_prompt,
-                     message_for_call_agent=message,
-                     call_agent=call_agent,
-                     call_agent_level=call_agent_level,
-                     temperature=temperature
-                 )
-
-                 if special_tool_call == 'Finish':
-                     next_round = False
-                     conversation.extend(function_call_messages)
-                     content = function_call_messages[0]['content']
-                     if content is None:
-                         return "❌ No content returned after Finish tool call."
-                     return content.split('[FinalAnswer]')[-1]
-
-                 if (self.enable_summary or token_overflow) and not call_agent:
-                     enable_summary = True
-                 last_status = self.function_result_summary(
-                     conversation, status=last_status, enable_summary=enable_summary)
-
-                 if function_call_messages:
-                     conversation.extend(function_call_messages)
-                     outputs.append(tool_result_format(function_call_messages))
-                 else:
-                     next_round = False
-                     conversation.extend([{"role": "assistant", "content": ''.join(last_outputs)}])
-                     return ''.join(last_outputs).replace("</s>", "")
-
-             last_outputs = []
-             outputs.append("### TxAgent:\n")
-             last_outputs_str, token_overflow = self.llm_infer(
-                 messages=conversation,
-                 temperature=temperature,
-                 tools=picked_tools_prompt,
-                 skip_special_tokens=False,
-                 max_new_tokens=2048,
-                 max_token=131072,
-                 check_token_status=True)

-
-
-
-
-
-

-             last_outputs.append(last_outputs_str)
-
-         if max_round == current_round:
-             logger.warning("Max rounds exceeded")
-             if self.force_finish:
-                 return self.get_answer_based_on_unfinished_reasoning(
-                     conversation, temperature, max_new_tokens, max_token)
-             return None
-
-     def run_function_call(self,
-                           fcall_str: str,
-                           return_message: bool = False,
-                           existing_tools_prompt: Optional[List] = None,
-                           message_for_call_agent: Optional[str] = None,
-                           call_agent: bool = False,
-                           call_agent_level: Optional[int] = None,
-                           temperature: Optional[float] = None) -> Tuple[List[Dict], List, str]:
-         """
-         Execute function calls from the model's output.
-         """
-         try:
-             function_call_json, message = self.tooluniverse.extract_function_call_json(
-                 fcall_str, return_message=return_message, verbose=False)
          except Exception as e:
-             logger.error("
-
-             message = fcall_str

-
-
-
-
-         for i in range(len(function_call_json)):
-             logger.info("Tool Call: %s", function_call_json[i])
-             if function_call_json[i]["name"] == 'Finish':
-                 special_tool_call = 'Finish'
-                 break
-             elif function_call_json[i]["name"] == 'CallAgent':
-                 if call_agent_level is not None and call_agent_level < 2 and call_agent:
-                     solution_plan = function_call_json[i]['arguments']['solution']
-                     full_message = (
-                         (message_for_call_agent or "") +
-                         "\nYou must follow the following plan to answer the question: " +
-                         str(solution_plan))
-                     call_result = self.run_multistep_agent(
-                         full_message,
-                         temperature=temperature,
-                         max_new_tokens=512,
-                         max_token=131072,
-                         call_agent=False,
-                         call_agent_level=call_agent_level
-                     )
-                     if call_result is None:
-                         call_result = "⚠️ No content returned from sub-agent."
-                     else:
-                         call_result = call_result.split('[FinalAnswer]')[-1].strip()
-                 else:
-                     call_result = "Error: CallAgent disabled."
-             else:
-                 call_result = self.tooluniverse.run_one_function(function_call_json[i])
-
-             call_id = self.tooluniverse.call_id_gen()
-             function_call_json[i]["call_id"] = call_id
-             logger.info("Tool Call Result: %s", call_result)
-             call_results.append({
-                 "role": "tool",
-                 "content": json.dumps({
-                     "tool_name": function_call_json[i]["name"],
-                     "content": call_result,
-                     "call_id": call_id
-                 })
-             })

-
-
-
-
-             }] + call_results
-
-         return revised_messages, existing_tools_prompt or [], special_tool_call
-
-     def llm_infer(self,
-                   messages: List[Dict],
-                   temperature: float = 0.1,
-                   tools: Optional[List] = None,
-                   output_begin_string: Optional[str] = None,
-                   max_new_tokens: int = 512,
-                   max_token: int = 131072,
-                   skip_special_tokens: bool = True,
-                   model: Optional[LLM] = None,
-                   check_token_status: bool = False) -> Union[str, Tuple[str, bool]]:
-         """
-         Perform inference using the LLM.
-         """
-         model = model or self.model
-         tokenizer = self.tokenizer
-
-         sampling_params = SamplingParams(
-             temperature=temperature,
-             max_tokens=max_new_tokens,
-             seed=self.seed,
-         )
-
-         prompt = self.chat_template.render(
-             messages=messages, tools=tools, add_generation_prompt=True)
-         if output_begin_string is not None:
-             prompt += output_begin_string
-
-         token_overflow = False
-         if check_token_status and max_token is not None:
-             num_input_tokens = len(tokenizer.encode(prompt, add_special_tokens=False))
-             logger.info("Input prompt tokens: %d, max_token: %d", num_input_tokens, max_token)
-             if num_input_tokens > max_token:
-                 torch.cuda.empty_cache()
-                 gc.collect()
-                 logger.warning("Token overflow: %d > %d", num_input_tokens, max_token)
-                 return (None, True) if check_token_status else None
-
-         try:
-             output = model.generate(prompt, sampling_params=sampling_params)
-             output_text = output[0].outputs[0].text
-             output_tokens = len(tokenizer.encode(output_text, add_special_tokens=False))
-             logger.debug("Inference output: %s (output tokens: %d)", output_text[:100], output_tokens)
-
-             if skip_special_tokens:
-                 output_text = output_text.replace("</s>", "").strip()
-
-             torch.cuda.empty_cache()
-             gc.collect()
-
-             return (output_text, token_overflow) if check_token_status else output_text
-         except Exception as e:
-             logger.error("Inference failed: %s", str(e))
-             raise RuntimeError(f"Inference failed: {str(e)}")

-     def cleanup(self)
-         """Clean up resources
          if hasattr(self, 'model'):
              del self.model
          if hasattr(self, 'rag_model'):
              del self.rag_model
-         if hasattr(self, 'tooluniverse'):
-             del self.tooluniverse
          torch.cuda.empty_cache()
-         gc.collect()
          logger.info("TxAgent resources cleaned up")

      def __del__(self):
After (new file, 120 lines shown; added lines are marked with "+"):

  import os
  import json
  import logging
+ import torch
+ from typing import List, Dict, Optional, Union
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from sentence_transformers import SentenceTransformer

+ # Configure logging
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  logger = logging.getLogger("TxAgent")

  class TxAgent:
      def __init__(self,
                   model_name: str,
                   rag_model_name: str,
                   tool_files_dict: Optional[Dict] = None,
+                  use_vllm: bool = False,
                   force_finish: bool = True,
+                  enable_checker: bool = True,
+                  step_rag_num: int = 4,
+                  seed: Optional[int] = None):
+
          self.model_name = model_name
          self.rag_model_name = rag_model_name
          self.tool_files_dict = tool_files_dict or {}
+         self.use_vllm = use_vllm
          self.force_finish = force_finish
          self.enable_checker = enable_checker
+         self.step_rag_num = step_rag_num
+         self.seed = seed
+
+         self.model = None
+         self.tokenizer = None
+         self.rag_model = None
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+         logger.info(f"Initializing TxAgent with model: {model_name} on device: {self.device}")

+     def init_model(self):
+         """Initialize both the main model and RAG model."""
          self.load_models()
+         self.load_rag_model()
+         logger.info("Model initialization complete")

+     def load_models(self):
+         """Load the main LLM model."""
          try:
+             logger.info(f"Loading model: {self.model_name}")
+
+             self.tokenizer = AutoTokenizer.from_pretrained(
+                 self.model_name,
+                 cache_dir=os.environ.get("TRANSFORMERS_CACHE")
              )
+
+             self.model = AutoModelForCausalLM.from_pretrained(
+                 self.model_name,
+                 torch_dtype=torch.float16,
+                 device_map="auto",
+                 cache_dir=os.environ.get("TRANSFORMERS_CACHE")
              )
+
+             logger.info(f"Successfully loaded model on {self.device}")
+
          except Exception as e:
+             logger.error(f"Failed to load model: {str(e)}")
              raise RuntimeError(f"Failed to load model: {str(e)}")

+     def load_rag_model(self):
+         """Load the RAG model."""
          try:
+             logger.info(f"Loading RAG model: {self.rag_model_name}")
+             self.rag_model = SentenceTransformer(
+                 self.rag_model_name,
+                 device=str(self.device)
+             )
+             logger.info("RAG model loaded successfully")
          except Exception as e:
+             logger.error(f"Failed to load RAG model: {str(e)}")
+             raise RuntimeError(f"Failed to load RAG model: {str(e)}")

+     def process_document(self, file_path: str) -> Dict:
+         """Process a document and return analysis results."""
+         try:
+             # Extract text (implement your extraction logic)
+             text = self.extract_text(file_path)

+             # Process with LLM (implement your processing logic)
+             result = self.analyze_text(text)
+
+             return {
+                 "status": "success",
+                 "analysis": result,
+                 "model": self.model_name
+             }

          except Exception as e:
+             logger.error(f"Document processing failed: {str(e)}")
+             raise RuntimeError(f"Document processing failed: {str(e)}")

+     def extract_text(self, file_path: str) -> str:
+         """Extract text from various file formats."""
+         # Implement your text extraction logic here
+         return "Sample extracted text"

+     def analyze_text(self, text: str) -> str:
+         """Analyze extracted text using the LLM."""
+         # Implement your text analysis logic here
+         return "Sample analysis result"

+     def cleanup(self):
+         """Clean up resources."""
          if hasattr(self, 'model'):
              del self.model
          if hasattr(self, 'rag_model'):
              del self.rag_model
          torch.cuda.empty_cache()
          logger.info("TxAgent resources cleaned up")

      def __del__(self):
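The new version swaps the vLLM-backed, tool-calling agent for a plain transformers + sentence-transformers loader with stubbed document-processing methods. Below is a minimal usage sketch, assuming the package is importable as txagent.txagent; the model IDs are placeholders, and the SimpleTxAgent subclass with its extract_text override is hypothetical, only illustrating where real extraction logic would replace the "Sample extracted text" stub.

# Hypothetical usage sketch of the rewritten TxAgent API; model names are placeholders.
from txagent.txagent import TxAgent

class SimpleTxAgent(TxAgent):
    def extract_text(self, file_path: str) -> str:
        # Minimal stand-in for real extraction logic: read the file as UTF-8 text.
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            return f.read()

agent = SimpleTxAgent(
    model_name="some-org/some-causal-lm",                      # placeholder model ID
    rag_model_name="sentence-transformers/all-MiniLM-L6-v2",   # placeholder embedding model
)
agent.init_model()                      # loads tokenizer, causal LM, and SentenceTransformer
report = agent.process_document("notes.txt")
print(report["status"], report["analysis"])
agent.cleanup()                         # frees model objects and empties the CUDA cache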