Update src/txagent/txagent.py
src/txagent/txagent.py  CHANGED  (+10 -10)
@@ -73,7 +73,7 @@ class TxAgent:
             return f"The model {model_name} is already loaded."
         self.model_name = model_name
 
-        self.model = LLM(model=self.model_name, dtype="float16", max_model_len=
+        self.model = LLM(model=self.model_name, dtype="float16", max_model_len=1024, gpu_memory_utilization=0.8)
         self.chat_template = Template(self.model.get_tokenizer().chat_template)
         self.tokenizer = self.model.get_tokenizer()
         logger.info("Model %s loaded successfully", self.model_name)
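Note on the new loader arguments: max_model_len caps the combined prompt-plus-generation length vLLM will accept per request, and gpu_memory_utilization limits the fraction of GPU memory vLLM may reserve for weights and KV cache. A minimal standalone sketch of how these arguments behave (the model id is a placeholder, not taken from this repo):

    # Minimal sketch of the vLLM arguments used above; the model id is a placeholder.
    from vllm import LLM, SamplingParams

    llm = LLM(model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
              dtype="float16",
              max_model_len=1024,            # prompt + generated tokens per request
              gpu_memory_utilization=0.8)    # share of GPU memory for weights + KV cache
    outputs = llm.generate(["Hello"], SamplingParams(max_tokens=64))
    print(outputs[0].outputs[0].text)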
@@ -197,7 +197,7 @@ class TxAgent:
                 )
             call_result = self.run_multistep_agent(
                 full_message, temperature=temperature,
-                max_new_tokens=
+                max_new_tokens=512, max_token=1024,
                 call_agent=False, call_agent_level=call_agent_level)
             if call_result is None:
                 call_result = "⚠️ No content returned from sub-agent."
@@ -264,7 +264,7 @@ class TxAgent:
             sub_agent_task = "Sub TxAgent plan: " + str(solution_plan)
             call_result = yield from self.run_gradio_chat(
                 full_message, history=[], temperature=temperature,
-                max_new_tokens=
+                max_new_tokens=512, max_token=1024,
                 call_agent=False, call_agent_level=call_agent_level,
                 conversation=None, sub_agent_task=sub_agent_task)
             if call_result is not None and isinstance(call_result, str):
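Both sub-agent entry points (run_multistep_agent and run_gradio_chat) now pin max_new_tokens=512 and max_token=1024. Judging by the names and the max_model_len=1024 set at load time, max_new_tokens bounds the tokens generated per call while max_token appears to bound the overall prompt budget; the helper below is an assumed illustration of that interaction, not code from this diff.

    # Assumed illustration: generation has to fit inside the 1024-token window
    # together with the prompt, so the effective cap can be smaller than 512.
    def effective_new_tokens(prompt_tokens, max_new_tokens=512, max_model_len=1024):
        return max(0, min(max_new_tokens, max_model_len - prompt_tokens))

    print(effective_new_tokens(prompt_tokens=700))  # -> 324, not the full 512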
@@ -356,7 +356,7 @@ class TxAgent:
             if (self.enable_summary or token_overflow) and not call_agent:
                 enable_summary = True
             last_status = self.function_result_summary(
-
+                conversation, status=last_status, enable_summary=enable_summary)
 
             if function_call_messages:
                 conversation.extend(function_call_messages)
@@ -400,8 +400,8 @@ class TxAgent:
         return None
 
     def llm_infer(self, messages, temperature=0.1, tools=None,
-                  output_begin_string=None, max_new_tokens=
-                  max_token=
+                  output_begin_string=None, max_new_tokens=512,
+                  max_token=1024, skip_special_tokens=True,
                   model=None, tokenizer=None, terminators=None,
                   seed=None, check_token_status=False):
         if model is None:
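llm_infer now defaults to max_new_tokens=512, max_token=1024, and skip_special_tokens=True. Its body is not part of this diff, but with vLLM these values would typically be forwarded to a SamplingParams object roughly as sketched below (assumed mapping):

    # Assumed mapping of llm_infer's keyword defaults onto vLLM sampling options.
    from vllm import SamplingParams

    sampling_params = SamplingParams(
        temperature=0.1,            # llm_infer default
        max_tokens=512,             # max_new_tokens: cap on generated tokens
        skip_special_tokens=True,   # drop special tokens from the decoded text
        seed=None,                  # forwarded seed, if any
        stop=None,                  # terminators would map to stop strings here
    )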
@@ -549,8 +549,8 @@ Summarize the function calls' responses in one sentence with all necessary infor
                     thought_calls=this_thought_calls,
                     function_response=function_response,
                     temperature=0.1,
-                    max_new_tokens=
-                    max_token=
+                    max_new_tokens=512,
+                    max_token=1024)
                 input_list.insert(last_call_idx + 1, {'role': 'tool', 'content': result_summary})
                 status['summarized_index'] = last_call_idx + 2
                 idx += 1
@@ -571,8 +571,8 @@ Summarize the function calls' responses in one sentence with all necessary infor
                     thought_calls=this_thought_calls,
                     function_response=function_response,
                     temperature=0.1,
-                    max_new_tokens=
-                    max_token=
+                    max_new_tokens=512,
+                    max_token=1024)
                 tool_calls = json.loads(input_list[last_call_idx]['tool_calls'])
             for tool_call in tool_calls:
                 del tool_call['call_id']
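Both function_result_summary call sites now summarize tool responses with max_new_tokens=512 and max_token=1024, in line with the 1024-token context configured at load time. A small assumed helper showing the kind of budget check these limits imply (not code from the repo):

    # Assumed helper: check whether a tool response still fits the 1024-token
    # prompt budget before asking the model to summarize it.
    def exceeds_budget(tokenizer, text, max_token=1024):
        return len(tokenizer.encode(text)) > max_token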
|