Final_Assignment_Template3

Sleeping

App Files Community

bwilkie committed on Jul 22

Commit

e4f7d1f

verified ·

1 Parent(s): 9ff076a

Update myagent.py

Browse files

Files changed (1) hide show

myagent.py +15 -18

myagent.py CHANGED Viewed

@@ -49,11 +49,10 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto",
     torch_dtype="bfloat16",
     trust_remote_code=True,
-#    attn_implementation="flash_attention_2" <- uncomment on compatible GPU
 )
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 # Create a wrapper class that matches the expected interface
 class LocalLlamaModel:
     def __init__(self, model, tokenizer):
@@ -61,29 +60,25 @@ class LocalLlamaModel:
         self.tokenizer = tokenizer
         self.device = model.device if hasattr(model, 'device') else 'cpu'
-    def generate(self, prompt: str, max_new_tokens=512*10, **kwargs):
-        # Generate answer
-        prompt = "What is C. elegans?"
-        input_ids = tokenizer.apply_chat_template(
             [{"role": "user", "content": prompt}],
             add_generation_prompt=True,
             return_tensors="pt",
             tokenize=True,
-        ).to(model.device)
-        output = model.generate(
             input_ids,
             do_sample=True,
             temperature=0.3,
             min_p=0.15,
             repetition_penalty=1.05,
-            max_new_tokens=512,
         )
-        output =tokenizer.decode(output[0], skip_special_tokens=False)
         return output
     def __call__(self, prompt: str, max_new_tokens=512, **kwargs):
@@ -91,16 +86,18 @@ class LocalLlamaModel:
         return self.generate(prompt, max_new_tokens, **kwargs)
 # Create the model instance
-model = LocalLlamaModel(model_init, tokenizer)
 # Now create your agents - these should work with the wrapped model
-reviewer_agent = ToolCallingAgent(model=model, tools=[])
-model_agent = ToolCallingAgent(model=model, tools=[fetch_webpage])
 gaia_agent = CodeAgent(
-    tools=[fetch_webpage, get_youtube_title_description, get_youtube_transcript],
-    model=model
 )
 if __name__ == "__main__":
     # Example usage
     question = "What was the actual enrollment of the Malko competition in 2023?"

     device_map="auto",
     torch_dtype="bfloat16",
     trust_remote_code=True,
+    # attn_implementation="flash_attention_2"  # <- uncomment on compatible GPU
 )
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 # Create a wrapper class that matches the expected interface
 class LocalLlamaModel:
     def __init__(self, model, tokenizer):
         self.tokenizer = tokenizer
         self.device = model.device if hasattr(model, 'device') else 'cpu'
+    def generate(self, prompt: str, max_new_tokens=512, **kwargs):
+        # Generate answer using the provided prompt
+        input_ids = self.tokenizer.apply_chat_template(
             [{"role": "user", "content": prompt}],
             add_generation_prompt=True,
             return_tensors="pt",
             tokenize=True,
+        ).to(self.model.device)
+        output = self.model.generate(
             input_ids,
             do_sample=True,
             temperature=0.3,
             min_p=0.15,
             repetition_penalty=1.05,
+            max_new_tokens=max_new_tokens,
         )
+        output = self.tokenizer.decode(output[0], skip_special_tokens=False)
         return output
     def __call__(self, prompt: str, max_new_tokens=512, **kwargs):
         return self.generate(prompt, max_new_tokens, **kwargs)
 # Create the model instance
+wrapped_model = LocalLlamaModel(model, tokenizer)
 # Now create your agents - these should work with the wrapped model
+reviewer_agent = ToolCallingAgent(model=wrapped_model, tools=[])
+model_agent = ToolCallingAgent(model=wrapped_model, tools=[fetch_webpage])
 gaia_agent = CodeAgent(
+    tools=[fetch_webpage, get_youtube_title_description, get_youtube_transcript],
+    model=wrapped_model
 )
 if __name__ == "__main__":
     # Example usage
     question = "What was the actual enrollment of the Malko competition in 2023?"