"""Question-answering agent that routes between a code agent and a model agent.

A reviewer agent first decides whether a question needs code execution
("code") or can be answered directly ("model"); the question is then handed
to the matching agent. All agents share one locally loaded causal LM.
"""

import os

import torch
from smolagents import CodeAgent, ToolCallingAgent
from smolagents import OpenAIServerModel
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

import myprompts
from tools.fetch import fetch_webpage
from tools.yttranscript import get_youtube_transcript, get_youtube_title_description


# --- Basic Agent Definition ---
class BasicAgent:
    """Callable agent: ask the reviewer for a route, then run the chosen agent."""

    def __init__(self):
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """Answer *question* and return the chosen agent's result.

        Any exception raised while running the agents is caught, printed and
        returned as an error message string instead of propagating.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        try:
            # Use the reviewer agent to determine if the question can be
            # answered by a model or requires code.
            print("Calling reviewer agent...")
            reviewer_answer = reviewer_agent.run(
                myprompts.review_prompt + "\nThe question is:\n" + question
            )
            print(f"Reviewer agent answer: {reviewer_answer}")

            # Normalize the verdict: an exact `==` comparison misses answers
            # such as "Code", " code\n" or '"model".' and would silently
            # produce an empty answer.
            route = str(reviewer_answer).strip().strip("\"'`.").strip().lower()

            question = question + "\n" + myprompts.output_format

            if route == "code":
                fixed_answer = gaia_agent.run(question)
                print(f"Code agent answer: {fixed_answer}")
                return fixed_answer

            if route != "model":
                # Unrecognized verdict: fall back to the model agent rather
                # than returning an empty string.
                print(
                    f"Unrecognized reviewer answer {reviewer_answer!r}; "
                    "falling back to model agent."
                )

            # If the reviewer agent suggests using the model, we can proceed
            # with the model agent.
            print("Using model agent to answer the question.")
            fixed_answer = model_agent.run(
                myprompts.model_prompt + "\nThe question is:\n" + question
            )
            print(f"Model agent answer: {fixed_answer}")
            return fixed_answer
        except Exception as e:
            error = f"An error occurred while processing the question: {e}"
            print(error)
            return error


# Load model and tokenizer
model_id = "LiquidAI/LFM2-1.2B"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="bfloat16",
    trust_remote_code=True,
    # attn_implementation="flash_attention_2"  # <- uncomment on compatible GPU
)
tokenizer = AutoTokenizer.from_pretrained(model_id)


# Create a wrapper class that matches the expected interface
class LocalLlamaModel:
    """Thin callable wrapper around a causal LM and its tokenizer.

    NOTE(review): the agents below are given this wrapper as their ``model``.
    smolagents models are presumably called with a list of chat messages and
    expected to return a message object rather than a plain string — confirm
    against the installed smolagents version (it may already ship a
    transformers-backed model class that could replace this wrapper).
    """

    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        self.device = getattr(model, "device", "cpu")

    def generate(self, prompt: str, max_new_tokens=512, **kwargs):
        """Generate a completion for *prompt* and return it as text.

        The prompt is wrapped as a single user turn using the tokenizer's
        chat template. Extra keyword arguments are accepted for call-site
        compatibility but are not forwarded to the underlying model.

        Returns only the newly generated text: the echoed prompt and special
        tokens are removed.
        """
        input_ids = self.tokenizer.apply_chat_template(
            [{"role": "user", "content": prompt}],
            add_generation_prompt=True,
            return_tensors="pt",
            tokenize=True,
        ).to(self.model.device)

        output = self.model.generate(
            input_ids,
            do_sample=True,
            temperature=0.3,
            min_p=0.15,
            repetition_penalty=1.05,
            max_new_tokens=max_new_tokens,
        )

        # For a decoder-only model the returned sequence starts with the
        # prompt tokens; keep only what was generated after them.
        prompt_length = input_ids.shape[-1]
        new_tokens = output[0][prompt_length:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True)

    def __call__(self, prompt: str, max_new_tokens=512, **kwargs):
        """Make the model callable like a function."""
        return self.generate(prompt, max_new_tokens, **kwargs)


# Create the model instance
wrapped_model = LocalLlamaModel(model, tokenizer)

# Now create the agents; all of them share the wrapped local model.
reviewer_agent = ToolCallingAgent(model=wrapped_model, tools=[])
model_agent = ToolCallingAgent(model=wrapped_model, tools=[fetch_webpage])
gaia_agent = CodeAgent(
    tools=[fetch_webpage, get_youtube_title_description, get_youtube_transcript],
    model=wrapped_model,
)


if __name__ == "__main__":
    # Example usage
    question = "What was the actual enrollment of the Malko competition in 2023?"
    agent = BasicAgent()
    answer = agent(question)
    print(f"Answer: {answer}")