import os
from smolagents import CodeAgent, ToolCallingAgent
from smolagents import OpenAIServerModel
from tools.fetch import fetch_webpage
from tools.yttranscript import get_youtube_transcript, get_youtube_title_description
import myprompts
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch


# --- Basic Agent Definition ---
# Basic model wrapper for local inference with debug info
class BasicAgent:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        self.device = model.device if hasattr(model, "device") else "cpu"
        print(f"Model device: {self.device}")

    def _extract_prompt(self, prompt):
        if isinstance(prompt, str):
            return prompt
        elif isinstance(prompt, list):
            # Convert list of ChatMessages or dicts to plain text
            return "\n".join(
                msg.content if hasattr(msg, "content") else msg.get("content", str(msg))
                for msg in prompt
            )
        else:
            return str(prompt)

    def generate(self, prompt, max_new_tokens=512):
        try:
            print("\n[DEBUG] Raw prompt input:", prompt)
            text_prompt = self._extract_prompt(prompt)
            print(
                "[DEBUG] Extracted prompt text:",
                text_prompt[:200] + "..." if len(text_prompt) > 200 else text_prompt,
            )

            inputs = self.tokenizer(text_prompt, return_tensors="pt").to(self.device)
            input_ids = inputs["input_ids"]
            print("[DEBUG] Tokenized input shape:", input_ids.shape)

            with torch.no_grad():
                output = self.model.generate(
                    input_ids=input_ids,
                    do_sample=True,
                    temperature=0.3,
                    min_p=0.15,
                    repetition_penalty=1.05,
                    max_new_tokens=max_new_tokens,
                    pad_token_id=self.tokenizer.eos_token_id,
                )

            # Decode only the newly generated tokens, not the echoed prompt
            new_tokens = output[0][len(input_ids[0]):]
            decoded = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
            print("[DEBUG] Decoded output:", decoded.strip())
            return decoded.strip()
        except Exception as e:
            print(f"[ERROR] Generation failed: {e}")
            return f"Error generating response: {e}"

    def __call__(self, prompt, max_new_tokens=512):
        return self.generate(prompt, max_new_tokens)


# Load your model and tokenizer
def load_model(model_id="LiquidAI/LFM2-1.2B"):
    print(f"Loading model: {model_id}")
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return BasicAgent(model, tokenizer)


# Run minimal test
if __name__ == "__main__":
    model = load_model()

    # Example prompt
    prompt = "What is the capital of France?"
    print("\n[TEST] Asking a simple question...")
    response = model(prompt)
    print("\nFinal Answer:", response)
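
# --- Optional usage sketch (illustrative, not part of the original test) ---
# A minimal sketch exercising the list-of-messages branch of BasicAgent._extract_prompt:
# chat-style dicts are flattened to plain text before tokenization. The role/content
# dictionaries below are hypothetical example inputs, and this block simply reuses the
# module-level `model` created by the test above (both __main__ blocks run in sequence).
if __name__ == "__main__":
    chat_prompt = [
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Name three prime numbers."},
    ]
    # max_new_tokens kept small for a quick sanity check
    chat_response = model(chat_prompt, max_new_tokens=64)
    print("\nChat-style Answer:", chat_response)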