import os
from smolagents import CodeAgent, ToolCallingAgent
from smolagents import OpenAIServerModel
from tools.fetch import fetch_webpage
from tools.yttranscript import get_youtube_transcript, get_youtube_title_description
import myprompts
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch


# --- Basic Agent Definition ---
# Basic model wrapper for local inference with debug info
class BasicAgent:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        self.device = model.device if hasattr(model, "device") else "cpu"
        print(f"Model device: {self.device}")

    def _extract_prompt(self, prompt):
        if isinstance(prompt, str):
            return prompt
        elif isinstance(prompt, list):
            # Convert list of ChatMessages or dicts to plain text
            return "\n".join(
                msg.content if hasattr(msg, "content") else msg.get("content", str(msg))
                for msg in prompt
            )
        else:
            return str(prompt)

    def generate(self, prompt, max_new_tokens=512):
        try:
            print("\n[DEBUG] Raw prompt input:", prompt)
            text_prompt = self._extract_prompt(prompt)
            print(
                "[DEBUG] Extracted prompt text:",
                text_prompt[:200] + "..." if len(text_prompt) > 200 else text_prompt,
            )

            inputs = self.tokenizer(text_prompt, return_tensors="pt").to(self.device)
            input_ids = inputs["input_ids"]
            print("[DEBUG] Tokenized input shape:", input_ids.shape)

            with torch.no_grad():
                output = self.model.generate(
                    input_ids=input_ids,
                    do_sample=True,
                    temperature=0.3,
                    min_p=0.15,
                    repetition_penalty=1.05,
                    max_new_tokens=max_new_tokens,
                    pad_token_id=self.tokenizer.eos_token_id,
                )

            # Decode only the newly generated tokens, not the echoed prompt
            new_tokens = output[0][len(input_ids[0]):]
            decoded = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
            print("[DEBUG] Decoded output:", decoded.strip())
            return decoded.strip()
        except Exception as e:
            print(f"[ERROR] Generation failed: {e}")
            return f"Error generating response: {e}"

    def __call__(self, prompt, max_new_tokens=512):
        return self.generate(prompt, max_new_tokens)


# Load your model and tokenizer
def load_model(model_id="LiquidAI/LFM2-1.2B"):
    print(f"Loading model: {model_id}")
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return BasicAgent(model, tokenizer)


# Run minimal test
if __name__ == "__main__":
    model = load_model()

    # Example prompt
    prompt = "What is the capital of France?"
    print("\n[TEST] Asking a simple question...")
    response = model(prompt)
    print("\nFinal Answer:", response)
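
# --- Optional usage sketch (illustrative, not part of the original test) ---
# A minimal sketch exercising the list-of-messages branch of BasicAgent._extract_prompt:
# chat-style dicts are flattened to plain text before tokenization. The role/content
# dictionaries below are hypothetical example inputs, and this block simply reuses the
# module-level `model` created by the test above (both __main__ blocks run in sequence).
if __name__ == "__main__":
    chat_prompt = [
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Name three prime numbers."},
    ]
    # max_new_tokens kept small for a quick sanity check
    chat_response = model(chat_prompt, max_new_tokens=64)
    print("\nChat-style Answer:", chat_response)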