import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Agent and tool imports below are not exercised by the minimal test in this file.
from smolagents import CodeAgent, ToolCallingAgent, OpenAIServerModel
from tools.fetch import fetch_webpage
from tools.yttranscript import get_youtube_transcript, get_youtube_title_description
import myprompts

# --- Basic Agent Definition ---

# Basic model wrapper for local inference with debug info
class BasicAgent:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        self.device = model.device if hasattr(model, 'device') else 'cpu'
        print(f"Model device: {self.device}")

    def _extract_prompt(self, prompt):
        if isinstance(prompt, str):
            return prompt
        elif isinstance(prompt, list):
            # Convert list of ChatMessages or dicts to plain text
            return "\n".join(
                msg.content if hasattr(msg, "content") else msg.get("content", str(msg))
                for msg in prompt
            )
        else:
            return str(prompt)

    def generate(self, prompt, max_new_tokens=512):
        try:
            print("\n[DEBUG] Raw prompt input:", prompt)
            text_prompt = self._extract_prompt(prompt)
            print("[DEBUG] Extracted prompt text:", text_prompt[:200] + "..." if len(text_prompt) > 200 else text_prompt)

            inputs = self.tokenizer(text_prompt, return_tensors="pt").to(self.device)
            input_ids = inputs["input_ids"]
            # Pass the attention mask explicitly so generate() does not have to infer it
            # (pad_token_id is set to eos_token_id, which makes that inference ambiguous).
            attention_mask = inputs["attention_mask"]

            print("[DEBUG] Tokenized input shape:", input_ids.shape)

            with torch.no_grad():
                output = self.model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    do_sample=True,
                    temperature=0.3,
                    min_p=0.15,
                    repetition_penalty=1.05,
                    max_new_tokens=max_new_tokens,
                    pad_token_id=self.tokenizer.eos_token_id,
                )

            new_tokens = output[0][len(input_ids[0]):]
            decoded = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
            print("[DEBUG] Decoded output:", decoded.strip())

            return decoded.strip()

        except Exception as e:
            print(f"[ERROR] Generation failed: {e}")
            return f"Error generating response: {e}"

    def __call__(self, prompt, max_new_tokens=512):
        return self.generate(prompt, max_new_tokens)

# Load the model and tokenizer and wrap them in a BasicAgent
def load_model(model_id="LiquidAI/LFM2-1.2B"):
    print(f"Loading model: {model_id}")
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return BasicAgent(model, tokenizer)
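
# A minimal sketch of chat-template prompting, assuming the LFM2-1.2B tokenizer ships a
# chat template; instruct checkpoints usually respond better to a templated prompt than
# to a raw string. If the tokenizer has no template, keep passing the plain string as in
# the test below.
def ask_with_chat_template(agent, question, max_new_tokens=512):
    messages = [{"role": "user", "content": question}]
    chat_prompt = agent.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    return agent(chat_prompt, max_new_tokens)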

# Run minimal test
if __name__ == "__main__":
    agent = load_model()

    # Example prompt
    prompt = "What is the capital of France?"

    print("\n[TEST] Asking a simple question...")
    response = agent(prompt)
    print("\nFinal Answer:", response)