import os

from smolagents import CodeAgent, ToolCallingAgent
from smolagents import OpenAIServerModel

from tools.fetch import fetch_webpage
from tools.yttranscript import get_youtube_transcript, get_youtube_title_description

import myprompts

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch


class BasicAgent:
    """Thin wrapper around a Hugging Face causal LM with a string-in / string-out interface."""

    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        # Fall back to CPU when the model object does not expose a device attribute.
        self.device = model.device if hasattr(model, "device") else "cpu"
        print(f"Model device: {self.device}")

    def _extract_prompt(self, prompt):
        """Normalize a prompt that may be a plain string or a list of chat messages."""
        if isinstance(prompt, str):
            return prompt
        elif isinstance(prompt, list):
            # Accept message objects exposing .content as well as dict-style messages;
            # a dict without a "content" key falls back to its str() form.
            return "\n".join(
                msg.content if hasattr(msg, "content") else msg.get("content", str(msg))
                for msg in prompt
            )
        else:
            return str(prompt)

    def generate(self, prompt, max_new_tokens=512):
        try:
            print("\n[DEBUG] Raw prompt input:", prompt)
            text_prompt = self._extract_prompt(prompt)
            print("[DEBUG] Extracted prompt text:",
                  text_prompt[:200] + "..." if len(text_prompt) > 200 else text_prompt)

            inputs = self.tokenizer(text_prompt, return_tensors="pt").to(self.device)
            input_ids = inputs["input_ids"]

            print("[DEBUG] Tokenized input shape:", input_ids.shape)

            with torch.no_grad():
                output = self.model.generate(
                    input_ids=input_ids,
                    # Pass the attention mask explicitly so generate() does not have to infer it.
                    attention_mask=inputs["attention_mask"],
                    do_sample=True,
                    temperature=0.3,
                    min_p=0.15,
                    repetition_penalty=1.05,
                    max_new_tokens=max_new_tokens,
                    pad_token_id=self.tokenizer.eos_token_id,
                )

            # Drop the prompt tokens so only the newly generated continuation is decoded.
            new_tokens = output[0][len(input_ids[0]):]
            decoded = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
            print("[DEBUG] Decoded output:", decoded.strip())

            return decoded.strip()

        except Exception as e:
            print(f"[ERROR] Generation failed: {e}")
            return f"Error generating response: {e}"

    def __call__(self, prompt, max_new_tokens=512):
        return self.generate(prompt, max_new_tokens)


def load_model(model_id="LiquidAI/LFM2-1.2B"):
    """Load the causal LM and tokenizer, then wrap them in a BasicAgent."""
    print(f"Loading model: {model_id}")
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return BasicAgent(model, tokenizer)


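# Hypothetical wiring sketch (not exercised below): the smolagents and tools imports
# above suggest BasicAgent is meant to back a tool-using agent. Something along these
# lines could work, assuming BasicAgent is adapted to smolagents' model interface
# (smolagents expects a model that returns chat messages, not a bare string):
#
#     agent = CodeAgent(
#         tools=[fetch_webpage, get_youtube_transcript, get_youtube_title_description],
#         model=load_model(),
#     )
#     agent.run("Summarize this page: https://example.com")
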
if __name__ == "__main__":
    model = load_model()

    prompt = "What is the capital of France?"

    print("\n[TEST] Asking a simple question...")
    response = model(prompt)
    print("\nFinal Answer:", response)