import os
from smolagents import CodeAgent, ToolCallingAgent
from smolagents import OpenAIServerModel
from tools.fetch import fetch_webpage
from tools.yttranscript import get_youtube_transcript, get_youtube_title_description
import myprompts
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch


# --- Basic Agent Definition ---
# Basic model wrapper for local inference with debug info
class BasicAgent:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        self.device = model.device if hasattr(model, 'device') else 'cpu'
        print(f"Model device: {self.device}")

    def _extract_prompt(self, prompt):
        if isinstance(prompt, str):
            return prompt
        elif isinstance(prompt, list):
            # Convert list of ChatMessages or dicts to plain text
            return "\n".join(
                msg.content if hasattr(msg, "content") else msg.get("content", str(msg))
                for msg in prompt
            )
        else:
            return str(prompt)

    def generate(self, prompt, max_new_tokens=512):
        try:
            print("\n[DEBUG] Raw prompt input:", prompt)
            text_prompt = self._extract_prompt(prompt)
            print("[DEBUG] Extracted prompt text:", text_prompt[:200] + "..." if len(text_prompt) > 200 else text_prompt)
            inputs = self.tokenizer(text_prompt, return_tensors="pt").to(self.device)
            input_ids = inputs["input_ids"]
            print("[DEBUG] Tokenized input shape:", input_ids.shape)
            with torch.no_grad():
                # Low-temperature sampling with min_p filtering and a mild
                # repetition penalty keeps answers focused without being fully greedy.
                output = self.model.generate(
                    input_ids=input_ids,
                    attention_mask=inputs["attention_mask"],  # avoids the missing-attention-mask warning
                    do_sample=True,
                    temperature=0.3,
                    min_p=0.15,
                    repetition_penalty=1.05,
                    max_new_tokens=max_new_tokens,
                    pad_token_id=self.tokenizer.eos_token_id,
                )
            # Drop the prompt tokens and decode only the newly generated ones
            new_tokens = output[0][len(input_ids[0]):]
            decoded = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
            print("[DEBUG] Decoded output:", decoded.strip())
            return decoded.strip()
        except Exception as e:
            print(f"[ERROR] Generation failed: {e}")
            return f"Error generating response: {e}"

    def __call__(self, prompt, max_new_tokens=512):
        return self.generate(prompt, max_new_tokens)
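

# Optional helper (a sketch, not part of the original agent): instruction-tuned
# checkpoints such as LFM2 normally ship a chat template, so formatting the prompt
# with tokenizer.apply_chat_template() before generation usually works better than
# tokenizing the raw string as BasicAgent.generate() does above. The function name
# and single-turn message layout here are assumptions for illustration.
def generate_with_chat_template(agent, user_text, max_new_tokens=512):
    # Build a single-turn chat and let the tokenizer apply the model's template
    messages = [{"role": "user", "content": user_text}]
    input_ids = agent.tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(agent.device)
    with torch.no_grad():
        output = agent.model.generate(
            input_ids=input_ids,
            max_new_tokens=max_new_tokens,
            pad_token_id=agent.tokenizer.eos_token_id,
        )
    # Decode only the tokens generated after the templated prompt
    return agent.tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True).strip()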


# Load your model and tokenizer
def load_model(model_id="LiquidAI/LFM2-1.2B"):
    print(f"Loading model: {model_id}")
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return BasicAgent(model, tokenizer)
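

# Optional variant (a sketch, not part of the original setup): bfloat16 can be slow
# or unsupported on CPU-only Spaces and older GPUs, so picking the dtype at load
# time is one way to keep the same code working across hardware. The function name
# is hypothetical.
def load_model_safe(model_id="LiquidAI/LFM2-1.2B"):
    use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
    dtype = torch.bfloat16 if use_bf16 else torch.float32
    print(f"Loading model: {model_id} (dtype={dtype})")
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=dtype,
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return BasicAgent(model, tokenizer)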


# Run minimal test
if __name__ == "__main__":
    model = load_model()

    # Example prompt
    prompt = "What is the capital of France?"
    print("\n[TEST] Asking a simple question...")
    response = model(prompt)
    print("\nFinal Answer:", response)