import os
from smolagents import CodeAgent, ToolCallingAgent
from smolagents import OpenAIServerModel
from tools.fetch import fetch_webpage
from tools.yttranscript import get_youtube_transcript, get_youtube_title_description
import myprompts
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
# --- Basic Agent Definition ---
# Basic model wrapper for local inference with debug info
class BasicAgent:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        self.device = model.device if hasattr(model, 'device') else 'cpu'
        print(f"Model device: {self.device}")

    def _extract_prompt(self, prompt):
        if isinstance(prompt, str):
            return prompt
        elif isinstance(prompt, list):
            # Convert a list of ChatMessages or role/content dicts to plain text
            return "\n".join(
                msg.content if hasattr(msg, "content") else msg.get("content", str(msg))
                for msg in prompt
            )
        else:
            return str(prompt)
    def generate(self, prompt, max_new_tokens=512):
        try:
            print("\n[DEBUG] Raw prompt input:", prompt)
            text_prompt = self._extract_prompt(prompt)
            print(
                "[DEBUG] Extracted prompt text:",
                text_prompt[:200] + "..." if len(text_prompt) > 200 else text_prompt,
            )
            inputs = self.tokenizer(text_prompt, return_tensors="pt").to(self.device)
            input_ids = inputs["input_ids"]
            print("[DEBUG] Tokenized input shape:", input_ids.shape)
            with torch.no_grad():
                output = self.model.generate(
                    input_ids=input_ids,
                    attention_mask=inputs["attention_mask"],  # pass the mask to avoid the missing-mask warning
                    do_sample=True,
                    temperature=0.3,
                    min_p=0.15,
                    repetition_penalty=1.05,
                    max_new_tokens=max_new_tokens,
                    pad_token_id=self.tokenizer.eos_token_id,
                )
            # Keep only the newly generated tokens, dropping the echoed prompt
            new_tokens = output[0][len(input_ids[0]):]
            decoded = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
            print("[DEBUG] Decoded output:", decoded.strip())
            return decoded.strip()
        except Exception as e:
            print(f"[ERROR] Generation failed: {e}")
            return f"Error generating response: {e}"
    def __call__(self, prompt, max_new_tokens=512):
        return self.generate(prompt, max_new_tokens)
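
# Hedged sketch (not part of the original agent): LFM2 is an instruction-tuned chat
# model, so plain-text prompts may work better when wrapped with the tokenizer's chat
# template. This assumes the tokenizer ships a chat template, which recent chat models
# on the Hub generally do; if it does not, we fall back to the raw text.
def build_chat_prompt(tokenizer, user_message, system_message=None):
    """Format a single-turn prompt with the tokenizer's chat template, if available."""
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    messages.append({"role": "user", "content": user_message})
    try:
        return tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
    except Exception:
        # No chat template defined: use the raw user message as-is.
        return user_message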
# Load your model and tokenizer
def load_model(model_id="LiquidAI/LFM2-1.2B"):
print(f"Loading model: {model_id}")
model = AutoModelForCausalLM.from_pretrained(
model_id,
device_map="auto",
torch_dtype=torch.bfloat16,
trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
return BasicAgent(model, tokenizer)
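
# Alternative loader (hedged sketch): the `pipeline` helper imported above can perform
# the same model + tokenizer setup in one call. This returns a standard transformers
# text-generation pipeline, not a BasicAgent, and nothing else in this file depends on it.
def load_pipeline(model_id="LiquidAI/LFM2-1.2B"):
    return pipeline(
        "text-generation",
        model=model_id,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )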
# Run minimal test
if __name__ == "__main__":
    model = load_model()

    # Example prompt
    prompt = "What is the capital of France?"
    print("\n[TEST] Asking a simple question...")
    response = model(prompt)
    print("\nFinal Answer:", response)