bwilkie committed
Commit a6d2c9e · verified · 1 Parent(s): aa012a4

Update myagent.py

Files changed (1):
  myagent.py  +32 -10
myagent.py CHANGED
@@ -42,13 +42,16 @@ class BasicAgent:
 
 
 
-        # Model configuration
-        model_id = "bartowski/Llama-3.2-3B-Instruct-GGUF"
-        filename = "Llama-3.2-3B-Instruct-Q4_K_M.gguf"
-        torch_dtype = torch.bfloat16
-        # Load tokenizer and model
-        tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
-        model_init = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename, torch_dtype=torch_dtype)
+        # Load model and tokenizer
+        model_id = "LiquidAI/LFM2-1.2B"
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            device_map="auto",
+            torch_dtype="bfloat16",
+            trust_remote_code=True,
+            # attn_implementation="flash_attention_2" <- uncomment on compatible GPU
+        )
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 
         # Create a wrapper class that matches the expected interface
@@ -59,9 +62,28 @@ class LocalLlamaModel:
         self.device = model.device if hasattr(model, 'device') else 'cpu'
 
     def generate(self, prompt: str, max_new_tokens=512*10, **kwargs):
-        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
-        output_ids = model.generate(input_ids, max_new_tokens=max_new_tokens)
-        output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+        # Generate answer
+        prompt = "What is C. elegans?"
+        input_ids = tokenizer.apply_chat_template(
+            [{"role": "user", "content": prompt}],
+            add_generation_prompt=True,
+            return_tensors="pt",
+            tokenize=True,
+        ).to(model.device)
+
+        output = model.generate(
+            input_ids,
+            do_sample=True,
+            temperature=0.3,
+            min_p=0.15,
+            repetition_penalty=1.05,
+            max_new_tokens=512,
+        )
+
+        output = tokenizer.decode(output[0], skip_special_tokens=False)
+
         return output
 
     def __call__(self, prompt: str, max_new_tokens=512, **kwargs):
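
For context, below is a minimal self-contained sketch of the wrapper this commit appears to be building toward. It is a reconstruction under assumptions, not the committed code: it routes the caller's prompt through self.tokenizer/self.model (the diff references module-level tokenizer/model and hardcodes prompt = "What is C. elegans?" inside generate, shadowing the argument), and it assumes a recent transformers release where apply_chat_template and the min_p sampling option are available.

# Hypothetical consolidated wrapper (a sketch, not the committed file).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "LiquidAI/LFM2-1.2B"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="bfloat16",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

class LocalLlamaModel:
    """Wrapper matching the interface BasicAgent expects."""

    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        self.device = model.device if hasattr(model, "device") else "cpu"

    def generate(self, prompt: str, max_new_tokens=512, **kwargs):
        # Format the caller's prompt with the model's chat template.
        input_ids = self.tokenizer.apply_chat_template(
            [{"role": "user", "content": prompt}],
            add_generation_prompt=True,
            return_tensors="pt",
            tokenize=True,
        ).to(self.model.device)
        # Sampling settings copied from the commit.
        output_ids = self.model.generate(
            input_ids,
            do_sample=True,
            temperature=0.3,
            min_p=0.15,
            repetition_penalty=1.05,
            max_new_tokens=max_new_tokens,
        )
        # skip_special_tokens=True strips chat-template markers here;
        # the commit keeps them (skip_special_tokens=False).
        return self.tokenizer.decode(output_ids[0], skip_special_tokens=True)

    def __call__(self, prompt: str, max_new_tokens=512, **kwargs):
        return self.generate(prompt, max_new_tokens=max_new_tokens, **kwargs)

Usage would then look like LocalLlamaModel(model, tokenizer)("What is C. elegans?"). Note that generate returns the prompt plus the completion, so the decoded text still echoes the question unless output_ids[0] is sliced to output_ids[0][input_ids.shape[-1]:] before decoding.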