Ais committed on
Commit 1cf2bdf · verified · 1 Parent(s): 081917b

Create inference.py

Files changed (1)
  1. app/inference.py +34 -0
app/inference.py ADDED
@@ -0,0 +1,34 @@
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
+ from peft import PeftModel
+ import torch
+
+ ADAPTER_PATH = "adapter"
+ BASE_MODEL = "Qwen/Qwen2-0.5B-Instruct"
+
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
+ model = AutoModelForCausalLM.from_pretrained(
+     BASE_MODEL,
+     device_map="auto",
+     trust_remote_code=True,
+     torch_dtype=torch.float16
+ )
+ # Attach the fine-tuned LoRA adapter on top of the base model and switch to eval mode.
+ model = PeftModel.from_pretrained(model, ADAPTER_PATH)
+ model.eval()
+
+ # Optional: pass `streamer=streamer` to `model.generate` for token-by-token output.
+ streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+
+ def generate_response(prompt: str) -> str:
+     # Build the Qwen2 ChatML prompt by hand.
+     formatted = f"<|im_start|>system\nYou are a helpful AI assistant.<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
+     inputs = tokenizer(formatted, return_tensors="pt").to(model.device)
+     with torch.no_grad():
+         output = model.generate(
+             **inputs,
+             max_new_tokens=512,
+             temperature=0.7,
+             top_p=0.9,
+             do_sample=True,
+             pad_token_id=tokenizer.eos_token_id
+         )
+     # Decode only the newly generated tokens. Splitting the full decoded string on
+     # "<|im_start|>assistant" would fail here, since skip_special_tokens=True strips
+     # those markers from the decoded text.
+     new_tokens = output[0][inputs["input_ids"].shape[1]:]
+     answer = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
+     return answer
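A minimal sketch of how this module might be exercised, assuming it is importable as `app.inference` and the adapter weights are present under `adapter/` (the directory named by `ADAPTER_PATH` above); the prompt string is only an example:

    from app.inference import generate_response

    print(generate_response("Summarize what a LoRA adapter does."))

Note that the model and tokenizer load at import time, so the first import is slow; subsequent calls to `generate_response` reuse the loaded weights.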