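"""Minimal FastAPI inference server for a Qwen2.5-0.5B-Instruct model with a LoRA adapter."""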
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
app = FastAPI()
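# Load the base tokenizer; reuse the EOS token for padding so generate() has a pad id.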
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct", trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
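# Load the base model in full precision; float32 keeps CPU inference simple,
# while torch.float16/bfloat16 roughly halves memory on a GPU.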
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-0.5B-Instruct",
    torch_dtype=torch.float32,
    trust_remote_code=True
)
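# Attach the fine-tuned LoRA adapter (expected under ./adapter) in inference-only mode.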
model = PeftModel.from_pretrained(model, "./adapter", is_trainable=False)
model.eval()
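# The "### User:" / "### Assistant:" template below should match the format the
# adapter was trained on; a mismatched template degrades generation quality.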
def build_prompt(messages):
    prompt = ""
    for msg in messages:
        role = "User" if msg["role"] == "user" else "Assistant"
        prompt += f"### {role}:\n{msg['content']}\n"
    prompt += "### Assistant:\n"
    return prompt
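# Pydantic model for the request body; FastAPI validates incoming JSON against it.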
class ChatRequest(BaseModel):
    messages: list  # [{"role": "user", "content": "..."}]
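# NOTE: model.generate() runs synchronously and blocks the event loop; that is
# fine for a single-user demo, but offload to a thread or worker queue under real load.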
@app.post("/chat")
async def chat(req: ChatRequest):
    prompt = build_prompt(req.messages)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Disable gradient tracking during generation to save memory.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id
        )
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the prompt; keep only what follows the final
    # "### Assistant:" marker.
    reply = output_text.split("### Assistant:")[-1].strip()
    return {"response": reply}