Ais committed on
Commit 392dd49 · verified · 1 Parent(s): b67a3d2

Update app/main.py

Files changed (1)
  1. app/main.py +12 -21
app/main.py CHANGED
@@ -1,4 +1,4 @@
-from fastapi import FastAPI, Request
+from fastapi import FastAPI
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
@@ -6,49 +6,40 @@ import torch
 
 app = FastAPI()
 
-# Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", use_auth_token=True)
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct", trust_remote_code=True)
 tokenizer.pad_token = tokenizer.eos_token
 
-# ✅ Load base model without quantization (for CPU)
 model = AutoModelForCausalLM.from_pretrained(
-    "mistralai/Mistral-7B-Instruct-v0.2",
+    "Qwen/Qwen2.5-0.5B-Instruct",
     torch_dtype=torch.float32,
-    use_auth_token=True
+    trust_remote_code=True
 )
-
-# ✅ Load LoRA adapter
-ADAPTER_DIR = "./adapter/version 1"
-model = PeftModel.from_pretrained(model, ADAPTER_DIR)
+model = PeftModel.from_pretrained(model, "./adapter", is_trainable=False)
 model.eval()
 
-# ✅ Build prompt from messages
 def build_prompt(messages):
     prompt = ""
     for msg in messages:
-        if msg["role"] == "user":
-            prompt += f"### User:\n{msg['content']}\n"
-        elif msg["role"] == "assistant":
-            prompt += f"### Assistant:\n{msg['content']}\n"
+        role = "User" if msg["role"] == "user" else "Assistant"
+        prompt += f"### {role}:\n{msg['content']}\n"
     prompt += "### Assistant:\n"
     return prompt
 
-# ✅ Input format
 class ChatRequest(BaseModel):
-    messages: list  # list of {"role": "user"/"assistant", "content": "..."}
+    messages: list  # [{"role": "user", "content": "..."}]
 
 @app.post("/chat")
 async def chat(req: ChatRequest):
     prompt = build_prompt(req.messages)
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    output = model.generate(
+    outputs = model.generate(
         **inputs,
         max_new_tokens=256,
         do_sample=True,
         temperature=0.7,
         top_p=0.95,
-        eos_token_id=tokenizer.eos_token_id,
+        eos_token_id=tokenizer.eos_token_id
     )
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
-    reply = response.split("### Assistant:")[-1].strip()
+    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    reply = output_text.split("### Assistant:")[-1].strip()
     return {"response": reply}