Ais
committed on
Commit
Β·
48b2ebf
0
Parent(s):
π Initial commit of FastAPI LoRA chatbot
Browse files- Dockerfile +13 -0
- app/download_adapter.py +41 -0
- app/main.py +62 -0
- requirements.txt +9 -0
- start.sh +13 -0
Dockerfile
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Base image with Python 3.9
FROM python:3.9

# Run as an unprivileged user (UID 1000), with user-local pip installs on PATH
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install pinned dependencies first so this layer is cached across code changes
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the application source and hand control to the startup script
COPY --chown=user . /app
CMD ["bash", "start.sh"]
app/download_adapter.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gdown
|
3 |
+
import re
|
4 |
+
|
5 |
+
# β
Google Drive folder with adapter versions like 'version 1', 'version 2', etc.
|
6 |
+
DRIVE_FOLDER_URL = "https://drive.google.com/drive/folders/1S9xT92Zm9rZ4RSCxAe_DLld8vu78mqW4"
|
7 |
+
LOCAL_DEST = "adapter" # Where we'll copy the latest version
|
8 |
+
|
9 |
+
def download_latest_adapter():
|
10 |
+
print("π½ Downloading adapter folder from Google Drive...")
|
11 |
+
|
12 |
+
# Download everything from the Drive folder into temp dir
|
13 |
+
gdown.download_folder(url=DRIVE_FOLDER_URL, output="gdrive_tmp", quiet=False, use_cookies=False)
|
14 |
+
|
15 |
+
# Find all folders named "version X"
|
16 |
+
all_versions = sorted(
|
17 |
+
[d for d in os.listdir("gdrive_tmp") if re.match(r"version \d+", d)],
|
18 |
+
key=lambda x: int(x.split()[-1])
|
19 |
+
)
|
20 |
+
|
21 |
+
if not all_versions:
|
22 |
+
raise ValueError("β No version folders found in Google Drive folder.")
|
23 |
+
|
24 |
+
latest = all_versions[-1]
|
25 |
+
src = os.path.join("gdrive_tmp", latest)
|
26 |
+
print(f"β
Latest adapter found: {latest}")
|
27 |
+
|
28 |
+
# Ensure destination exists
|
29 |
+
os.makedirs(LOCAL_DEST, exist_ok=True)
|
30 |
+
|
31 |
+
# Copy files to destination
|
32 |
+
for file in os.listdir(src):
|
33 |
+
src_file = os.path.join(src, file)
|
34 |
+
dest_file = os.path.join(LOCAL_DEST, file)
|
35 |
+
os.system(f"cp '{src_file}' '{dest_file}'")
|
36 |
+
|
37 |
+
print(f"β
Adapter copied to: {LOCAL_DEST}")
|
38 |
+
|
39 |
+
# β
Run automatically if script is executed directly
|
40 |
+
if __name__ == "__main__":
|
41 |
+
download_latest_adapter()
|
app/main.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""FastAPI chat server: Mistral-7B-Instruct (4-bit) with a LoRA adapter."""

from fastapi import FastAPI, Request
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch

app = FastAPI()

# Load tokenizer; reuse EOS as the pad token (Mistral ships no pad token).
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization so the 7B model fits in modest GPU memory.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load base model.
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",
    device_map="auto",
    quantization_config=bnb_config,
)

# Load LoRA adapter. download_adapter.py copies the newest version's FILES
# directly into ./adapter, so load from there — "./adapter/version 1" would
# only exist if the versioned subfolder itself were copied, and it also
# pinned the server to version 1 forever.
ADAPTER_DIR = "./adapter"
model = PeftModel.from_pretrained(model, ADAPTER_DIR)
model.eval()


def build_prompt(messages):
    """Flatten a chat history into the '### User / ### Assistant' format.

    Messages with roles other than 'user'/'assistant' are silently skipped.
    Ends with an open '### Assistant:' header so the model continues as the
    assistant.
    """
    prompt = ""
    for msg in messages:
        if msg["role"] == "user":
            prompt += f"### User:\n{msg['content']}\n"
        elif msg["role"] == "assistant":
            prompt += f"### Assistant:\n{msg['content']}\n"
    prompt += "### Assistant:\n"
    return prompt


class ChatRequest(BaseModel):
    # list of {"role": "user"/"assistant", "content": "..."}
    messages: list


@app.post("/chat")
async def chat(req: ChatRequest):
    """Generate one assistant reply for the supplied chat history."""
    prompt = build_prompt(req.messages)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Inference only: skip autograd bookkeeping to save memory.
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    # The decoded text echoes the prompt; everything after the last
    # '### Assistant:' header is the newly generated reply.
    reply = response.split("### Assistant:")[-1].strip()
    return {"response": reply}


if __name__ == "__main__":
    # start.sh runs `python main.py`; without this guard the script would
    # define the app and exit without ever serving a request.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Pinned runtime dependencies for the FastAPI LoRA chatbot.

# Web server
fastapi==0.110.2
uvicorn==0.29.0
python-multipart==0.0.9

# Model stack
torch==2.1.2
transformers==4.40.1
accelerate==0.29.3
peft==0.10.0
bitsandbytes==0.43.1

# Adapter download from Google Drive
gdown==5.1.0
start.sh
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# Container entrypoint: fetch the newest LoRA adapter, then serve the API.
# Fail fast rather than launching a half-initialized server.
set -euo pipefail

# NOTE(review): dependencies are already installed — pinned — by the
# Dockerfile from requirements.txt. The previous runtime
# `pip install torch transformers ...` was unpinned, slow on every boot,
# and could silently override the pinned versions, so it was removed.

echo "Downloading latest adapter from Google Drive..."
# Run from the repo root (/app) so the adapter lands in ./adapter, which is
# where app/main.py loads it from. The sources live under app/ per the repo
# layout, so the bare `python download_adapter.py` path could never resolve.
python app/download_adapter.py

echo "Launching AI chat server..."
# Serve the FastAPI app explicitly: `python main.py` alone defines the app
# object and exits without listening. Port 7860 is the Hugging Face Spaces
# default. `exec` lets the server receive container signals directly.
exec uvicorn app.main:app --host 0.0.0.0 --port 7860
|