Ais committed on
Commit
48b2ebf
Β·
0 Parent(s):

πŸš€ Initial commit of FastAPI LoRA chatbot

Browse files
Files changed (5) hide show
  1. Dockerfile +13 -0
  2. app/download_adapter.py +41 -0
  3. app/main.py +62 -0
  4. requirements.txt +9 -0
  5. start.sh +13 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9
2
+
3
+ RUN useradd -m -u 1000 user
4
+ USER user
5
+ ENV PATH="/home/user/.local/bin:$PATH"
6
+
7
+ WORKDIR /app
8
+
9
+ COPY --chown=user ./requirements.txt requirements.txt
10
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
11
+
12
+ COPY --chown=user . /app
13
+ CMD ["bash", "start.sh"]
app/download_adapter.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gdown
3
+ import re
4
+
5
+ # βœ… Google Drive folder with adapter versions like 'version 1', 'version 2', etc.
6
+ DRIVE_FOLDER_URL = "https://drive.google.com/drive/folders/1S9xT92Zm9rZ4RSCxAe_DLld8vu78mqW4"
7
+ LOCAL_DEST = "adapter" # Where we'll copy the latest version
8
+
9
+ def download_latest_adapter():
10
+ print("πŸ”½ Downloading adapter folder from Google Drive...")
11
+
12
+ # Download everything from the Drive folder into temp dir
13
+ gdown.download_folder(url=DRIVE_FOLDER_URL, output="gdrive_tmp", quiet=False, use_cookies=False)
14
+
15
+ # Find all folders named "version X"
16
+ all_versions = sorted(
17
+ [d for d in os.listdir("gdrive_tmp") if re.match(r"version \d+", d)],
18
+ key=lambda x: int(x.split()[-1])
19
+ )
20
+
21
+ if not all_versions:
22
+ raise ValueError("❌ No version folders found in Google Drive folder.")
23
+
24
+ latest = all_versions[-1]
25
+ src = os.path.join("gdrive_tmp", latest)
26
+ print(f"βœ… Latest adapter found: {latest}")
27
+
28
+ # Ensure destination exists
29
+ os.makedirs(LOCAL_DEST, exist_ok=True)
30
+
31
+ # Copy files to destination
32
+ for file in os.listdir(src):
33
+ src_file = os.path.join(src, file)
34
+ dest_file = os.path.join(LOCAL_DEST, file)
35
+ os.system(f"cp '{src_file}' '{dest_file}'")
36
+
37
+ print(f"βœ… Adapter copied to: {LOCAL_DEST}")
38
+
39
+ # βœ… Run automatically if script is executed directly
40
+ if __name__ == "__main__":
41
+ download_latest_adapter()
app/main.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request
2
+ from pydantic import BaseModel
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
4
+ from peft import PeftModel
5
+ import torch
6
+
7
+ app = FastAPI()
8
+
9
+ # βœ… Load tokenizer
10
+ tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
11
+ tokenizer.pad_token = tokenizer.eos_token
12
+
13
+ # βœ… Setup quantization config
14
+ bnb_config = BitsAndBytesConfig(
15
+ load_in_4bit=True,
16
+ bnb_4bit_use_double_quant=True,
17
+ bnb_4bit_quant_type="nf4",
18
+ bnb_4bit_compute_dtype=torch.float16
19
+ )
20
+
21
+ # βœ… Load base model
22
+ model = AutoModelForCausalLM.from_pretrained(
23
+ "mistralai/Mistral-7B-Instruct-v0.2",
24
+ device_map="auto",
25
+ quantization_config=bnb_config
26
+ )
27
+
28
+ # βœ… Load LoRA adapter (ensure it's downloaded)
29
+ ADAPTER_DIR = "./adapter/version 1"
30
+ model = PeftModel.from_pretrained(model, ADAPTER_DIR)
31
+ model.eval()
32
+
33
+ # βœ… Build prompt from messages
34
+ def build_prompt(messages):
35
+ prompt = ""
36
+ for msg in messages:
37
+ if msg["role"] == "user":
38
+ prompt += f"### User:\n{msg['content']}\n"
39
+ elif msg["role"] == "assistant":
40
+ prompt += f"### Assistant:\n{msg['content']}\n"
41
+ prompt += "### Assistant:\n"
42
+ return prompt
43
+
44
+ # βœ… Input format
45
+ class ChatRequest(BaseModel):
46
+ messages: list # list of {"role": "user"/"assistant", "content": "..."}
47
+
48
+ @app.post("/chat")
49
+ async def chat(req: ChatRequest):
50
+ prompt = build_prompt(req.messages)
51
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
52
+ output = model.generate(
53
+ **inputs,
54
+ max_new_tokens=256,
55
+ do_sample=True,
56
+ temperature=0.7,
57
+ top_p=0.95,
58
+ eos_token_id=tokenizer.eos_token_id,
59
+ )
60
+ response = tokenizer.decode(output[0], skip_special_tokens=True)
61
+ reply = response.split("### Assistant:")[-1].strip()
62
+ return {"response": reply}
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ transformers==4.40.1
2
+ torch==2.1.2
3
+ accelerate==0.29.3
4
+ peft==0.10.0
5
+ bitsandbytes==0.43.1
6
+ uvicorn==0.29.0
7
+ fastapi==0.110.2
8
+ python-multipart==0.0.9
9
+ gdown==5.1.0
start.sh ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ echo "πŸ“¦ Installing Python dependencies..."
4
+ pip install -q --upgrade pip
5
+ pip install -q torch transformers accelerate peft bitsandbytes gdown
6
+
7
+ echo "βœ… Dependencies installed."
8
+
9
+ echo "πŸ“‚ Downloading latest adapter from Google Drive..."
10
+ python download_adapter.py
11
+
12
+ echo "πŸ€– Launching AI chat server..."
13
+ python main.py