Ais committed on
Commit 0053216 · verified · 1 Parent(s): 1e414fd

Update app/main.py

Files changed (1)
  1. app/main.py +37 -57
app/main.py CHANGED
@@ -1,75 +1,55 @@
  import os
  import gdown
- import re
- import torch
- from fastapi import FastAPI, Request
- from pydantic import BaseModel
- from peft import PeftModel, PeftConfig
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

  app = FastAPI()

- DRIVE_FOLDER_URL = "https://drive.google.com/drive/folders/1S9xT92Zm9rZ4RSCxAe_DLld8vu78mqW4"
- LOCAL_ADAPTER_DIR = "adapter"
- BASE_MODEL = "Qwen/Qwen2-0.5B-Instruct"
-
- class PromptRequest(BaseModel):
-     prompt: str

- def download_latest_adapter():
-     print("🔽 Downloading adapter folder from Google Drive...")
-     gdown.download_folder(url=DRIVE_FOLDER_URL, output="gdrive_tmp", quiet=False, use_cookies=False)

-     all_versions = sorted(
-         [d for d in os.listdir("gdrive_tmp") if re.match(r"version \d+", d)],
-         key=lambda x: int(x.split()[-1])
-     )
-     if not all_versions:
-         raise ValueError("❌ No version folders found in Google Drive folder.")

-     latest = all_versions[-1]
-     src = os.path.join("gdrive_tmp", latest)
-     print(f"✅ Latest adapter found: {latest}")

-     os.makedirs(LOCAL_ADAPTER_DIR, exist_ok=True)
-     for file in os.listdir(src):
-         src_file = os.path.join(src, file)
-         dest_file = os.path.join(LOCAL_ADAPTER_DIR, file)
-         os.system(f"cp '{src_file}' '{dest_file}'")

-     print(f"✅ Adapter copied to: {LOCAL_ADAPTER_DIR}")


- def load_model():
-     print("🚀 Loading base model...")
-     model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="auto", torch_dtype=torch.float16)
-     tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
-
-     print("🔗 Loading adapter...")
-     model = PeftModel.from_pretrained(model, LOCAL_ADAPTER_DIR)
-     model.eval()

-     return model, tokenizer
-
- # Step 1: Download latest adapter
- download_latest_adapter()
-
- # Step 2: Load model and tokenizer
- model, tokenizer = load_model()
-
- @app.post("/generate")
- async def generate_text(request: PromptRequest):
-     prompt = request.prompt.strip()
-     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.cuda()

      with torch.no_grad():
          outputs = model.generate(
-             input_ids,
-             max_new_tokens=300,
-             do_sample=True,
              temperature=0.7,
-             top_p=0.95,
-             eos_token_id=tokenizer.eos_token_id,
          )

-     result = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     return {"response": result[len(prompt):].strip()}
 
 
+ from fastapi import FastAPI, Request
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
+ from peft import PeftModel
+ import torch
  import os
  import gdown

  app = FastAPI()

+ # Auto-download adapter from Google Drive (if not already present)
+ ADAPTER_DIR = "adapter"
+ ADAPTER_PATH = os.path.join(ADAPTER_DIR, "adapter_model.safetensors")
+ DRIVE_FILE_ID = "1wnuE5t_m4ojI7YqxXZ8lBdtDFoHJJ6_H"  # version 1 model

+ if not os.path.exists(ADAPTER_PATH):
+     os.makedirs(ADAPTER_DIR, exist_ok=True)
+     gdown.download(f"https://drive.google.com/uc?id={DRIVE_FILE_ID}", ADAPTER_PATH, quiet=False)

+ # Load base model
+ base_model = AutoModelForCausalLM.from_pretrained(
+     "Qwen/Qwen2-0.5B-Instruct",
+     device_map="auto",
+     torch_dtype=torch.float16
+ )

+ # Load tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

+ # Load LoRA adapter
+ model = PeftModel.from_pretrained(base_model, ADAPTER_DIR)
+ model.eval()

+ @app.post("/chat")
+ async def chat(request: Request):
+     data = await request.json()
+     prompt = data.get("prompt")

+     if not prompt:
+         return {"error": "No prompt provided."}

+     full_prompt = f"<|im_start|>system\nYou are a helpful assistant<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"

+     inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
      with torch.no_grad():
          outputs = model.generate(
+             **inputs,
+             max_new_tokens=256,
              temperature=0.7,
+             do_sample=True,
+             top_p=0.9
          )

+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     response = response.split("<|im_start|>assistant\n")[-1].strip()
+     return {"response": response}
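
For reference, a minimal client sketch for exercising the new /chat route. This is not part of the commit: the host, port, and prompt text are assumptions (e.g. the app served locally with uvicorn app.main:app --port 8000), shown only to illustrate the request/response shape the endpoint now expects.

# Hypothetical client call against the /chat endpoint added in this commit.
# URL and prompt are illustrative assumptions, not values from the repo.
import requests

resp = requests.post(
    "http://localhost:8000/chat",
    json={"prompt": "Summarize what a LoRA adapter does in one sentence."},
    timeout=120,
)
data = resp.json()
print(data.get("response", data.get("error")))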