Ais committed on
Commit 18aea39 · verified · 1 Parent(s): e153221

Update app/main.py

Files changed (1)
  1. app/main.py +57 -26
app/main.py CHANGED
@@ -1,49 +1,80 @@
+# app/main.py
+from fastapi import FastAPI, Form
+from fastapi.responses import HTMLResponse
+from fastapi.middleware.cors import CORSMiddleware
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from peft import PeftModel
 import torch
-import gdown
 import os
-import zipfile
 
-# Constants
+from app.download_adapter import download_latest_adapter
+
+# === Step 1: Download Adapter ===
+download_latest_adapter()
+
+# === Step 2: Load Model and Tokenizer ===
 BASE_MODEL = "Qwen/Qwen2-0.5B-Instruct"
 ADAPTER_FOLDER = "adapter"
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
-# Step 1: Download adapter zip from Drive (version 1)
-zip_url = "https://drive.google.com/uc?id=1z8U98kW9GD29t-3v8LDu0SsdqJ_vzNvQ"  # Your .zip file link
-zip_path = "adapter.zip"
-
-if not os.path.exists(ADAPTER_FOLDER):
-    print("📥 Downloading adapter...")
-    gdown.download(zip_url, zip_path, quiet=False)
-
-    print("📂 Extracting adapter...")
-    with zipfile.ZipFile(zip_path, "r") as zip_ref:
-        zip_ref.extractall(ADAPTER_FOLDER)
-
-# Step 2: Load base model (non-quantized, CPU-friendly)
 print("🚀 Loading base model...")
 base_model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL,
     torch_dtype=torch.float16,
     device_map="auto",
-    token=HF_TOKEN
+    token=HF_TOKEN,
+    trust_remote_code=True
 )
 
-# Step 3: Apply LoRA adapter
 print("🔧 Applying LoRA adapter...")
 model = PeftModel.from_pretrained(base_model, ADAPTER_FOLDER)
 
-# Step 4: Load tokenizer
 print("🧠 Loading tokenizer...")
-tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
 
-# Step 5: Inference pipeline
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
-# Step 6: Try a prompt
-prompt = "What is the capital of India?"
-print("💬 Prompt:", prompt)
-output = pipe(prompt, max_new_tokens=100, do_sample=True, temperature=0.7)
-print("📤 Output:", output[0]["generated_text"])
+# === Step 3: FastAPI App ===
+app = FastAPI()
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Allow all origins for testing
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+@app.get("/", response_class=HTMLResponse)
+async def form():
+    return """
+    <html>
+        <head><title>Qwen Chat</title></head>
+        <body>
+            <h2>Ask something:</h2>
+            <form method="post">
+                <textarea name="prompt" rows="4" cols="60"></textarea><br>
+                <input type="submit" value="Generate">
+            </form>
+        </body>
+    </html>
+    """
+
+@app.post("/", response_class=HTMLResponse)
+async def generate(prompt: str = Form(...)):
+    full_prompt = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
+    output = pipe(full_prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
+    response = output[0]["generated_text"].split("<|im_start|>assistant\n")[-1].strip()
+
+    return f"""
+    <html>
+        <head><title>Qwen Chat</title></head>
+        <body>
+            <h2>Your Prompt:</h2>
+            <p>{prompt}</p>
+            <h2>Response:</h2>
+            <p>{response}</p>
+            <a href="/">Ask again</a>
+        </body>
+    </html>
+    """