Ais committed
Update app/main.py

app/main.py CHANGED  (+57 -26)
@@ -1,49 +1,80 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from peft import PeftModel
 import torch
-import gdown
 import os
-import zipfile

-
 BASE_MODEL = "Qwen/Qwen2-0.5B-Instruct"
 ADAPTER_FOLDER = "adapter"
 HF_TOKEN = os.environ.get("HF_TOKEN", None)

-# Step 1: Download adapter zip from Drive (version 1)
-zip_url = "https://drive.google.com/uc?id=1z8U98kW9GD29t-3v8LDu0SsdqJ_vzNvQ"  # Your .zip file link
-zip_path = "adapter.zip"
-
-if not os.path.exists(ADAPTER_FOLDER):
-    print("📥 Downloading adapter...")
-    gdown.download(zip_url, zip_path, quiet=False)
-
-    print("📂 Extracting adapter...")
-    with zipfile.ZipFile(zip_path, "r") as zip_ref:
-        zip_ref.extractall(ADAPTER_FOLDER)
-
-# Step 2: Load base model (non-quantized, CPU-friendly)
 print("🚀 Loading base model...")
 base_model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL,
     torch_dtype=torch.float16,
     device_map="auto",
-    token=HF_TOKEN
 )

-# Step 3: Apply LoRA adapter
 print("🔧 Applying LoRA adapter...")
 model = PeftModel.from_pretrained(base_model, ADAPTER_FOLDER)

-# Step 4: Load tokenizer
 print("🧠 Loading tokenizer...")
-tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

-# Step 5: Inference pipeline
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

-# Step
-
-
-
-
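The Google Drive download block removed above is replaced in the new version of the file (shown next) by a single call to download_latest_adapter() imported from app.download_adapter. That helper module is not part of this diff; the following is only a minimal sketch of what it might look like, assuming it still fetches an adapter archive and unpacks it into the adapter folder (the ADAPTER_URL environment variable and the use of urllib are assumptions, not the committed code):

# app/download_adapter.py -- hypothetical sketch, not the file in this commit
import os
import zipfile
import urllib.request

ADAPTER_URL = os.environ.get("ADAPTER_URL")  # assumed: direct link to the adapter .zip
ADAPTER_FOLDER = "adapter"

def download_latest_adapter(zip_path: str = "adapter.zip") -> None:
    """Fetch and extract the LoRA adapter if it is not already present."""
    if os.path.exists(ADAPTER_FOLDER):
        return
    print("📥 Downloading adapter...")
    urllib.request.urlretrieve(ADAPTER_URL, zip_path)
    print("📂 Extracting adapter...")
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(ADAPTER_FOLDER)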
+# app/main.py
+from fastapi import FastAPI, Form
+from fastapi.responses import HTMLResponse
+from fastapi.middleware.cors import CORSMiddleware
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from peft import PeftModel
 import torch
 import os

+from app.download_adapter import download_latest_adapter
+
+# === Step 1: Download Adapter ===
+download_latest_adapter()
+
+# === Step 2: Load Model and Tokenizer ===
 BASE_MODEL = "Qwen/Qwen2-0.5B-Instruct"
 ADAPTER_FOLDER = "adapter"
 HF_TOKEN = os.environ.get("HF_TOKEN", None)

 print("🚀 Loading base model...")
 base_model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL,
     torch_dtype=torch.float16,
     device_map="auto",
+    token=HF_TOKEN,
+    trust_remote_code=True
 )

 print("🔧 Applying LoRA adapter...")
 model = PeftModel.from_pretrained(base_model, ADAPTER_FOLDER)

 print("🧠 Loading tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

+# === Step 3: FastAPI App ===
+app = FastAPI()
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Allow all origins for testing
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+@app.get("/", response_class=HTMLResponse)
+async def form():
+    return """
+    <html>
+      <head><title>Qwen Chat</title></head>
+      <body>
+        <h2>Ask something:</h2>
+        <form method="post">
+          <textarea name="prompt" rows="4" cols="60"></textarea><br>
+          <input type="submit" value="Generate">
+        </form>
+      </body>
+    </html>
+    """
+
+@app.post("/", response_class=HTMLResponse)
+async def generate(prompt: str = Form(...)):
+    full_prompt = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
+    output = pipe(full_prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
+    response = output[0]["generated_text"].split("<|im_start|>assistant\n")[-1].strip()
+
+    return f"""
+    <html>
+      <head><title>Qwen Chat</title></head>
+      <body>
+        <h2>Your Prompt:</h2>
+        <p>{prompt}</p>
+        <h2>Response:</h2>
+        <p>{response}</p>
+        <a href="/">Ask again</a>
+      </body>
+    </html>
+    """
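The diff does not show how the app is served or called. Assuming a standard FastAPI deployment started with uvicorn (for example uvicorn app.main:app --host 0.0.0.0 --port 7860; the port is an assumption, 7860 is only the usual Hugging Face Spaces convention), a quick smoke test of both routes could look like the sketch below. The "prompt" form field matches the code above; everything else is illustrative.

# Quick smoke test against a locally running instance (assumed started with:
#   uvicorn app.main:app --host 0.0.0.0 --port 7860
# the address and port below are assumptions).
import requests

BASE_URL = "http://localhost:7860"

# GET / returns the HTML form
print(requests.get(BASE_URL).status_code)

# POST / with the "prompt" form field returns the rendered response page
resp = requests.post(BASE_URL, data={"prompt": "What is a LoRA adapter?"})
print(resp.text)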