# app/main.py
from fastapi import FastAPI, Form
from fastapi.responses import HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
import torch
import os

from app.download_adapter import download_latest_adapter

# === Step 1: Download Adapter ===
download_latest_adapter()

# === Step 2: Load Model and Tokenizer ===
BASE_MODEL = "Qwen/Qwen2-0.5B-Instruct"
ADAPTER_FOLDER = "adapter"
HF_TOKEN = os.environ.get("HF_TOKEN", None)

print("🚀 Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
    token=HF_TOKEN,
    trust_remote_code=True,
)

print("🔧 Applying LoRA adapter...")
model = PeftModel.from_pretrained(base_model, ADAPTER_FOLDER)

print("🧠 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# === Step 3: FastAPI App ===
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow all origins for testing
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/", response_class=HTMLResponse)
async def form():
    # Minimal form (markup reconstructed) that POSTs the prompt to /generate.
    return """
    <html>
      <body>
        <h1>Ask the model</h1>
        <form action="/generate" method="post">
          <input type="text" name="prompt" placeholder="Type a prompt..." />
          <button type="submit">Ask</button>
        </form>
      </body>
    </html>
    """
@app.post("/generate", response_class=HTMLResponse)
async def generate(prompt: str = Form(...)):
    # Route path and generation settings are assumed; adjust as needed.
    result = pipe(prompt, max_new_tokens=256)
    response = result[0]["generated_text"]
    return f"""
    <html>
      <body>
        <p><b>Prompt:</b> {prompt}</p>
        <p><b>Response:</b> {response}</p>
        <a href="/">Ask again</a>
      </body>
    </html>
    """
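
# --- app/download_adapter.py (sketch) ---
# The download_latest_adapter() helper imported in main.py is not shown in
# this listing. A minimal sketch, assuming the LoRA adapter is stored in a
# Hugging Face model repo (the repo id below is a hypothetical placeholder):
# it snapshots that repo into the local "adapter" folder that
# PeftModel.from_pretrained reads above.
import os

from huggingface_hub import snapshot_download


def download_latest_adapter() -> None:
    """Fetch the latest adapter snapshot into the local 'adapter' folder."""
    snapshot_download(
        repo_id="your-username/your-adapter-repo",  # placeholder repo id
        local_dir="adapter",
        token=os.environ.get("HF_TOKEN", None),
    )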