Spaces:
Build error
Build error
File size: 1,408 Bytes
7cbfd20 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import io

import torch
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from gui_actor.inference import inference
from gui_actor.modeling import Qwen2VLForConditionalGenerationWithPointer
from PIL import Image
from transformers import Qwen2VLProcessor
# Checkpoint identifier used for both the processor and the model weights.
MODEL_NAME = "microsoft/GUI-Actor-2B-Qwen2-VL"

# Application object that the route decorators in this file register on.
app = FastAPI()

# Everything below runs once, when the module is imported, so request
# handlers can reuse the already-loaded objects.
processor = Qwen2VLProcessor.from_pretrained(MODEL_NAME)
tokenizer = processor.tokenizer

model = Qwen2VLForConditionalGenerationWithPointer.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,
    device_map="auto",
)
# Switch to evaluation mode; rebinding keeps whatever eval() returns,
# exactly as the chained call would.
model = model.eval()
@app.get("/")
def home():
    """Liveness endpoint: return a fixed message saying the Space is running."""
    status = {"message": "GUI-Actor Space is running"}
    return status
@app.post("/predict/")
async def predict(
    instruction: str = Form(...),
    image: UploadFile = File(...),
):
    """Predict a click point for ``instruction`` on an uploaded image.

    Expects a multipart form with a text field ``instruction`` and a file
    field ``image``. The upload is decoded to RGB, shrunk to fit within
    480x270 when it exceeds either bound, and passed to ``inference``
    together with the module-level ``model``, ``processor`` and
    ``tokenizer``.

    Returns:
        ``{"click_point": <value returned by inference>}``.

    Raises:
        HTTPException: status 400 when the upload cannot be decoded as an
            image.
    """
    # Read the whole upload into memory and decode it.
    img_bytes = await image.read()
    try:
        img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
    except OSError as exc:
        # Pillow reports unrecognised or truncated image data with OSError
        # (UnidentifiedImageError is a subclass); that is a client error,
        # not a server fault.
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a valid image",
        ) from exc

    # Downscale large images in place; thumbnail() preserves aspect ratio.
    max_width, max_height = 480, 270
    if img.width > max_width or img.height > max_height:
        img.thumbnail((max_width, max_height))

    # NOTE(review): confirm these keyword names match the signature of
    # gui_actor.inference.inference, and that its return value is
    # JSON-serialisable.
    # NOTE(review): if the result is in pixel coordinates, it refers to the
    # (possibly downscaled) image above, not the original upload -- confirm.
    # NOTE(review): this call is synchronous inside an async handler, so other
    # requests wait while it runs.
    click_point = inference(
        instruction=instruction,
        image=img,
        model=model,
        processor=processor,
        tokenizer=tokenizer,
    )
    return {"click_point": click_point}
if __name__ == "__main__":
    import uvicorn

    # Hand uvicorn the application object instead of the "app:app" import
    # string. The string form makes uvicorn import this file a second time
    # under the module name "app", which repeats the module-level model
    # load and only resolves when the file is actually named app.py.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|