# Hugging Face Spaces page status at capture time: Build error
import io

import torch
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from gui_actor.inference import inference
from gui_actor.modeling import Qwen2VLForConditionalGenerationWithPointer
from PIL import Image
from transformers import Qwen2VLProcessor
# ASGI application object; uvicorn serves this instance.
app = FastAPI()
# Load the processor, tokenizer and model once at import time so that every
# request reuses the same objects instead of reloading the weights.
MODEL_NAME = "microsoft/GUI-Actor-2B-Qwen2-VL"
processor = Qwen2VLProcessor.from_pretrained(MODEL_NAME)
tokenizer = processor.tokenizer
model = Qwen2VLForConditionalGenerationWithPointer.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,
    device_map="auto",
).eval()  # switch to evaluation mode; the model is only used for inference here
# NOTE(review): no route decorator was visible on this handler, so it was never
# registered on `app`. Registered at "/" here — confirm the intended path.
@app.get("/")
def home():
    """Return a static payload indicating that the service is up."""
    return {"message": "GUI-Actor Space is running"}
# NOTE(review): no route decorator was visible on this handler, so it was never
# registered on `app`. POST is required for form/file bodies; the "/predict"
# path is assumed from the function name — confirm against existing clients.
@app.post("/predict")
async def predict(
    instruction: str = Form(...),
    image: UploadFile = File(...),
):
    """Run GUI-Actor inference on an uploaded screenshot.

    Args:
        instruction: Text instruction, sent as a form field.
        image: Uploaded image file, sent as a multipart file part.

    Returns:
        A dict with the single key ``"click_point"`` holding whatever
        ``inference`` returns.

    Raises:
        HTTPException: 400 if the upload cannot be decoded as an image.
    """
    # Read and decode the upload; normalise to RGB.
    img_bytes = await image.read()
    try:
        img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
    except OSError as err:
        # Pillow raises UnidentifiedImageError (an OSError) for empty or
        # non-image payloads; report a client error instead of a 500.
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a valid image",
        ) from err

    # Downscale in place so the image fits within the size budget.
    # `thumbnail` keeps the aspect ratio and never enlarges, so no explicit
    # size check is needed beforehand.
    max_width, max_height = 480, 270
    img.thumbnail((max_width, max_height))

    # Run inference.
    # NOTE(review): verify these keyword names against the signature of
    # `gui_actor.inference.inference` — TODO confirm.
    # NOTE(review): if the result holds pixel coordinates, they presumably
    # refer to the downscaled image rather than the original upload — confirm.
    click_point = inference(
        instruction=instruction,
        image=img,
        model=model,
        processor=processor,
        tokenizer=tokenizer,
    )
    return {"click_point": click_point}
if __name__ == "__main__":
    import uvicorn

    # Pass the app object rather than the "app:app" import string: the string
    # form makes uvicorn import this module a second time (loading the model
    # twice) and only works if the file is named app.py.
    uvicorn.run(app, host="0.0.0.0", port=7860)