Spaces:

RishitMishra
/

rishit-gui-actor

Build error

RishitMishra committed on Jul 5

Commit

7cbfd20

verified ·

1 Parent(s): 92e7874

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,47 +1,52 @@
-from fastapi import FastAPI, UploadFile, Form
-from PIL import Image
-from gui_actor.modeling import Qwen2VLForConditionalGenerationWithPointer
-from transformers import Qwen2VLProcessor
-from gui_actor.inference import inference
-import torch
-import io
-app = FastAPI()
-# Load model + processor at startup
-MODEL_NAME = "microsoft/GUI-Actor-2B-Qwen2-VL"
-processor = Qwen2VLProcessor.from_pretrained(MODEL_NAME)
-tokenizer = processor.tokenizer
-model = Qwen2VLForConditionalGenerationWithPointer.from_pretrained(
-    MODEL_NAME,
-    torch_dtype=torch.float32,
-    device_map="auto"
-).eval()
-@app.get("/")
-def home():
-    return {"message": "GUI-Actor Space is running"}
-@app.post("/predict/")
-async def predict(
-    instruction: str = Form(...),
-    image: UploadFile = Form(...)
-):
-    # Read and process image
-    img_bytes = await image.read()
-    img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
-    # Auto resize if needed
-    max_width, max_height = 480, 270
-    if img.width > max_width or img.height > max_height:
-        img.thumbnail((max_width, max_height))
-    # Run inference
-    click_point = inference(
-        instruction=instruction,
-        image=img,
-        model=model,
-        processor=processor,
-        tokenizer=tokenizer
-    )
-    return {"click_point": click_point}

+from fastapi import FastAPI, UploadFile, Form
+from PIL import Image
+from gui_actor.modeling import Qwen2VLForConditionalGenerationWithPointer
+from transformers import Qwen2VLProcessor
+from gui_actor.inference import inference
+import torch
+import io
+app = FastAPI()
+# Load model + processor at startup
+MODEL_NAME = "microsoft/GUI-Actor-2B-Qwen2-VL"
+processor = Qwen2VLProcessor.from_pretrained(MODEL_NAME)
+tokenizer = processor.tokenizer
+model = Qwen2VLForConditionalGenerationWithPointer.from_pretrained(
+    MODEL_NAME,
+    torch_dtype=torch.float32,
+    device_map="auto"
+).eval()
+@app.get("/")
+def home():
+    return {"message": "GUI-Actor Space is running"}
+@app.post("/predict/")
+async def predict(
+    instruction: str = Form(...),
+    image: UploadFile = Form(...)
+):
+    # Read and process image
+    img_bytes = await image.read()
+    img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
+    # Auto resize if needed
+    max_width, max_height = 480, 270
+    if img.width > max_width or img.height > max_height:
+        img.thumbnail((max_width, max_height))
+    # Run inference
+    click_point = inference(
+        instruction=instruction,
+        image=img,
+        model=model,
+        processor=processor,
+        tokenizer=tokenizer
+    )
+    return {"click_point": click_point}
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run("app:app", host="0.0.0.0", port=7860)