RishitMishra committed on
Commit
7cbfd20
·
verified ·
1 Parent(s): 92e7874

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -47
app.py CHANGED
@@ -1,47 +1,52 @@
1
- from fastapi import FastAPI, UploadFile, Form
2
- from PIL import Image
3
- from gui_actor.modeling import Qwen2VLForConditionalGenerationWithPointer
4
- from transformers import Qwen2VLProcessor
5
- from gui_actor.inference import inference
6
- import torch
7
- import io
8
-
9
- app = FastAPI()
10
-
11
- # Load model + processor at startup
12
- MODEL_NAME = "microsoft/GUI-Actor-2B-Qwen2-VL"
13
- processor = Qwen2VLProcessor.from_pretrained(MODEL_NAME)
14
- tokenizer = processor.tokenizer
15
- model = Qwen2VLForConditionalGenerationWithPointer.from_pretrained(
16
- MODEL_NAME,
17
- torch_dtype=torch.float32,
18
- device_map="auto"
19
- ).eval()
20
-
21
- @app.get("/")
22
- def home():
23
- return {"message": "GUI-Actor Space is running"}
24
-
25
- @app.post("/predict/")
26
- async def predict(
27
- instruction: str = Form(...),
28
- image: UploadFile = Form(...)
29
- ):
30
- # Read and process image
31
- img_bytes = await image.read()
32
- img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
33
-
34
- # Auto resize if needed
35
- max_width, max_height = 480, 270
36
- if img.width > max_width or img.height > max_height:
37
- img.thumbnail((max_width, max_height))
38
-
39
- # Run inference
40
- click_point = inference(
41
- instruction=instruction,
42
- image=img,
43
- model=model,
44
- processor=processor,
45
- tokenizer=tokenizer
46
- )
47
- return {"click_point": click_point}
 
 
 
 
 
 
1
import io

import torch
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from PIL import Image
from transformers import Qwen2VLProcessor

from gui_actor.inference import inference
from gui_actor.modeling import Qwen2VLForConditionalGenerationWithPointer
8
+
9
# FastAPI application object; the route decorators below register on it.
app = FastAPI()

# One checkpoint identifier is used for both the processor and the model so
# the two are always loaded from the same source.
MODEL_NAME = "microsoft/GUI-Actor-2B-Qwen2-VL"

# These statements run once, when the module is imported, so the processor
# and the model already exist before any request handler is called.
processor = Qwen2VLProcessor.from_pretrained(MODEL_NAME)
tokenizer = processor.tokenizer

model = Qwen2VLForConditionalGenerationWithPointer.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.float32,
)
# Keep whatever eval() returns as the served model, as the chained call did.
model = model.eval()
20
+
21
@app.get("/")
def home():
    """Return a fixed status payload showing that the service is up."""
    status = {"message": "GUI-Actor Space is running"}
    return status
24
+
25
@app.post("/predict/")
async def predict(
    instruction: str = Form(...),
    image: UploadFile = File(...),
):
    """Run the model on an uploaded image for a text instruction.

    Args:
        instruction: Instruction text, sent as a multipart form field.
        image: Uploaded image, sent as a multipart file part. Declared with
            ``File`` so the endpoint is described as a file upload rather
            than as a plain form field.

    Returns:
        A dict with the single key ``"click_point"`` holding whatever
        ``inference`` returned.

    Raises:
        HTTPException: With status 400 when the upload cannot be decoded as
            an image.
    """
    img_bytes = await image.read()

    # An empty or non-image upload makes Pillow raise an OSError subclass.
    # Report that as a client error instead of an unhandled server error.
    try:
        img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
    except OSError as exc:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a readable image",
        ) from exc

    # Cap the model input at 480x270; thumbnail() shrinks the image in place.
    max_width, max_height = 480, 270
    if img.width > max_width or img.height > max_height:
        img.thumbnail((max_width, max_height))

    # NOTE(review): inference() is called synchronously inside an async
    # handler, so the event loop is blocked while it runs. Consider a worker
    # thread if concurrent requests matter.
    # NOTE(review): the image may have been downscaled above. Confirm whether
    # click_point is normalized or expressed in pixels of the resized image.
    click_point = inference(
        instruction=instruction,
        image=img,
        model=model,
        processor=processor,
        tokenizer=tokenizer,
    )
    return {"click_point": click_point}
48
+
49
if __name__ == "__main__":
    # Reached only when this file is executed directly, not when imported.
    import uvicorn

    bind_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run("app:app", **bind_options)
52
+