abiyyufahri committed on
Commit
1c943af
·
1 Parent(s): 78565ee

Install error fix attempt 8

Browse files
Files changed (3) hide show
  1. Dockerfile +7 -1
  2. main.py +67 -27
  3. requirements.txt +2 -1
Dockerfile CHANGED
@@ -10,11 +10,13 @@ USER user
10
  ENV PATH="/home/user/.local/bin:$PATH"
11
 
12
  WORKDIR /app
 
 
13
  COPY --chown=user requirements.txt ./
14
 
15
  # Install dependencies step by step
16
  RUN pip install --upgrade pip && \
17
- pip install --no-cache-dir packaging ninja wheel setuptools numpy
18
 
19
  # Install PyTorch CPU version
20
  RUN pip install --no-cache-dir torch==2.2.2+cpu torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
@@ -37,6 +39,10 @@ RUN pip install --no-cache-dir \
37
  opencv-python-headless \
38
  deepspeed==0.16.0
39
 
 
40
  COPY --chown=user . .
41
 
 
 
 
42
  CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
10
  ENV PATH="/home/user/.local/bin:$PATH"
11
 
12
  WORKDIR /app
13
+
14
+ # Copy requirements first for better caching
15
  COPY --chown=user requirements.txt ./
16
 
17
  # Install dependencies step by step
18
  RUN pip install --upgrade pip && \
19
+ pip install --no-cache-dir packaging ninja wheel setuptools "numpy<2.0.0"
20
 
21
  # Install PyTorch CPU version
22
  RUN pip install --no-cache-dir torch==2.2.2+cpu torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
 
39
  opencv-python-headless \
40
  deepspeed==0.16.0
41
 
42
+ # Copy all application files
43
  COPY --chown=user . .
44
 
45
+ # Ensure main.py exists and is readable
46
+ # Let head read the file directly: in a `cat file | head` pipeline the step's
+ # exit status is head's, so a missing main.py would not fail the build.
+ RUN ls -la /app/ && head -n 10 /app/main.py
47
+
48
  CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py CHANGED
@@ -6,31 +6,55 @@ from io import BytesIO
6
  import base64
7
  import torch
8
  import re
9
- from transformers import AutoModelForCausalLM, AutoProcessor
10
 
11
- app = FastAPI()
12
 
13
- # Load model untuk CPU
 
 
 
14
  model_name = "microsoft/GUI-Actor-2B-Qwen2-VL"
15
 
16
- # Load processor
17
- try:
18
- processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
19
- except Exception as e:
20
- print(f"Failed to load AutoProcessor: {e}")
21
- from transformers import Qwen2VLProcessor
22
- processor = Qwen2VLProcessor.from_pretrained(model_name)
23
-
24
- tokenizer = processor.tokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- # Load model dengan CPU support
27
- model = AutoModelForCausalLM.from_pretrained(
28
- model_name,
29
- torch_dtype=torch.float32, # float32 untuk CPU
30
- device_map=None, # CPU only
31
- trust_remote_code=True, # untuk custom model
32
- attn_implementation=None # skip flash attention
33
- ).eval()
34
 
35
  class Base64Request(BaseModel):
36
  image_base64: str
@@ -67,7 +91,7 @@ def extract_coordinates(text):
67
 
68
  def cpu_inference(conversation, model, tokenizer, processor):
69
  """
70
- Inference function untuk CPU tanpa GUI-Actor dependencies
71
  """
72
  try:
73
  # Apply chat template
@@ -118,8 +142,27 @@ def cpu_inference(conversation, model, tokenizer, processor):
118
  "success": False
119
  }
120
 
 
 
 
 
 
 
 
 
121
  @app.post("/click/base64")
122
  async def predict_click_base64(data: Base64Request):
 
 
 
 
 
 
 
 
 
 
 
123
  try:
124
  # Decode base64 to image
125
  image_data = base64.b64decode(data.image_base64.split(",")[-1])
@@ -178,7 +221,8 @@ async def health_check():
178
  "status": "healthy",
179
  "model": model_name,
180
  "device": "cpu",
181
- "torch_dtype": "float32"
 
182
  }
183
 
184
  @app.post("/click/form")
@@ -187,8 +231,4 @@ async def predict_click_form(
187
  instruction: str = Form(...)
188
  ):
189
  data = Base64Request(image_base64=image_base64, instruction=instruction)
190
- return await predict_click_base64(data)
191
-
192
- if __name__ == "__main__":
193
- import uvicorn
194
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
6
  import base64
7
  import torch
8
  import re
 
9
 
10
+ app = FastAPI(title="GUI-Actor API", version="1.0.0")
11
 
12
+ # Initialize global variables
13
+ model = None
14
+ processor = None
15
+ tokenizer = None
16
  model_name = "microsoft/GUI-Actor-2B-Qwen2-VL"
17
 
18
def load_model():
    """Load the processor, tokenizer and model into the module-level globals.

    The processor is loaded with ``Qwen2VLProcessor`` first; if that raises,
    ``AutoProcessor`` (with ``trust_remote_code=True``) is tried instead. The
    model is loaded with ``Qwen2VLForConditionalGeneration`` in float32 and
    switched to eval mode.

    The globals ``model``, ``processor`` and ``tokenizer`` are assigned only
    after every step has succeeded, so a failed load never leaves them
    half-initialised.

    Returns:
        bool: ``True`` when everything loaded, ``False`` on any error. Errors
        are not propagated to the caller; the message and the full traceback
        are printed instead.
    """
    global model, processor, tokenizer

    # Local import: only needed on the failure path.
    import traceback

    try:
        print("Loading processor...")
        # Try different approaches to load the processor
        try:
            from transformers import Qwen2VLProcessor
            loaded_processor = Qwen2VLProcessor.from_pretrained(model_name)
            print("Successfully loaded Qwen2VLProcessor")
        except Exception as e:
            print(f"Failed to load Qwen2VLProcessor: {e}")
            from transformers import AutoProcessor
            loaded_processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
            print("Successfully loaded AutoProcessor")

        loaded_tokenizer = loaded_processor.tokenizer

        print("Loading model...")
        # Use the correct model class for Qwen2VL
        from transformers import Qwen2VLForConditionalGeneration

        loaded_model = Qwen2VLForConditionalGeneration.from_pretrained(
            model_name,
            torch_dtype=torch.float32,  # float32 for CPU
            device_map=None,  # CPU only
            trust_remote_code=True,  # for custom model
            attn_implementation=None,  # skip flash attention
        ).eval()
    except Exception as e:
        # Top-level boundary: report the failure instead of crashing the app,
        # but keep the traceback so the root cause is visible in the logs.
        print(f"Error loading model: {e}")
        traceback.print_exc()
        return False

    # Publish the loaded objects only once the whole load has succeeded.
    processor = loaded_processor
    tokenizer = loaded_tokenizer
    model = loaded_model

    print("Model loaded successfully!")
    return True
55
 
56
+ # Load model at startup
57
+ model_loaded = load_model()
 
 
 
 
 
 
58
 
59
  class Base64Request(BaseModel):
60
  image_base64: str
 
91
 
92
  def cpu_inference(conversation, model, tokenizer, processor):
93
  """
94
+ Inference function untuk CPU
95
  """
96
  try:
97
  # Apply chat template
 
142
  "success": False
143
  }
144
 
145
@app.get("/")
async def root():
    """Landing endpoint: confirm the API is running and report model readiness.

    ``status`` is always ``"healthy"``; whether the model actually loaded is
    exposed separately through ``model_loaded``.
    """
    payload = {
        "message": "GUI-Actor API is running",
        "status": "healthy",
        "model_loaded": model_loaded,
    }
    return payload
152
+
153
  @app.post("/click/base64")
154
  async def predict_click_base64(data: Base64Request):
155
+ if not model_loaded:
156
+ return JSONResponse(
157
+ content={
158
+ "error": "Model not loaded properly",
159
+ "success": False,
160
+ "x": 0.5,
161
+ "y": 0.5
162
+ },
163
+ status_code=503
164
+ )
165
+
166
  try:
167
  # Decode base64 to image
168
  image_data = base64.b64decode(data.image_base64.split(",")[-1])
 
221
  "status": "healthy",
222
  "model": model_name,
223
  "device": "cpu",
224
+ "torch_dtype": "float32",
225
+ "model_loaded": model_loaded
226
  }
227
 
228
  @app.post("/click/form")
 
231
  instruction: str = Form(...)
232
  ):
233
  data = Base64Request(image_base64=image_base64, instruction=instruction)
234
+ return await predict_click_base64(data)
 
 
 
 
requirements.txt CHANGED
@@ -5,11 +5,12 @@ uvicorn[standard]
5
  transformers>=4.37.0
6
  datasets
7
  Pillow
 
 
8
  torch==2.2.2+cpu
9
  torchvision
10
  torchaudio
11
  --index-url https://download.pytorch.org/whl/cpu
12
  accelerate
13
  scipy
14
- numpy
15
  qwen-vl-utils
 
5
  transformers>=4.37.0
6
  datasets
7
  Pillow
8
+ # Fix NumPy compatibility issue
9
+ numpy<2.0.0
10
  torch==2.2.2+cpu
11
  torchvision
12
  torchaudio
13
  --index-url https://download.pytorch.org/whl/cpu
14
  accelerate
15
  scipy
 
16
  qwen-vl-utils