abiyyufahri committed
Commit 7077007 · verified · 1 Parent(s): 6b36184

Update app.py

Files changed (1):
  1. app.py +15 -45
app.py CHANGED
@@ -167,64 +167,34 @@ def extract_coordinates(text):
     return [(0.5, 0.5)]
 
 def cpu_inference(conversation, model, tokenizer, processor):
-    """
-    Inference function for CPU with better error handling
-    """
     try:
-        # Apply chat template
-        text = processor.apply_chat_template(
-            conversation,
-            tokenize=False,
-            add_generation_prompt=True
-        )
-
-        # Get image from conversation
-        image = conversation[1]["content"][0]["image"]
-
-        # Process inputs with proper padding
         inputs = processor(
-            text=[text],
-            images=[image],
+            conversation,
             return_tensors="pt",
-            padding=True,  # Enable padding
-            truncation=True,  # Enable truncation for long texts
-            max_length=512  # Set reasonable max length
+            padding=True,
+            truncation=True
         )
-
-        # Generate response with proper error handling
+
         with torch.no_grad():
-            try:
-                outputs = model.generate(
-                    **inputs,
-                    max_new_tokens=256,
-                    do_sample=True,
-                    temperature=0.3,
-                    top_p=0.8,
-                    pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id else tokenizer.pad_token_id
-                )
-            except Exception as e:
-                logger.error(f"Generation error: {e}")
-                # Try with simpler parameters
-                outputs = model.generate(
-                    **inputs,
-                    max_new_tokens=128,
-                    do_sample=False,
-                    pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id else 0
-                )
-
-        # Decode response
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=256,
+                do_sample=True,
+                temperature=0.3,
+                top_p=0.8,
+                pad_token_id=tokenizer.eos_token_id or tokenizer.pad_token_id or 0
+            )
+
         generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
         response = tokenizer.decode(generated_ids, skip_special_tokens=True)
-
-        # Extract coordinates
         coordinates = extract_coordinates(response)
-
+
         return {
            "topk_points": coordinates,
            "response": response,
            "success": True
         }
-
+
     except Exception as e:
         logger.error(f"Inference error: {e}")
         return {
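
Note on the new pad-token expression: Python's "or" returns its first truthy operand, so "tokenizer.eos_token_id or tokenizer.pad_token_id or 0" behaves like the removed if/else chain while also covering the case where both ids are missing. A minimal, self-contained sketch of that fallback logic (resolve_pad_token_id and the SimpleNamespace stand-ins are hypothetical, for illustration only; they are not part of app.py):

    from types import SimpleNamespace

    def resolve_pad_token_id(tokenizer):
        # Same semantics as the expression in the new generate() call:
        # take the first truthy token id, falling back to 0.
        return tokenizer.eos_token_id or tokenizer.pad_token_id or 0

    print(resolve_pad_token_id(SimpleNamespace(eos_token_id=2, pad_token_id=None)))     # 2
    print(resolve_pad_token_id(SimpleNamespace(eos_token_id=None, pad_token_id=1)))     # 1
    print(resolve_pad_token_id(SimpleNamespace(eos_token_id=None, pad_token_id=None)))  # 0

One caveat carried over from the old conditionals, which also tested truthiness: a legitimate token id of 0 is falsy and would be skipped in favor of the next fallback.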