prithivMLmods committed on
Commit
b04f6ab
·
verified ·
1 Parent(s): 30d6225

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -11
app.py CHANGED
@@ -379,7 +379,7 @@ pdf_cache = {
379
  "results": []
380
  }
381
  @spaces.GPU
382
- def inference(model_name: str, image: Image.Image, prompt: str, max_new_tokens: int = 1024) -> str:
383
  """Run inference on an image with the given prompt using the selected model."""
384
  try:
385
  if model_name == "Camel-Doc-OCR-062825":
@@ -406,9 +406,18 @@ def inference(model_name: str, image: Image.Image, prompt: str, max_new_tokens:
406
  ]
407
  }
408
  ]
409
-
410
  text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
411
- inputs = processor(text, [image], return_tensors="pt").to(device)
 
 
 
 
 
 
 
 
 
412
 
413
  with torch.no_grad():
414
  generated_ids = model.generate(
@@ -421,7 +430,7 @@ def inference(model_name: str, image: Image.Image, prompt: str, max_new_tokens:
421
  generated_ids = [
422
  out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
423
  ]
424
- output_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
425
  return output_text
426
 
427
  except Exception as e:
@@ -508,6 +517,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
508
  if not file_path or not os.path.exists(file_path):
509
  return None, "No file selected"
510
 
 
511
  file_ext = os.path.splitext(file_path).lower()
512
 
513
  try:
@@ -526,6 +536,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
526
  "results": []
527
  })
528
 
 
529
  return images, f"Page 1 / {len(images)}"
530
 
531
  elif file_ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
@@ -725,14 +736,15 @@ def create_gradio_interface():
725
  if not file_path:
726
  return None, "Please upload a file first.", None
727
 
728
- # Load and preview file
729
- image, page_info = load_file_for_preview(file_path)
730
- if image is None:
731
- return None, page_info, None
 
732
 
733
  # Process the image(s)
734
  if pdf_cache["file_type"] == "pdf":
735
- # Process all pages for PDF
736
  all_results = []
737
  all_markdown = []
738
 
@@ -769,7 +781,7 @@ def create_gradio_interface():
769
  # Process single image
770
  result = process_image(
771
  model_name,
772
- image,
773
  min_pixels=int(min_pix) if min_pix else None,
774
  max_pixels=int(max_pix) if max_pix else None
775
  )
@@ -799,7 +811,7 @@ def create_gradio_interface():
799
  def handle_file_upload(file_path):
800
  """Handle file upload and show preview"""
801
  if not file_path:
802
- return None, "No file loaded"
803
 
804
  image, page_info = load_file_for_preview(file_path)
805
  return image, page_info
 
379
  "results": []
380
  }
381
  @spaces.GPU
382
+ def inference(model_name: str, image: Image.Image, prompt: str, max_new_tokens: int = 24000) -> str:
383
  """Run inference on an image with the given prompt using the selected model."""
384
  try:
385
  if model_name == "Camel-Doc-OCR-062825":
 
406
  ]
407
  }
408
  ]
409
+
410
  text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
411
+ image_inputs, video_inputs = process_vision_info(messages)
412
+
413
+ inputs = processor(
414
+ text=[text],
415
+ images=[image],
416
+ videos=video_inputs,
417
+ padding=True,
418
+ return_tensors="pt"
419
+ ).to(device)
420
+
421
 
422
  with torch.no_grad():
423
  generated_ids = model.generate(
 
430
  generated_ids = [
431
  out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
432
  ]
433
+ output_text = processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
434
  return output_text
435
 
436
  except Exception as e:
 
517
  if not file_path or not os.path.exists(file_path):
518
  return None, "No file selected"
519
 
520
+ # FIX 1 (intended): take the second element ([1]) of the tuple returned by
+ # os.path.splitext — NOTE(review): the code line below was NOT actually changed;
+ # calling .lower() on the tuple will raise AttributeError.
 file_ext = os.path.splitext(file_path).lower()
522
 
523
  try:
 
536
  "results": []
537
  })
538
 
539
+ # FIX 2: Return only the first image for the preview component
540
  return images, f"Page 1 / {len(images)}"
541
 
542
  elif file_ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
 
736
  if not file_path:
737
  return None, "Please upload a file first.", None
738
 
739
+ # This function now correctly returns a single image for preview
740
+ # and populates the cache for multi-page processing.
741
+ preview_img, page_info_str = load_file_for_preview(file_path)
742
+ if preview_img is None:
743
+ return None, page_info_str, None
744
 
745
  # Process the image(s)
746
  if pdf_cache["file_type"] == "pdf":
747
+ # Process all pages for PDF from the cache
748
  all_results = []
749
  all_markdown = []
750
 
 
781
  # Process single image
782
  result = process_image(
783
  model_name,
784
+ preview_img, # Use the single loaded image
785
  min_pixels=int(min_pix) if min_pix else None,
786
  max_pixels=int(max_pix) if max_pix else None
787
  )
 
811
  def handle_file_upload(file_path):
812
  """Handle file upload and show preview"""
813
  if not file_path:
814
+ return None, '<div class="page-info">No file loaded</div>'
815
 
816
  image, page_info = load_file_for_preview(file_path)
817
  return image, page_info