Update app.py

app.py CHANGED
@@ -379,7 +379,7 @@ pdf_cache = {
     "results": []
 }
 @spaces.GPU
-def inference(model_name: str, image: Image.Image, prompt: str, max_new_tokens: int =
+def inference(model_name: str, image: Image.Image, prompt: str, max_new_tokens: int = 24000) -> str:
     """Run inference on an image with the given prompt using the selected model."""
     try:
         if model_name == "Camel-Doc-OCR-062825":
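Note: `@spaces.GPU` is the ZeroGPU decorator from Hugging Face's `spaces` package; it requests a GPU slot for the duration of each decorated call. A minimal sketch of its use (the `duration` argument is illustrative, not part of this commit):

```python
import spaces

@spaces.GPU(duration=120)  # illustrative: reserve a ZeroGPU slot for up to ~120 s per call
def inference(model_name: str, image, prompt: str, max_new_tokens: int = 24000) -> str:
    ...  # the model call runs on the allocated GPU
```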
@@ -406,9 +406,18 @@ def inference(model_name: str, image: Image.Image, prompt: str, max_new_tokens:
                 ]
             }
         ]
-
+
         text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-
+        image_inputs, video_inputs = process_vision_info(messages)
+
+        inputs = processor(
+            text=[text],
+            images=[image],
+            videos=video_inputs,
+            padding=True,
+            return_tensors="pt"
+        ).to(device)
+
 
         with torch.no_grad():
             generated_ids = model.generate(
@@ -421,7 +430,7 @@ def inference(model_name: str, image: Image.Image, prompt: str, max_new_tokens:
         generated_ids = [
             out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
         ]
-        output_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
+        output_text = processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
         return output_text
 
     except Exception as e:
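Taken together, these hunks bring `inference` in line with the usual Qwen2-VL-style pipeline: render the chat template, gather vision inputs, batch through the processor, generate, then decode only the newly generated tokens. A self-contained sketch of that flow, assuming `model`, `processor`, and `device` are already loaded and that `process_vision_info` comes from `qwen_vl_utils` (the diff itself passes `images=[image]` directly, which works for a single PIL image):

```python
import torch
from qwen_vl_utils import process_vision_info

def run_vlm(model, processor, device, image, prompt, max_new_tokens=24000):
    # Single-turn chat message with one image part and one text part.
    messages = [{"role": "user", "content": [
        {"type": "image", "image": image},
        {"type": "text", "text": prompt},
    ]}]
    # Render the chat template, then collect the vision inputs it references.
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(text=[text], images=image_inputs, videos=video_inputs,
                       padding=True, return_tensors="pt").to(device)
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # Strip the prompt tokens so only newly generated tokens are decoded.
    trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
    return processor.batch_decode(trimmed, skip_special_tokens=True,
                                  clean_up_tokenization_spaces=False)[0]
```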
@@ -508,6 +517,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
     if not file_path or not os.path.exists(file_path):
         return None, "No file selected"
 
+    # FIX 1: Access the second element of the tuple returned by os.path.splitext
     file_ext = os.path.splitext(file_path)[1].lower()
 
     try:
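FIX 1 is needed because `os.path.splitext` returns a `(root, ext)` tuple; calling `.lower()` on the tuple raises `AttributeError: 'tuple' object has no attribute 'lower'`. The extension must be indexed out first:

```python
import os

root, ext = os.path.splitext("scan.PDF")  # ("scan", ".PDF")
file_ext = ext.lower()                    # ".pdf"

# Equivalent one-liner, as in the fixed code:
file_ext = os.path.splitext("scan.PDF")[1].lower()
```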
@@ -526,6 +536,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
                 "results": []
             })
 
+        # FIX 2: Return only the first image for the preview component
         return images[0], f"Page 1 / {len(images)}"
 
     elif file_ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
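For orientation, this is the preview contract the PDF branch appears to rely on: every rendered page goes into `pdf_cache`, while only page 1 is returned to the single-image preview widget, matching the `Tuple[Optional[Image.Image], str]` return type. A sketch under that assumption (the `images` and `current_page` keys are inferred from usage, not shown verbatim in this diff):

```python
# Assumed cache layout; only "file_type" and "results" appear verbatim in the diff.
pdf_cache = {
    "file_type": "pdf",   # or "image" for single-image uploads
    "images": [],         # one PIL.Image per rendered page (assumed key)
    "current_page": 0,    # assumed key
    "results": [],        # per-page model outputs, filled during processing
}

def cache_and_preview(images):
    """Cache all rendered pages, hand only the first to the preview component."""
    pdf_cache.update({"images": images, "current_page": 0, "results": []})
    return images[0], f"Page 1 / {len(images)}"
```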
@@ -725,14 +736,15 @@ def create_gradio_interface():
         if not file_path:
             return None, "Please upload a file first.", None
 
-        #
-
-
-
+        # This function now correctly returns a single image for preview
+        # and populates the cache for multi-page processing.
+        preview_img, page_info_str = load_file_for_preview(file_path)
+        if preview_img is None:
+            return None, page_info_str, None
 
         # Process the image(s)
         if pdf_cache["file_type"] == "pdf":
-            # Process all pages for PDF
+            # Process all pages for PDF from the cache
             all_results = []
             all_markdown = []
 
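The PDF branch then iterates the cached pages and collects both raw results and per-page markdown. A sketch of that loop, assuming the cache layout above and the `process_image(model_name, image, min_pixels=..., max_pixels=...)` signature visible in the next hunk:

```python
def process_all_pages(model_name, min_pix=None, max_pix=None):
    all_results, all_markdown = [], []
    for i, page_img in enumerate(pdf_cache["images"], start=1):
        result = process_image(
            model_name,
            page_img,
            min_pixels=int(min_pix) if min_pix else None,
            max_pixels=int(max_pix) if max_pix else None,
        )
        all_results.append(result)
        all_markdown.append(f"## Page {i}\n\n{result}")
    pdf_cache["results"] = all_results
    return all_results, "\n\n".join(all_markdown)
```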
@@ -769,7 +781,7 @@ def create_gradio_interface():
             # Process single image
             result = process_image(
                 model_name,
-                image,
+                preview_img,  # Use the single loaded image
                 min_pixels=int(min_pix) if min_pix else None,
                 max_pixels=int(max_pix) if max_pix else None
             )
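`min_pixels`/`max_pixels` follow the resolution-budget convention of Qwen2-VL-style processors, where each visual token covers a 28x28 patch, so the bounds cap how many tokens an image expands into. A sketch of how such bounds are conventionally set when loading the processor (checkpoint name and values are illustrative, not from this commit):

```python
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained(
    "Qwen/Qwen2-VL-7B-Instruct",   # illustrative checkpoint
    min_pixels=256 * 28 * 28,      # lower bound on image area after resizing
    max_pixels=1280 * 28 * 28,     # upper bound; caps the visual token count
)
```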
@@ -799,7 +811,7 @@ def create_gradio_interface():
         def handle_file_upload(file_path):
             """Handle file upload and show preview"""
             if not file_path:
-                return None, "No file loaded"
+                return None, '<div class="page-info">No file loaded</div>'
 
             image, page_info = load_file_for_preview(file_path)
             return image, page_info
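Finally, `handle_file_upload` returns `(preview_image, page_info_html)`, which suggests wiring like the following on the upload component (all component names here are assumptions for illustration):

```python
import gradio as gr

with gr.Blocks() as demo:
    file_input = gr.File(label="Upload image or PDF", type="filepath")
    preview = gr.Image(label="Preview")
    page_info = gr.HTML('<div class="page-info">No file loaded</div>')

    # On upload, show page 1 and the "Page 1 / N" indicator.
    file_input.change(handle_file_upload, inputs=file_input,
                      outputs=[preview, page_info])

demo.launch()
```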
|