ImageDataExtractor3

Runtime error

App Files Files Community

WebashalarForML committed on Oct 12, 2024

Commit

3e98e01

verified ·

1 Parent(s): 461f0f8

Update utility/utils.py

Browse files

Files changed (1) hide show

utility/utils.py +21 -17

utility/utils.py CHANGED Viewed

@@ -145,40 +145,44 @@ def ocr_with_paddle(img):
 def extract_text_from_images(image_paths):
     all_extracted_texts = {}
     all_extracted_imgs = {}
-    for image_path in image_paths:
         try:
             # Enhance the image before OCR
             enhanced_image = process_image(image_path, scale=2)
             # Perform OCR on the enhanced image and get boxes
             result, img_with_boxes = ocr_with_paddle(enhanced_image)
-            # Draw bounding boxes on the processed image
             img_result = Image.fromarray(enhanced_image)
-            #img_with_boxes = draw_boxes(img_result, boxes)
-            # generating unique id to save the images
-            # Get the current date and time
-            current_time = datetime.now()
-            # Format it as a string to create a unique ID
-            unique_id = current_time.strftime("%Y%m%d%H%M%S%f")
-            #print(unique_id)
-            # Save the image with boxes
-            result_image_path = os.path.join(RESULT_FOLDER, f'result_{unique_id}_{os.path.basename(image_path)}')
-            #img_with_boxes.save(result_image_path)
-            cv2.imwrite(result_image_path, img_with_boxes)
-            # Store the text and image result paths
-            all_extracted_texts[image_path] = result
-            all_extracted_imgs[image_path] = result_image_path
         except ValueError as ve:
             print(f"Error processing image {image_path}: {ve}")
             continue  # Continue to the next image if there's an error
-    # Convert to JSON-compatible structure
     all_extracted_imgs_json = {str(k): str(v) for k, v in all_extracted_imgs.items()}
     return all_extracted_texts, all_extracted_imgs_json

 def extract_text_from_images(image_paths):
     all_extracted_texts = {}
     all_extracted_imgs = {}
+    for idx, image_path in enumerate(image_paths, start=1):
         try:
+            # Generate sequential labels like 'image_1', 'image_2', etc.
+            image_label = f'image_{idx}'
             # Enhance the image before OCR
             enhanced_image = process_image(image_path, scale=2)
             # Perform OCR on the enhanced image and get boxes
             result, img_with_boxes = ocr_with_paddle(enhanced_image)
+            # Convert the enhanced image to PIL format (if needed)
             img_result = Image.fromarray(enhanced_image)
+            # Generate a unique ID for saving the result image
+            unique_id = datetime.now().strftime("%Y%m%d%H%M%S%f")
+            # Save the image with bounding boxes
+            result_image_path = os.path.join(
+                RESULT_FOLDER, f'result_{unique_id}_{os.path.basename(image_path)}'
+            )
+            cv2.imwrite(result_image_path, img_with_boxes)
+            rst_path=f'https://webashalarforml-imagedataextractor3.hf.space/static/results/result_{unique_id}_{os.path.basename(image_path)}'
+            # Store results using the sequential image label
+            all_extracted_texts[image_label] = result
+            all_extracted_imgs[image_label] = rst_path
+            print("The all extracted text:", all_extracted_texts)
+            print("The all extracted images:", all_extracted_imgs)
         except ValueError as ve:
             print(f"Error processing image {image_path}: {ve}")
             continue  # Continue to the next image if there's an error
+    # Convert image paths to a JSON-compatible structure
     all_extracted_imgs_json = {str(k): str(v) for k, v in all_extracted_imgs.items()}
     return all_extracted_texts, all_extracted_imgs_json