WebashalarForML committed on
Commit
3e98e01
·
verified ·
1 Parent(s): 461f0f8

Update utility/utils.py

Browse files
Files changed (1) hide show
  1. utility/utils.py +21 -17
utility/utils.py CHANGED
@@ -145,40 +145,44 @@ def ocr_with_paddle(img):
145
  def extract_text_from_images(image_paths):
146
  all_extracted_texts = {}
147
  all_extracted_imgs = {}
148
- for image_path in image_paths:
 
149
  try:
 
 
 
150
  # Enhance the image before OCR
151
  enhanced_image = process_image(image_path, scale=2)
152
 
153
  # Perform OCR on the enhanced image and get boxes
154
  result, img_with_boxes = ocr_with_paddle(enhanced_image)
155
 
156
- # Draw bounding boxes on the processed image
157
  img_result = Image.fromarray(enhanced_image)
158
- #img_with_boxes = draw_boxes(img_result, boxes)
159
 
160
- # generating a unique id to save the images
161
- # Get the current date and time
162
- current_time = datetime.now()
163
 
164
- # Format it as a string to create a unique ID
165
- unique_id = current_time.strftime("%Y%m%d%H%M%S%f")
 
 
 
166
 
167
- #print(unique_id)
168
 
169
- # Save the image with boxes
170
- result_image_path = os.path.join(RESULT_FOLDER, f'result_{unique_id}_{os.path.basename(image_path)}')
171
- #img_with_boxes.save(result_image_path)
172
- cv2.imwrite(result_image_path, img_with_boxes)
 
 
173
 
174
- # Store the text and image result paths
175
- all_extracted_texts[image_path] = result
176
- all_extracted_imgs[image_path] = result_image_path
177
  except ValueError as ve:
178
  print(f"Error processing image {image_path}: {ve}")
179
  continue # Continue to the next image if there's an error
180
 
181
- # Convert to JSON-compatible structure
182
  all_extracted_imgs_json = {str(k): str(v) for k, v in all_extracted_imgs.items()}
183
  return all_extracted_texts, all_extracted_imgs_json
184
 
 
145
  def extract_text_from_images(image_paths):
146
  all_extracted_texts = {}
147
  all_extracted_imgs = {}
148
+
149
+ for idx, image_path in enumerate(image_paths, start=1):
150
  try:
151
+ # Generate sequential labels like 'image_1', 'image_2', etc.
152
+ image_label = f'image_{idx}'
153
+
154
  # Enhance the image before OCR
155
  enhanced_image = process_image(image_path, scale=2)
156
 
157
  # Perform OCR on the enhanced image and get boxes
158
  result, img_with_boxes = ocr_with_paddle(enhanced_image)
159
 
160
+ # Convert the enhanced image to PIL format (if needed)
161
  img_result = Image.fromarray(enhanced_image)
 
162
 
163
+ # Generate a unique ID for saving the result image
164
+ unique_id = datetime.now().strftime("%Y%m%d%H%M%S%f")
 
165
 
166
+ # Save the image with bounding boxes
167
+ result_image_path = os.path.join(
168
+ RESULT_FOLDER, f'result_{unique_id}_{os.path.basename(image_path)}'
169
+ )
170
+ cv2.imwrite(result_image_path, img_with_boxes)
171
 
172
+ rst_path=f'https://webashalarforml-imagedataextractor3.hf.space/static/results/result_{unique_id}_{os.path.basename(image_path)}'
173
 
174
+ # Store results using the sequential image label
175
+ all_extracted_texts[image_label] = result
176
+ all_extracted_imgs[image_label] = rst_path
177
+
178
+ print("The all extracted text:", all_extracted_texts)
179
+ print("The all extracted images:", all_extracted_imgs)
180
 
 
 
 
181
  except ValueError as ve:
182
  print(f"Error processing image {image_path}: {ve}")
183
  continue # Continue to the next image if there's an error
184
 
185
+ # Convert image paths to a JSON-compatible structure
186
  all_extracted_imgs_json = {str(k): str(v) for k, v in all_extracted_imgs.items()}
187
  return all_extracted_texts, all_extracted_imgs_json
188