Sanjayraju30 committed on
Commit
7112c61
·
verified ·
1 Parent(s): 9d1fcb0

Update src/ocr_engine.py

Browse files
Files changed (1) hide show
  1. src/ocr_engine.py +8 -3
src/ocr_engine.py CHANGED
@@ -1,19 +1,24 @@
1
  import os
2
- os.system("apt-get update && apt-get install -y libgl1-mesa-glx") # ← This is the fix!
3
-
4
- import cv2
5
  import numpy as np
6
  import pytesseract
7
  from PIL import Image
8
 
9
  def extract_weight_from_image(pil_img):
 
 
 
 
 
10
  image = np.array(pil_img.convert("RGB"))
11
  gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
12
  blur = cv2.GaussianBlur(gray, (3, 3), 0)
13
  _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
 
14
  config = '--psm 7 -c tessedit_char_whitelist=0123456789.'
15
  data = pytesseract.image_to_data(thresh, config=config, output_type=pytesseract.Output.DICT)
 
16
  extracted_text = ''.join(filter(lambda x: x in '0123456789.', ''.join(data['text'])))
17
  confidences = [int(conf) for conf in data['conf'] if conf.isdigit()]
18
  avg_conf = sum(confidences)/len(confidences) if confidences else 0
 
19
  return extracted_text.strip(), avg_conf
 
1
  import os
 
 
 
2
  import numpy as np
3
  import pytesseract
4
  from PIL import Image
5
 
6
  def extract_weight_from_image(pil_img):
7
+ # ✅ Install system lib first (before cv2 is imported)
8
+ os.system("apt-get update && apt-get install -y libgl1-mesa-glx")
9
+
10
+ import cv2 # ✅ delay import until libGL is ready
11
+
12
  image = np.array(pil_img.convert("RGB"))
13
  gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
14
  blur = cv2.GaussianBlur(gray, (3, 3), 0)
15
  _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
16
+
17
  config = '--psm 7 -c tessedit_char_whitelist=0123456789.'
18
  data = pytesseract.image_to_data(thresh, config=config, output_type=pytesseract.Output.DICT)
19
+
20
  extracted_text = ''.join(filter(lambda x: x in '0123456789.', ''.join(data['text'])))
21
  confidences = [int(conf) for conf in data['conf'] if conf.isdigit()]
22
  avg_conf = sum(confidences)/len(confidences) if confidences else 0
23
+
24
  return extracted_text.strip(), avg_conf