geethareddy committed on
Commit
c0652ff
·
verified ·
1 Parent(s): 496e98a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -16
app.py CHANGED
@@ -111,7 +111,7 @@ def detect_roi(img_cv):
111
  return img_cv
112
 
113
  def extract_weight(img):
114
- """Extract weight from image using Tesseract OCR with multiple PSM modes."""
115
  try:
116
  if img is None:
117
  logging.error("No image provided for OCR")
@@ -124,29 +124,26 @@ def extract_weight(img):
124
  # Preprocess the ROI
125
  processed_img = preprocess_image(roi_img)
126
 
127
- # Try multiple PSM modes for better detection
128
- psm_modes = [
129
- ('--psm 7 digits', 'Single line, digits only'),
130
- ('--psm 6 digits', 'Single block, digits only'),
131
- ('--psm 10 digits', 'Single character, digits only'),
132
- ('--psm 8 digits', 'Single word, digits only')
133
- ]
134
 
135
- for config, desc in psm_modes:
136
- text = pytesseract.image_to_string(processed_img, config=config)
137
- logging.info(f"OCR attempt with {desc}: Raw text = '{text}'")
138
- weight = ''.join(filter(lambda x: x in '0123456789.', text.strip()))
 
 
 
139
  try:
140
  weight_float = float(weight)
141
- if weight_float >= 0: # Allow zero weights
142
- confidence = 95.0 # Simplified confidence for valid numbers
143
  logging.info(f"Weight detected: {weight} (Confidence: {confidence:.2f}%)")
144
  return weight, confidence
145
  except ValueError:
146
  logging.warning(f"Invalid number format: {weight}")
147
- continue
148
 
149
- logging.error("All OCR attempts failed to detect a valid weight")
150
  return "Not detected", 0.0
151
  except Exception as e:
152
  logging.error(f"OCR processing failed: {str(e)}")
 
111
  return img_cv
112
 
113
  def extract_weight(img):
114
+ """Extract weight from image using Tesseract OCR with improved configuration."""
115
  try:
116
  if img is None:
117
  logging.error("No image provided for OCR")
 
124
  # Preprocess the ROI
125
  processed_img = preprocess_image(roi_img)
126
 
127
+ # OCR configuration for digit extraction
128
+ custom_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.'
 
 
 
 
 
129
 
130
+ # Run OCR
131
+ text = pytesseract.image_to_string(processed_img, config=custom_config)
132
+ logging.info(f"OCR result: '{text}'")
133
+
134
+ # Extract valid weight from OCR result
135
+ weight = ''.join(filter(lambda x: x in '0123456789.', text.strip()))
136
+ if weight:
137
  try:
138
  weight_float = float(weight)
139
+ if weight_float >= 0: # Only accept valid weights
140
+ confidence = 95.0 # Assume high confidence if we have a valid weight
141
  logging.info(f"Weight detected: {weight} (Confidence: {confidence:.2f}%)")
142
  return weight, confidence
143
  except ValueError:
144
  logging.warning(f"Invalid number format: {weight}")
 
145
 
146
+ logging.error("OCR failed to detect a valid weight")
147
  return "Not detected", 0.0
148
  except Exception as e:
149
  logging.error(f"OCR processing failed: {str(e)}")