AutoWeightLoggergeetha

Sleeping

App Files Files Community

AutoWeightLoggergeetha / ocr_engine.py

Sanjayraju30

Update ocr_engine.py

d23e846 verified 2 months ago

raw

history blame

12.3 kB

	import pytesseract
	import numpy as np
	import cv2
	import re
	import logging
	from datetime import datetime
	import os
	from PIL import Image

	# Configure the root logger once at import time: INFO level, timestamped records.
	logging.basicConfig(
	    level=logging.INFO,
	    format='%(asctime)s - %(levelname)s - %(message)s',
	)

	# Folder that receives the intermediate pipeline images; created on import if missing.
	DEBUG_DIR = "debug_images"
	os.makedirs(DEBUG_DIR, exist_ok=True)

	def save_debug_image(img, filename_suffix, prefix=""):
	    """Write an intermediate image into DEBUG_DIR under a timestamped name.

	    The file is named ``{prefix}{timestamp}_{filename_suffix}.png``; the
	    microsecond-resolution timestamp makes name collisions unlikely.

	    Args:
	        img: A PIL ``Image.Image`` or an array accepted by ``cv2.imwrite``.
	            Colour arrays are written unchanged, i.e. they are taken to be
	            in OpenCV's BGR channel order.
	        filename_suffix: Label appended after the timestamp.
	        prefix: Optional text placed before the timestamp.
	    """
	    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
	    filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
	    if isinstance(img, Image.Image):
	        img.save(filename)
	        saved = True
	    else:
	        # cv2.imwrite expects BGR data, which is what the arrays handled in
	        # this module already are; swapping to RGB first would store the
	        # debug image with red and blue exchanged.
	        saved = cv2.imwrite(filename, img)
	    if saved:
	        logging.info(f"Saved debug image: {filename}")
	    else:
	        # imwrite reports failure through its return value, not an exception.
	        logging.warning(f"Failed to save debug image: {filename}")

	def estimate_brightness(img):
	    """Return the mean gray level of an image.

	    Args:
	        img: A BGR colour array, or an array that is already single-channel
	            (2-D, or 3-D with a trailing axis of length 1).

	    Returns:
	        The mean pixel intensity as computed by ``np.mean``.
	    """
	    # Single-channel input is already grayscale; cvtColor(BGR2GRAY) would
	    # reject it, so only colour arrays are converted.
	    if img.ndim == 2 or img.shape[2] == 1:
	        gray = img
	    else:
	        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	    return np.mean(gray)

	def preprocess_image(img):
	    """Build the binary mask and contrast-enhanced image used by later stages.

	    Steps: grayscale conversion, CLAHE, 3x3 Gaussian blur, inverted adaptive
	    threshold, then morphological open and close. A debug image is saved
	    after the CLAHE, blur and morphology steps.

	    Args:
	        img: BGR colour array.

	    Returns:
	        ``(thresh, enhanced)``: the cleaned inverted binary mask and the
	        CLAHE-enhanced grayscale image.
	    """
	    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

	    # Dim frames (mean gray level below 80) get the stronger clip limit.
	    clip_limit = 4.0
	    if estimate_brightness(img) < 80:
	        clip_limit = 6.0
	    equalizer = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(8, 8))
	    enhanced = equalizer.apply(grayscale)
	    save_debug_image(enhanced, "01_preprocess_clahe")

	    # Small kernel so edges are smoothed only lightly.
	    smoothed = cv2.GaussianBlur(enhanced, (3, 3), 0)
	    save_debug_image(smoothed, "02_preprocess_blur")

	    # Odd neighbourhood size derived from the image height, clamped to [9, 25].
	    odd_size = int(img.shape[0] / 20) * 2 + 1
	    block_size = max(9, min(25, odd_size))
	    binary = cv2.adaptiveThreshold(
	        smoothed,
	        255,
	        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
	        cv2.THRESH_BINARY_INV,
	        block_size,
	        7,
	    )

	    # Open twice to drop specks, then close three times to fill gaps.
	    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
	    for operation, repeats in ((cv2.MORPH_OPEN, 2), (cv2.MORPH_CLOSE, 3)):
	        binary = cv2.morphologyEx(binary, operation, kernel, iterations=repeats)
	    save_debug_image(binary, "03_preprocess_morph")
	    return binary, enhanced

	def correct_rotation(img):
	    """Deskew an image from the dominant direction of its straight edges.

	    Line segments are found with Canny + probabilistic Hough. Each segment's
	    angle is folded into [-45, 45) degrees so that horizontal edges, vertical
	    edges and segments reported with reversed endpoints all vote for the
	    same skew. Without folding, a mix of 0, +/-90 and 180 degree readings
	    from an upright rectangular display can produce a large bogus median.

	    Args:
	        img: BGR colour array.

	    Returns:
	        The rotated image when the median skew exceeds 0.5 degrees,
	        otherwise the input unchanged. On any error the input is returned.
	    """
	    try:
	        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	        edges = cv2.Canny(gray, 30, 100, apertureSize=3)
	        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=40, minLineLength=20, maxLineGap=10)
	        if lines is None:
	            return img

	        # Each entry of `lines` is [[x1, y1, x2, y2]].
	        segments = lines[:, 0, :]
	        raw_angles = np.arctan2(
	            segments[:, 3] - segments[:, 1],
	            segments[:, 2] - segments[:, 0],
	        ) * 180 / np.pi
	        # Deviation from the nearest axis, in [-45, 45).
	        skew_angles = (raw_angles + 45.0) % 90.0 - 45.0
	        angle = float(np.median(skew_angles))

	        if abs(angle) > 0.5:
	            h, w = img.shape[:2]
	            center = (w // 2, h // 2)
	            M = cv2.getRotationMatrix2D(center, angle, 1.0)
	            img = cv2.warpAffine(img, M, (w, h))
	            save_debug_image(img, "00_rotated_image")
	            logging.info(f"Applied rotation: {angle:.2f} degrees")
	        return img
	    except Exception as e:
	        # Best effort: rotation is optional, so fall back to the input image.
	        logging.error(f"Rotation correction failed: {str(e)}")
	        return img

	def detect_roi(img):
	    """Locate the most prominent bright, display-shaped region of the image.

	    The CLAHE-enhanced image is thresholded at several adaptive block sizes.
	    Every external contour passing the area, aspect-ratio, size and
	    brightness filters is scored by ``area * mean brightness``, and the
	    best-scoring one is cropped with padding.

	    Args:
	        img: BGR colour array.

	    Returns:
	        ``(roi_img, (x, y, w, h))`` for the padded crop, or ``(img, None)``
	        when no contour qualifies or an error occurs.
	    """
	    try:
	        save_debug_image(img, "04_original")
	        # Only the enhanced image is needed here; the binary mask is not used.
	        _, enhanced = preprocess_image(img)
	        brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	        img_h, img_w = img.shape[:2]
	        img_area = img_h * img_w

	        # The clamp to [9, 25] often maps several divisors to the same size;
	        # de-duplicate (order preserved) so identical passes are not repeated.
	        block_sizes = list(dict.fromkeys(
	            max(9, min(25, int(img_h / s) * 2 + 1)) for s in (10, 15, 20)
	        ))
	        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
	        valid_contours = []

	        for block_size in block_sizes:
	            temp_thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
	                                                cv2.THRESH_BINARY_INV, block_size, 7)
	            temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
	            save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
	            contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

	            for c in contours:
	                area = cv2.contourArea(c)
	                x, y, w, h = cv2.boundingRect(c)
	                roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
	                aspect_ratio = w / h
	                if (500 < area < (img_area * 0.5) and
	                        0.5 <= aspect_ratio <= 6.0 and w > 80 and h > 40 and roi_brightness > 60):
	                    valid_contours.append((c, area * roi_brightness))
	                    logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")

	        if valid_contours:
	            contour, _ = max(valid_contours, key=lambda item: item[1])
	            x, y, w, h = cv2.boundingRect(contour)
	            padding = max(25, min(70, int(min(w, h) * 0.5)))
	            # Clamp each edge independently so a box touching the top/left
	            # border is not over-extended on the opposite side.
	            x0, y0 = max(0, x - padding), max(0, y - padding)
	            x1, y1 = min(img_w, x + w + padding), min(img_h, y + h + padding)
	            x, y, w, h = x0, y0, x1 - x0, y1 - y0
	            roi_img = img[y:y+h, x:x+w]
	            save_debug_image(roi_img, "06_detected_roi")
	            logging.info(f"Detected ROI: ({x}, {y}, {w}, {h})")
	            return roi_img, (x, y, w, h)

	        logging.info("No ROI found, using full image.")
	        save_debug_image(img, "06_no_roi_fallback")
	        return img, None
	    except Exception as e:
	        logging.error(f"ROI detection failed: {str(e)}")
	        # The debug save must not turn the fallback path into a crash.
	        try:
	            save_debug_image(img, "06_roi_error_fallback")
	        except Exception as save_error:
	            logging.error(f"Could not save ROI fallback debug image: {str(save_error)}")
	        return img, None

	def detect_digit_contour(digit_img, brightness):
	    """Guess a digit from coarse shape statistics of a binary crop.

	    The guess uses the crop's fill density (share of pixels equal to 255)
	    and, for its largest external contour, the bounding-box aspect ratio
	    and the contour-area / box-area ratio.

	    Args:
	        digit_img: 2-D binary image containing one candidate glyph.
	        brightness: Accepted for interface compatibility; not read here.

	    Returns:
	        A one-character digit string, or None when the crop is too small,
	        too sparse or dense, matches no rule, or an error occurs.
	    """
	    try:
	        rows, cols = digit_img.shape
	        if rows < 20 or cols < 10:
	            logging.debug("Digit image too small for contour detection.")
	            return None

	        # Share of foreground pixels in the whole crop.
	        density = np.sum(digit_img == 255) / digit_img.size
	        if density > 0.8 or density < 0.1:
	            return None

	        outlines, _ = cv2.findContours(digit_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	        if len(outlines) == 0:
	            return None

	        largest = max(outlines, key=cv2.contourArea)
	        _, _, box_w, box_h = cv2.boundingRect(largest)
	        if box_w < 5 or box_h < 10:
	            return None

	        aspect = box_w / box_h
	        area_ratio = cv2.contourArea(largest) / (box_w * box_h)

	        # Rules are tried in order; the first shape rule that matches
	        # decides the result, even when its density sub-rule yields None.
	        if 0.2 < aspect < 0.4 and area_ratio > 0.5:
	            return '1'
	        if aspect > 0.5 and area_ratio > 0.6:
	            if density > 0.5:
	                return '8'
	            return '0' if density > 0.3 else None
	        if aspect > 0.4 and area_ratio > 0.5:
	            if density > 0.4:
	                return '3'
	            return '2' if density > 0.3 else None
	        if aspect > 0.3 and area_ratio > 0.4:
	            return '5' if density > 0.3 else '7'
	        if aspect > 0.2 and area_ratio > 0.3:
	            return '4' if density > 0.2 else '9'
	        return None
	    except Exception as e:
	        logging.error(f"Contour digit detection failed: {str(e)}")
	        return None

	def _normalize_numeric_text(raw_text):
	    """Reduce raw_text to a plain decimal string, or return None if no number remains."""
	    text = re.sub(r"[^\d\.]", "", raw_text)
	    surplus_dots = text.count('.') - 1
	    if surplus_dots > 0:
	        # Drop the leading dots so only the right-most decimal point remains.
	        text = text.replace('.', '', surplus_dots)
	    text = text.strip('.')
	    # One or more digits, optionally followed by a fractional part. The
	    # previous pattern only accepted exactly two digits.
	    if not re.fullmatch(r"\d+(?:\.\d+)?", text):
	        return None
	    whole, dot, fraction = text.partition('.')
	    # Remove redundant leading zeros but keep one digit before the point
	    # ("0.5" rather than ".5").
	    return (whole.lstrip('0') or '0') + dot + fraction


	def perform_ocr(img, roi_bbox):
	    """Read a numeric value from an image with Tesseract, then a contour fallback.

	    Tesseract runs first on the CLAHE-enhanced image in single-line,
	    digits-only mode. If it raises or its output does not normalise to a
	    decimal number, digit-sized contours of the binary mask are classified
	    one by one with ``detect_digit_contour``.

	    Args:
	        img: BGR colour array to read.
	        roi_bbox: Accepted for interface compatibility; not read here.

	    Returns:
	        ``(text, confidence)``, where confidence is a fixed score per path
	        (98.0/95.0 for Tesseract, 92.0/90.0 for contours; the higher value
	        when the text has at least three digits). ``(None, 0.0)`` when
	        nothing validates or an error occurs.
	    """
	    try:
	        thresh, enhanced = preprocess_image(img)
	        brightness = estimate_brightness(img)
	        pil_img = Image.fromarray(enhanced)
	        save_debug_image(pil_img, "07_ocr_input")

	        # Tesseract with aggressive numeric config
	        custom_config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
	        try:
	            raw_text = pytesseract.image_to_string(pil_img, config=custom_config)
	        except Exception as e:
	            # A Tesseract failure should still let the contour fallback run.
	            logging.error(f"Tesseract call failed: {str(e)}")
	            raw_text = ""
	        logging.info(f"Tesseract raw output: {raw_text}")

	        text = _normalize_numeric_text(raw_text)
	        if text is not None:
	            confidence = 98.0 if len(text.replace('.', '')) >= 3 else 95.0
	            logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
	            return text, confidence

	        # Fallback to contour-based detection
	        logging.info("Tesseract failed, using contour-based detection.")
	        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	        digit_boxes = []
	        for c in contours:
	            x, y, w, h = cv2.boundingRect(c)
	            if w > 15 and h > 20 and 0.2 <= w / h <= 1.2:
	                digit_boxes.append((x, x + w, y, y + h))
	        # Read candidates left to right.
	        digit_boxes.sort(key=lambda box: box[0])

	        pieces = []
	        prev_x_max = None
	        for idx, (x_min, x_max, y_min, y_max) in enumerate(digit_boxes):
	            # Boxes come from contours of `thresh`, so they lie inside it
	            # and are non-empty by the size filter above.
	            digit_crop = thresh[y_min:y_max, x_min:x_max]
	            save_debug_image(digit_crop, f"08_digit_crop_{idx}")
	            digit = detect_digit_contour(digit_crop, brightness)
	            if digit:
	                pieces.append(digit)
	            elif prev_x_max is not None and x_min - prev_x_max < 15:
	                # An unrecognised blob close to the previous glyph is
	                # treated as a decimal point.
	                pieces.append('.')
	            prev_x_max = x_max

	        text = _normalize_numeric_text("".join(pieces))
	        if text is not None:
	            confidence = 92.0 if len(text.replace('.', '')) >= 3 else 90.0
	            logging.info(f"Validated contour text: {text}, Confidence: {confidence:.2f}%")
	            return text, confidence

	        logging.info("No valid digits detected.")
	        return None, 0.0
	    except Exception as e:
	        logging.error(f"OCR failed: {str(e)}")
	        return None, 0.0

	def extract_weight_from_image(pil_img):
	    """Extract the displayed weight from a photo of a digital scale.

	    Pipeline: convert to BGR, deskew, detect the display region, OCR that
	    region, and if that gives no acceptable value, OCR the whole image with
	    a threshold relaxed by 5%.

	    Args:
	        pil_img: Input picture; PIL images of any mode are converted to RGB
	            first. Other array-like input is passed to ``np.array`` as-is
	            and must already be 3-channel RGB.

	    Returns:
	        ``(text, confidence)`` when a value in [0.01, 1000] meets the
	        confidence threshold, otherwise ``("Not detected", 0.0)``.
	    """
	    try:
	        # Grayscale, palette or alpha inputs would otherwise break or
	        # distort the RGB->BGR conversion below.
	        if isinstance(pil_img, Image.Image):
	            pil_img = pil_img.convert("RGB")
	        img = np.array(pil_img)
	        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
	        save_debug_image(img, "00_input_image")
	        img = correct_rotation(img)
	        brightness = estimate_brightness(img)
	        conf_threshold = 0.9 if brightness > 100 else 0.7

	        roi_img, roi_bbox = detect_roi(img)
	        if roi_bbox:
	            roi_area = roi_bbox[2] * roi_bbox[3]
	            img_area = img.shape[0] * img.shape[1]
	            if roi_area > img_area * 0.3:
	                # Large ROI: tighten the brightness-based threshold rather
	                # than replace it. Capped at 0.98 because perform_ocr never
	                # reports more than 98%, so a higher bar rejects everything.
	                conf_threshold = min(conf_threshold * 1.15, 0.98)

	        result, confidence = perform_ocr(roi_img, roi_bbox)
	        if result and confidence >= conf_threshold * 100:
	            try:
	                weight = float(result)
	                if 0.01 <= weight <= 1000:
	                    logging.info(f"Detected weight: {result} kg, Confidence: {confidence:.2f}%")
	                    return result, confidence
	                logging.warning(f"Weight {result} out of range.")
	            except ValueError:
	                logging.warning(f"Invalid weight format: {result}")

	        logging.info("Primary OCR failed, using full image fallback.")
	        result, confidence = perform_ocr(img, None)
	        if result and confidence >= conf_threshold * 0.95 * 100:
	            try:
	                weight = float(result)
	                if 0.01 <= weight <= 1000:
	                    logging.info(f"Full image weight: {result} kg, Confidence: {confidence:.2f}%")
	                    return result, confidence
	                logging.warning(f"Full image weight {result} out of range.")
	            except ValueError:
	                logging.warning(f"Invalid full image weight format: {result}")

	        logging.info("No valid weight detected.")
	        return "Not detected", 0.0
	    except Exception as e:
	        logging.error(f"Weight extraction failed: {str(e)}")
	        return "Not detected", 0.0