Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	Update ocr_engine.py
Browse files- ocr_engine.py +29 -6
    	
        ocr_engine.py
    CHANGED
    
    | @@ -5,18 +5,45 @@ import re | |
| 5 |  | 
| 6 | 
             
            # Shared EasyOCR reader: English language list, GPU disabled (CPU inference).
            reader = easyocr.Reader(['en'], gpu=False)
         | 
| 7 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 8 | 
             
            def extract_weight_from_image(pil_img):
         | 
| 9 | 
             
                try:
         | 
| 10 | 
             
                    img = np.array(pil_img)
         | 
| 11 |  | 
| 12 | 
            -
                    # Resize if too large
         | 
| 13 | 
             
                    max_dim = 1000
         | 
| 14 | 
             
                    height, width = img.shape[:2]
         | 
| 15 | 
             
                    if max(height, width) > max_dim:
         | 
| 16 | 
             
                        scale = max_dim / max(height, width)
         | 
| 17 | 
             
                        img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 18 |  | 
| 19 | 
            -
                    results = reader.readtext( | 
| 20 |  | 
| 21 | 
             
                    best_weight = None
         | 
| 22 | 
             
                    best_conf = 0.0
         | 
| @@ -27,7 +54,6 @@ def extract_weight_from_image(pil_img): | |
| 27 | 
             
                        text, conf = item[1]
         | 
| 28 | 
             
                        cleaned = text.lower().strip()
         | 
| 29 |  | 
| 30 | 
            -
                        # Fix misread characters
         | 
| 31 | 
             
                        cleaned = cleaned.replace(",", ".")
         | 
| 32 | 
             
                        cleaned = cleaned.replace("o", "0").replace("O", "0")
         | 
| 33 | 
             
                        cleaned = cleaned.replace("s", "5").replace("S", "5")
         | 
| @@ -35,13 +61,11 @@ def extract_weight_from_image(pil_img): | |
| 35 | 
             
                        cleaned = cleaned.replace("kg", "").replace("kgs", "")
         | 
| 36 | 
             
                        cleaned = re.sub(r"[^\d\.]", "", cleaned)
         | 
| 37 |  | 
| 38 | 
            -
                        # Check for number format like 75.5, 102.3
         | 
| 39 | 
             
                        if re.fullmatch(r"\d{2,4}(\.\d{1,3})?", cleaned):
         | 
| 40 | 
             
                            if conf > best_conf:
         | 
| 41 | 
             
                                best_weight = cleaned
         | 
| 42 | 
             
                                best_conf = conf
         | 
| 43 |  | 
| 44 | 
            -
                    # If nothing matched, return first numeric string found
         | 
| 45 | 
             
                    if not best_weight:
         | 
| 46 | 
             
                        for item in results:
         | 
| 47 | 
             
                            if len(item) != 2 or not isinstance(item[1], tuple):
         | 
| @@ -56,7 +80,6 @@ def extract_weight_from_image(pil_img): | |
| 56 | 
             
                    if not best_weight:
         | 
| 57 | 
             
                        return "Not detected", 0.0
         | 
| 58 |  | 
| 59 | 
            -
                    # Strip leading zeros
         | 
| 60 | 
             
                    if "." in best_weight:
         | 
| 61 | 
             
                        int_part, dec_part = best_weight.split(".")
         | 
| 62 | 
             
                        int_part = int_part.lstrip("0") or "0"
         | 
|  | |
| 5 |  | 
| 6 | 
             
            # Shared EasyOCR reader: English language list, GPU disabled (CPU inference).
            reader = easyocr.Reader(['en'], gpu=False)
         | 
| 7 |  | 
| 8 | 
            +
            def enhance_image(img):
                """Preprocess an image array for OCR and return a binarized image.

                Pipeline: grayscale -> sharpen -> CLAHE -> denoise -> adaptive threshold.

                Args:
                    img: Image as a NumPy array. The caller in this module passes
                        ``np.array(pil_img)``, so colour input is treated as RGB
                        (or RGBA); a 2-D, already-grayscale array is accepted too.

                Returns:
                    The single-channel result of ``cv2.adaptiveThreshold``.
                """
                # Convert to grayscale. PIL-derived arrays are RGB-ordered, so use the
                # RGB conversion codes; BGR2GRAY would swap the red/blue luminance
                # weights. A 2-D array is already grayscale and cvtColor would raise
                # on it, so pass it through unchanged.
                if img.ndim == 2:
                    gray = img
                elif img.shape[2] == 4:
                    gray = cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
                else:
                    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

                # Apply sharpening kernel (centre-weighted 3x3, coefficients sum to 1)
                kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
                sharp = cv2.filter2D(gray, -1, kernel)

                # Contrast Limited Adaptive Histogram Equalization (CLAHE)
                clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
                contrast = clahe.apply(sharp)

                # Denoising
                denoised = cv2.fastNlMeansDenoising(contrast, h=30)

                # Adaptive threshold for very dim images
                thresh = cv2.adaptiveThreshold(
                    denoised,
                    255,
                    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                    cv2.THRESH_BINARY,
                    11,
                    2,
                )

                return thresh
         | 
| 28 | 
            +
             | 
| 29 | 
             
            def extract_weight_from_image(pil_img):
         | 
| 30 | 
             
                try:
         | 
| 31 | 
             
                    img = np.array(pil_img)
         | 
| 32 |  | 
| 33 | 
            +
                    # Resize if too large or too small
         | 
| 34 | 
             
                    max_dim = 1000
         | 
| 35 | 
             
                    height, width = img.shape[:2]
         | 
| 36 | 
             
                    if max(height, width) > max_dim:
         | 
| 37 | 
             
                        scale = max_dim / max(height, width)
         | 
| 38 | 
             
                        img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
         | 
| 39 | 
            +
                    elif max(height, width) < 400:
         | 
| 40 | 
            +
                        scale = 2.5  # Upscale very small images
         | 
| 41 | 
            +
                        img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
         | 
| 42 | 
            +
             | 
| 43 | 
            +
                    # Enhance image for OCR
         | 
| 44 | 
            +
                    preprocessed = enhance_image(img)
         | 
| 45 |  | 
| 46 | 
            +
                    results = reader.readtext(preprocessed)
         | 
| 47 |  | 
| 48 | 
             
                    best_weight = None
         | 
| 49 | 
             
                    best_conf = 0.0
         | 
|  | |
| 54 | 
             
                        text, conf = item[1]
         | 
| 55 | 
             
                        cleaned = text.lower().strip()
         | 
| 56 |  | 
|  | |
| 57 | 
             
                        cleaned = cleaned.replace(",", ".")
         | 
| 58 | 
             
                        cleaned = cleaned.replace("o", "0").replace("O", "0")
         | 
| 59 | 
             
                        cleaned = cleaned.replace("s", "5").replace("S", "5")
         | 
|  | |
| 61 | 
             
                        cleaned = cleaned.replace("kg", "").replace("kgs", "")
         | 
| 62 | 
             
                        cleaned = re.sub(r"[^\d\.]", "", cleaned)
         | 
| 63 |  | 
|  | |
| 64 | 
             
                        if re.fullmatch(r"\d{2,4}(\.\d{1,3})?", cleaned):
         | 
| 65 | 
             
                            if conf > best_conf:
         | 
| 66 | 
             
                                best_weight = cleaned
         | 
| 67 | 
             
                                best_conf = conf
         | 
| 68 |  | 
|  | |
| 69 | 
             
                    if not best_weight:
         | 
| 70 | 
             
                        for item in results:
         | 
| 71 | 
             
                            if len(item) != 2 or not isinstance(item[1], tuple):
         | 
|  | |
| 80 | 
             
                    if not best_weight:
         | 
| 81 | 
             
                        return "Not detected", 0.0
         | 
| 82 |  | 
|  | |
| 83 | 
             
                    if "." in best_weight:
         | 
| 84 | 
             
                        int_part, dec_part = best_weight.split(".")
         | 
| 85 | 
             
                        int_part = int_part.lstrip("0") or "0"
         | 
