Spaces:

Bonosa2
/

Scribbled-docs-notes

Sleeping

App Files Files Community

Bonosa2 committed on Jul 11

Commit

37d6469

verified ·

1 Parent(s): d31fe8c

Create tests/test_ocr_functionality.py

Browse files

Files changed (1) hide show

tests/test_ocr_functionality.py +84 -0

tests/test_ocr_functionality.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import pytest
+from PIL import Image, ImageDraw, ImageFont
+import numpy as np
+class TestOCRFunctionality:
+    """Test OCR text extraction functionality"""
+    def test_preprocess_image_for_ocr(self, sample_image):
+        """Test image preprocessing for OCR"""
+        # Mock the preprocessing function
+        def mock_preprocess_image_for_ocr(image):
+            img_array = np.array(image)
+            if len(img_array.shape) == 3:
+                gray = np.mean(img_array, axis=2).astype(np.uint8)
+            else:
+                gray = img_array
+            return gray
+        result = mock_preprocess_image_for_ocr(sample_image)
+        assert isinstance(result, np.ndarray)
+        assert len(result.shape) == 2  # Should be grayscale
+    def test_extract_text_from_image_none_input(self):
+        """Test OCR with None input"""
+        def mock_extract_text_from_image(image):
+            if image is None:
+                return "❌ No image provided"
+            return "Sample extracted text"
+        result = mock_extract_text_from_image(None)
+        assert result == "❌ No image provided"
+    def test_extract_text_from_valid_image(self, sample_image):
+        """Test OCR with valid image"""
+        def mock_extract_text_from_image(image):
+            if image is None:
+                return "❌ No image provided"
+            # Simulate successful OCR
+            return "Patient: John Doe, 35-year-old male with chest pain"
+        result = mock_extract_text_from_image(sample_image)
+        assert "Patient:" in result
+        assert "chest pain" in result
+    def test_clean_extracted_text(self):
+        """Test text cleaning functionality"""
+        def mock_clean_extracted_text(text):
+            lines = [line.strip() for line in text.split('\n') if line.strip()]
+            cleaned_text = '\n'.join(lines)
+            cleaned_text = cleaned_text.replace('|', '').replace('_', ' ')
+            return cleaned_text.strip()
+        dirty_text = "  Line 1  \n\n  Line 2 with | pipes  \n   \n  Line_3  "
+        clean_text = mock_clean_extracted_text(dirty_text)
+        assert "Line 1" in clean_text
+        assert "Line 2 with  pipes" in clean_text
+        assert "Line 3" in clean_text
+        assert "|" not in clean_text
+    def test_image_preprocessing_edge_cases(self):
+        """Test image preprocessing with edge cases"""
+        def mock_preprocess_image_for_ocr(image):
+            if image is None:
+                return None
+            img_array = np.array(image)
+            # Handle grayscale images
+            if len(img_array.shape) == 2:
+                return img_array
+            # Handle RGB images
+            elif len(img_array.shape) == 3:
+                return np.mean(img_array, axis=2).astype(np.uint8)
+            else:
+                raise ValueError("Unsupported image format")
+        # Test with None
+        assert mock_preprocess_image_for_ocr(None) is None
+        # Test with small grayscale image
+        small_gray = Image.new('L', (50, 50), color=128)
+        result = mock_preprocess_image_for_ocr(small_gray)
+        assert isinstance(result, np.ndarray)
+        assert len(result.shape) == 2