import pytest
from PIL import Image, ImageDraw, ImageFont
import numpy as np


class TestOCRFunctionality:
    """Test OCR text extraction functionality.

    Each test defines a local ``mock_*`` stand-in and asserts on the
    stand-in's behaviour; no production OCR code is called from this class.
    """

    def test_preprocess_image_for_ocr(self, sample_image):
        """Test that preprocessing turns an image into a 2-D array.

        Args:
            sample_image: Fixture supplied outside this file; anything
                ``np.array`` accepts works here (presumably a PIL image --
                confirm against the fixture definition).
        """
        # Mock the preprocessing function
        def mock_preprocess_image_for_ocr(image):
            img_array = np.array(image)
            if img_array.ndim == 3:
                # Average over axis 2 to drop the third dimension.
                return np.mean(img_array, axis=2).astype(np.uint8)
            return img_array

        result = mock_preprocess_image_for_ocr(sample_image)

        assert isinstance(result, np.ndarray)
        assert result.ndim == 2  # Should be grayscale

    def test_extract_text_from_image_none_input(self):
        """Test OCR with None input: an error message is returned."""
        def mock_extract_text_from_image(image):
            if image is None:
                return "❌ No image provided"
            return "Sample extracted text"

        result = mock_extract_text_from_image(None)

        assert result == "❌ No image provided"

    def test_extract_text_from_valid_image(self, sample_image):
        """Test OCR with a valid image: the canned text is returned.

        Args:
            sample_image: Fixture supplied outside this file; only checked
                for being non-None by the stand-in.
        """
        def mock_extract_text_from_image(image):
            if image is None:
                return "❌ No image provided"
            # Simulate successful OCR
            return "Patient: John Doe, 35-year-old male with chest pain"

        result = mock_extract_text_from_image(sample_image)

        assert "Patient:" in result
        assert "chest pain" in result

    def test_clean_extracted_text(self):
        """Test text cleaning: blank lines, pipes and underscores are removed."""
        def mock_clean_extracted_text(text):
            cleaned_lines = []
            for raw_line in text.split('\n'):
                replaced = raw_line.replace('|', '').replace('_', ' ')
                # Removing '|' leaves the spaces that surrounded it, so
                # collapse whitespace runs; otherwise "with | pipes" becomes
                # "with  pipes" and never matches "with pipes".
                collapsed = ' '.join(replaced.split())
                if collapsed:  # skip lines that are empty after cleaning
                    cleaned_lines.append(collapsed)
            return '\n'.join(cleaned_lines)

        dirty_text = " Line 1 \n\n Line 2 with | pipes \n \n Line_3 "
        clean_text = mock_clean_extracted_text(dirty_text)

        assert "Line 1" in clean_text
        assert "Line 2 with pipes" in clean_text
        assert "Line 3" in clean_text
        assert "|" not in clean_text

    def test_image_preprocessing_edge_cases(self):
        """Test image preprocessing with edge cases (None, small grayscale)."""
        def mock_preprocess_image_for_ocr(image):
            if image is None:
                return None

            img_array = np.array(image)

            # Handle grayscale images
            if img_array.ndim == 2:
                return img_array
            # Handle RGB images
            if img_array.ndim == 3:
                return np.mean(img_array, axis=2).astype(np.uint8)
            raise ValueError("Unsupported image format")

        # Test with None
        assert mock_preprocess_image_for_ocr(None) is None

        # Test with small grayscale image
        small_gray = Image.new('L', (50, 50), color=128)
        result = mock_preprocess_image_for_ocr(small_gray)

        assert isinstance(result, np.ndarray)
        assert result.ndim == 2