Scribbled-docs-notes / tests /test_ocr_functionality.py
Bonosa2's picture
Create tests/test_ocr_functionality.py
37d6469 verified
raw
history blame
3.26 kB
import pytest
from PIL import Image, ImageDraw, ImageFont
import numpy as np
class TestOCRFunctionality:
    """Test OCR text extraction functionality.

    Each test defines a local stand-in ("mock") for the function it is
    named after and checks the contract of that stand-in, so nothing from
    the application itself is imported or executed here.
    """

    def test_preprocess_image_for_ocr(self, sample_image):
        """Preprocessing must hand back a 2-D (grayscale) NumPy array.

        ``sample_image`` is not defined in this class; it is expected to
        be supplied as a pytest fixture defined elsewhere (e.g. conftest).
        """

        # Mock the preprocessing function
        def mock_preprocess_image_for_ocr(image):
            img_array = np.array(image)
            if len(img_array.shape) == 3:
                # Average over the channel axis to get one gray plane.
                gray = np.mean(img_array, axis=2).astype(np.uint8)
            else:
                gray = img_array
            return gray

        result = mock_preprocess_image_for_ocr(sample_image)

        assert isinstance(result, np.ndarray)
        assert len(result.shape) == 2  # Should be grayscale

    def test_extract_text_from_image_none_input(self):
        """A ``None`` image yields the "no image" message, not an error."""

        def mock_extract_text_from_image(image):
            if image is None:
                return "❌ No image provided"
            return "Sample extracted text"

        result = mock_extract_text_from_image(None)

        assert result == "❌ No image provided"

    def test_extract_text_from_valid_image(self, sample_image):
        """A non-``None`` image yields the simulated OCR text."""

        def mock_extract_text_from_image(image):
            if image is None:
                return "❌ No image provided"
            # Simulate successful OCR
            return "Patient: John Doe, 35-year-old male with chest pain"

        result = mock_extract_text_from_image(sample_image)

        assert "Patient:" in result
        assert "chest pain" in result

    def test_clean_extracted_text(self):
        """Cleaning trims lines, drops blanks and removes OCR artefacts."""

        def mock_clean_extracted_text(text):
            cleaned_lines = []
            for raw_line in text.split('\n'):
                # Remove pipe characters and turn underscores into spaces.
                line = raw_line.replace('|', '').replace('_', ' ')
                # Deleting a pipe that sat between two spaces leaves a
                # double space behind; split/join collapses such runs and
                # trims both ends of the line in the same step.
                line = ' '.join(line.split())
                if line:
                    cleaned_lines.append(line)
            return '\n'.join(cleaned_lines)

        dirty_text = " Line 1 \n\n Line 2 with | pipes \n \n Line_3 "
        clean_text = mock_clean_extracted_text(dirty_text)

        assert "Line 1" in clean_text
        assert "Line 2 with pipes" in clean_text
        assert "Line 3" in clean_text
        assert "|" not in clean_text
        # Pin the full result so stray blanks or blank lines are caught.
        assert clean_text == "Line 1\nLine 2 with pipes\nLine 3"

    def test_image_preprocessing_edge_cases(self):
        """Test image preprocessing with edge cases.

        Covers ``None`` input, an already-grayscale image, an RGB image
        and an array whose rank is neither 2 nor 3.
        """

        def mock_preprocess_image_for_ocr(image):
            if image is None:
                return None
            img_array = np.array(image)
            # Handle grayscale images
            if len(img_array.shape) == 2:
                return img_array
            # Handle RGB images
            elif len(img_array.shape) == 3:
                return np.mean(img_array, axis=2).astype(np.uint8)
            else:
                raise ValueError("Unsupported image format")

        # Test with None
        assert mock_preprocess_image_for_ocr(None) is None

        # Test with small grayscale image
        small_gray = Image.new('L', (50, 50), color=128)
        result = mock_preprocess_image_for_ocr(small_gray)
        assert isinstance(result, np.ndarray)
        assert len(result.shape) == 2

        # Test with small RGB image: the channel axis must be averaged away
        small_rgb = Image.new('RGB', (10, 10), color=(255, 0, 0))
        result = mock_preprocess_image_for_ocr(small_rgb)
        assert isinstance(result, np.ndarray)
        assert len(result.shape) == 2

        # Test with a 1-D array: neither grayscale nor multi-channel
        with pytest.raises(ValueError):
            mock_preprocess_image_for_ocr(np.zeros(5, dtype=np.uint8))