Spaces:

Bonosa2
/

Scribbled-docs-notes

Sleeping

App Files Files Community

Scribbled-docs-notes / tests /test_ocr_functionality.py

Bonosa2

Create tests/test_ocr_functionality.py

37d6469 verified about 1 month ago

raw

history blame

3.26 kB

	import pytest
	from PIL import Image, ImageDraw, ImageFont
	import numpy as np

	class TestOCRFunctionality:
	"""Test OCR text extraction functionality"""

	def test_preprocess_image_for_ocr(self, sample_image):
	"""Test image preprocessing for OCR"""
	# Mock the preprocessing function
	def mock_preprocess_image_for_ocr(image):
	img_array = np.array(image)
	if len(img_array.shape) == 3:
	gray = np.mean(img_array, axis=2).astype(np.uint8)
	else:
	gray = img_array
	return gray

	result = mock_preprocess_image_for_ocr(sample_image)
	assert isinstance(result, np.ndarray)
	assert len(result.shape) == 2 # Should be grayscale

	def test_extract_text_from_image_none_input(self):
	"""Test OCR with None input"""
	def mock_extract_text_from_image(image):
	if image is None:
	return "❌ No image provided"
	return "Sample extracted text"

	result = mock_extract_text_from_image(None)
	assert result == "❌ No image provided"

	def test_extract_text_from_valid_image(self, sample_image):
	"""Test OCR with valid image"""
	def mock_extract_text_from_image(image):
	if image is None:
	return "❌ No image provided"
	# Simulate successful OCR
	return "Patient: John Doe, 35-year-old male with chest pain"

	result = mock_extract_text_from_image(sample_image)
	assert "Patient:" in result
	assert "chest pain" in result

	def test_clean_extracted_text(self):
	"""Test text cleaning functionality"""
	def mock_clean_extracted_text(text):
	lines = [line.strip() for line in text.split('\n') if line.strip()]
	cleaned_text = '\n'.join(lines)
	cleaned_text = cleaned_text.replace('\|', '').replace('_', ' ')
	return cleaned_text.strip()

	dirty_text = " Line 1 \n\n Line 2 with \| pipes \n \n Line_3 "
	clean_text = mock_clean_extracted_text(dirty_text)

	assert "Line 1" in clean_text
	assert "Line 2 with pipes" in clean_text
	assert "Line 3" in clean_text
	assert "\|" not in clean_text

	def test_image_preprocessing_edge_cases(self):
	"""Test image preprocessing with edge cases"""
	def mock_preprocess_image_for_ocr(image):
	if image is None:
	return None

	img_array = np.array(image)
	# Handle grayscale images
	if len(img_array.shape) == 2:
	return img_array
	# Handle RGB images
	elif len(img_array.shape) == 3:
	return np.mean(img_array, axis=2).astype(np.uint8)
	else:
	raise ValueError("Unsupported image format")

	# Test with None
	assert mock_preprocess_image_for_ocr(None) is None

	# Test with small grayscale image
	small_gray = Image.new('L', (50, 50), color=128)
	result = mock_preprocess_image_for_ocr(small_gray)
	assert isinstance(result, np.ndarray)
	assert len(result.shape) == 2