Spaces:
Sleeping
Sleeping
Create tests/test_ocr_functionality.py
Browse files
tests/test_ocr_functionality.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pytest
|
2 |
+
from PIL import Image, ImageDraw, ImageFont
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
class TestOCRFunctionality:
    """Test OCR text extraction functionality.

    NOTE(review): every test below exercises a locally defined ``mock_*``
    stand-in, not an imported implementation, so the suite only pins the
    behaviour expected of those stand-ins. Confirm whether the real OCR
    helpers should be imported and tested instead.
    """

    def test_preprocess_image_for_ocr(self, sample_image):
        """Test image preprocessing for OCR.

        ``sample_image`` is a fixture supplied from outside this file; the
        test only requires that ``np.array`` can convert it.
        """

        # Mock the preprocessing function
        def mock_preprocess_image_for_ocr(image):
            img_array = np.array(image)
            if len(img_array.shape) == 3:
                # Average the last axis to reduce a 3-D array to 2-D.
                gray = np.mean(img_array, axis=2).astype(np.uint8)
            else:
                gray = img_array
            return gray

        result = mock_preprocess_image_for_ocr(sample_image)
        assert isinstance(result, np.ndarray)
        assert len(result.shape) == 2  # Should be grayscale

    def test_extract_text_from_image_none_input(self):
        """Test OCR with None input."""

        def mock_extract_text_from_image(image):
            if image is None:
                return "❌ No image provided"
            return "Sample extracted text"

        result = mock_extract_text_from_image(None)
        assert result == "❌ No image provided"

    def test_extract_text_from_valid_image(self, sample_image):
        """Test OCR with valid image."""

        def mock_extract_text_from_image(image):
            if image is None:
                return "❌ No image provided"
            # Simulate successful OCR
            return "Patient: John Doe, 35-year-old male with chest pain"

        result = mock_extract_text_from_image(sample_image)
        assert "Patient:" in result
        assert "chest pain" in result

    def test_clean_extracted_text(self):
        """Test text cleaning functionality.

        The cleaner removes ``|``, turns ``_`` into a space, collapses runs
        of whitespace inside each line and drops lines that end up empty.
        """

        def mock_clean_extracted_text(text):
            # Remove the artefacts *before* normalising whitespace: deleting
            # a '|' that sits between two spaces otherwise leaves a double
            # space behind ("with | pipes" -> "with  pipes").
            text = text.replace('|', '').replace('_', ' ')
            # str.split() with no argument splits on any whitespace run, so
            # re-joining with a single space both trims and collapses.
            lines = (' '.join(line.split()) for line in text.split('\n'))
            return '\n'.join(line for line in lines if line)

        dirty_text = " Line 1 \n\n Line 2 with | pipes \n \n Line_3 "
        clean_text = mock_clean_extracted_text(dirty_text)

        assert "Line 1" in clean_text
        assert "Line 2 with pipes" in clean_text
        assert "Line 3" in clean_text
        assert "|" not in clean_text
        assert clean_text == "Line 1\nLine 2 with pipes\nLine 3"

    def test_image_preprocessing_edge_cases(self):
        """Test image preprocessing with edge cases."""

        def mock_preprocess_image_for_ocr(image):
            if image is None:
                return None

            img_array = np.array(image)
            # Handle grayscale images
            if len(img_array.shape) == 2:
                return img_array
            # Handle RGB images
            elif len(img_array.shape) == 3:
                return np.mean(img_array, axis=2).astype(np.uint8)
            else:
                raise ValueError("Unsupported image format")

        # Test with None
        assert mock_preprocess_image_for_ocr(None) is None

        # Test with small grayscale image
        small_gray = Image.new('L', (50, 50), color=128)
        result = mock_preprocess_image_for_ocr(small_gray)
        assert isinstance(result, np.ndarray)
        assert len(result.shape) == 2
|