Bonosa2 committed on
Commit
37d6469
·
verified ·
1 Parent(s): d31fe8c

Create tests/test_ocr_functionality.py

Browse files
Files changed (1) hide show
  1. tests/test_ocr_functionality.py +84 -0
tests/test_ocr_functionality.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from PIL import Image, ImageDraw, ImageFont
3
+ import numpy as np
4
+
5
+ class TestOCRFunctionality:
6
+ """Test OCR text extraction functionality"""
7
+
8
+ def test_preprocess_image_for_ocr(self, sample_image):
9
+ """Test image preprocessing for OCR"""
10
+ # Mock the preprocessing function
11
+ def mock_preprocess_image_for_ocr(image):
12
+ img_array = np.array(image)
13
+ if len(img_array.shape) == 3:
14
+ gray = np.mean(img_array, axis=2).astype(np.uint8)
15
+ else:
16
+ gray = img_array
17
+ return gray
18
+
19
+ result = mock_preprocess_image_for_ocr(sample_image)
20
+ assert isinstance(result, np.ndarray)
21
+ assert len(result.shape) == 2 # Should be grayscale
22
+
23
+ def test_extract_text_from_image_none_input(self):
24
+ """Test OCR with None input"""
25
+ def mock_extract_text_from_image(image):
26
+ if image is None:
27
+ return "❌ No image provided"
28
+ return "Sample extracted text"
29
+
30
+ result = mock_extract_text_from_image(None)
31
+ assert result == "❌ No image provided"
32
+
33
+ def test_extract_text_from_valid_image(self, sample_image):
34
+ """Test OCR with valid image"""
35
+ def mock_extract_text_from_image(image):
36
+ if image is None:
37
+ return "❌ No image provided"
38
+ # Simulate successful OCR
39
+ return "Patient: John Doe, 35-year-old male with chest pain"
40
+
41
+ result = mock_extract_text_from_image(sample_image)
42
+ assert "Patient:" in result
43
+ assert "chest pain" in result
44
+
45
+ def test_clean_extracted_text(self):
46
+ """Test text cleaning functionality"""
47
+ def mock_clean_extracted_text(text):
48
+ lines = [line.strip() for line in text.split('\n') if line.strip()]
49
+ cleaned_text = '\n'.join(lines)
50
+ cleaned_text = cleaned_text.replace('|', '').replace('_', ' ')
51
+ return cleaned_text.strip()
52
+
53
+ dirty_text = " Line 1 \n\n Line 2 with | pipes \n \n Line_3 "
54
+ clean_text = mock_clean_extracted_text(dirty_text)
55
+
56
+ assert "Line 1" in clean_text
57
+ assert "Line 2 with pipes" in clean_text
58
+ assert "Line 3" in clean_text
59
+ assert "|" not in clean_text
60
+
61
+ def test_image_preprocessing_edge_cases(self):
62
+ """Test image preprocessing with edge cases"""
63
+ def mock_preprocess_image_for_ocr(image):
64
+ if image is None:
65
+ return None
66
+
67
+ img_array = np.array(image)
68
+ # Handle grayscale images
69
+ if len(img_array.shape) == 2:
70
+ return img_array
71
+ # Handle RGB images
72
+ elif len(img_array.shape) == 3:
73
+ return np.mean(img_array, axis=2).astype(np.uint8)
74
+ else:
75
+ raise ValueError("Unsupported image format")
76
+
77
+ # Test with None
78
+ assert mock_preprocess_image_for_ocr(None) is None
79
+
80
+ # Test with small grayscale image
81
+ small_gray = Image.new('L', (50, 50), color=128)
82
+ result = mock_preprocess_image_for_ocr(small_gray)
83
+ assert isinstance(result, np.ndarray)
84
+ assert len(result.shape) == 2