Spaces:
Sleeping
Sleeping
File size: 11,303 Bytes
79adcde |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 |
# tests/test_real_functionality.py
import pytest
import sys
import os
from PIL import Image
import numpy as np
# Add parent directory to import app
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Import actual functions from app.py
try:
from app import (
preprocess_image_for_ocr,
clean_extracted_text,
gradio_generate_soap
)
APP_AVAILABLE = True
except ImportError:
APP_AVAILABLE = False
@pytest.mark.skipif(not APP_AVAILABLE, reason="app.py not available")
class TestRealFunctionality:
    """Exercise the real functions imported from app.py (no mocks).

    The whole class is skipped when importing them from ``app`` failed.
    """

    def test_clean_extracted_text_real(self):
        """Check that cleaning keeps the line content and removes pipes."""
        dirty_text = " Line 1 \n\n Line 2 with | pipes \n \n Line_3 "

        clean_text = clean_extracted_text(dirty_text)

        for expected in ("Line 1", "Line 2 with pipes", "Line 3"):
            assert expected in clean_text
        assert "|" not in clean_text

    def test_preprocess_image_for_ocr_real(self):
        """Check that preprocessing a blank RGB image yields a 2-D array."""
        test_image = Image.new('RGB', (400, 300), color='white')

        result = preprocess_image_for_ocr(test_image)

        assert isinstance(result, np.ndarray)
        assert result.ndim == 2  # Should be grayscale
        height, width = result.shape
        assert height >= 300  # Height should be at least 300
        assert width >= 300  # Width should be at least 300

    def test_gradio_generate_soap_empty_input_real(self):
        """Check that empty input produces an error or prompt message."""
        result = gradio_generate_soap("", None)

        assert "❌" in result or "Please enter" in result

    def test_gradio_generate_soap_text_input_real(self):
        """Check that a clinical note yields at least one SOAP section.

        When the app answers with an error marker the test is skipped, so
        the report shows that SOAP generation was not exercised instead of
        recording a pass that asserted nothing.
        """
        # Built from per-line literals so the value does not depend on how
        # this method happens to be indented.
        medical_text = (
            "\n"
            "Patient: John Smith, 45-year-old male\n"
            "Chief Complaint: Chest pain for 2 hours\n"
            "History: Sharp chest pain, 7/10 intensity\n"
            "Physical Exam: VS: BP 150/90, HR 110\n"
            "Assessment: Chest pain evaluation needed\n"
            "Plan: EKG, cardiac enzymes\n"
        )

        result = gradio_generate_soap(medical_text, None)

        if "❌" in result:
            pytest.skip("gradio_generate_soap returned an error message")

        soap_sections = ["SUBJECTIVE", "OBJECTIVE", "ASSESSMENT", "PLAN"]
        upper_result = result.upper()
        assert any(section in upper_result for section in soap_sections)
# tests/test_integration_real.py
import pytest
import sys
import os
from PIL import Image, ImageDraw, ImageFont
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
class TestRealIntegration:
    """Integration-style checks that run without mocks."""

    def test_image_with_text_creation(self):
        """Draw text lines on a white image and verify pixels changed."""
        # Imported here because this section's import header does not
        # bring numpy into scope.
        import numpy as np

        img = Image.new('RGB', (800, 600), color='white')
        draw = ImageDraw.Draw(img)

        medical_text = [
            "Patient: John Doe",
            "Age: 45 years old",
            "Chief Complaint: Chest pain",
            "Vital Signs: BP 140/90",
            "Assessment: Further evaluation needed",
        ]
        # Lines start at y=50 and are spaced 40 pixels apart.
        for index, line in enumerate(medical_text):
            draw.text((50, 50 + 40 * index), line, fill='black')

        assert img.size == (800, 600)
        assert img.mode == 'RGB'

        # Should not be all white (has text)
        assert not np.all(np.array(img) == 255)

    def test_examples_dictionary_structure(self):
        """Check the keys and content of ``app.examples``.

        Skipped when ``examples`` cannot be imported from ``app``.
        """
        # Only the import is guarded, so assertion failures below are never
        # mixed up with a missing module.
        try:
            from app import examples
        except ImportError:
            pytest.skip("examples not available in app.py")

        for key in ('chest_pain', 'diabetes', 'pediatric'):
            assert key in examples, f"missing example: {key}"

        section_words = ["complaint", "history", "exam", "assessment", "plan"]
        for key, content in examples.items():
            assert len(content) > 100, f"example too short: {key}"
            assert "Patient:" in content, f"no patient line: {key}"
            lowered = content.lower()
            assert any(word in lowered for word in section_words), key

    def test_file_extensions_validation(self):
        """Check extension filtering against a fixed allow-list."""
        allowed_extensions = {
            '.png', '.jpg', '.jpeg', '.txt', '.doc', '.docx', '.pdf',
        }

        def extension_of(filename):
            # Lower-case first so the comparison is case-insensitive.
            return os.path.splitext(filename.lower())[1]

        for filename in ('image.png', 'document.pdf', 'notes.txt', 'scan.jpg'):
            assert extension_of(filename) in allowed_extensions

        for filename in ('file.xyz', 'document.zip', 'image.bmp'):
            assert extension_of(filename) not in allowed_extensions
# tests/test_error_scenarios_real.py
import pytest
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
class TestRealErrorScenarios:
    """Checks around optional dependencies and basic image modes."""

    def test_missing_dependencies_handling(self):
        """Skip when neither OCR backend (easyocr, pytesseract) imports."""
        import importlib

        available = []
        for module_name in ("easyocr", "pytesseract"):
            try:
                importlib.import_module(module_name)
            except ImportError:
                continue
            available.append(module_name)

        # At least one OCR method should be available for the app to work
        if not available:
            pytest.skip("No OCR dependencies available")

    def test_torch_availability(self):
        """Check device detection and tensor creation on that device."""
        import torch

        device = "cuda" if torch.cuda.is_available() else "cpu"
        assert device in ["cuda", "cpu"]

        # Create the tensor on the detected device and confirm both where
        # it was placed and what it holds.
        test_tensor = torch.tensor(
            [1, 2, 3], dtype=torch.float32, device=device
        )
        assert test_tensor.device.type == device
        assert test_tensor.tolist() == [1.0, 2.0, 3.0]

    def test_image_format_handling(self):
        """Check that RGB, grayscale and RGBA images report their mode."""
        from PIL import Image

        mode_and_color = [
            ('RGB', 'white'),
            ('L', 128),
            ('RGBA', (255, 255, 255, 255)),
        ]
        for mode, color in mode_and_color:
            image = Image.new(mode, (100, 100), color=color)
            assert image.mode == mode
# tests/test_performance_real.py
import pytest
import time
import sys
import os
from PIL import Image
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
class TestRealPerformance:
    """Coarse timing checks for the real preprocessing helpers."""

    def test_image_preprocessing_speed(self):
        """Check that a 1000x800 image is preprocessed in under 5 seconds.

        Skipped when ``preprocess_image_for_ocr`` cannot be imported.
        """
        try:
            from app import preprocess_image_for_ocr
        except ImportError:
            pytest.skip("preprocess_image_for_ocr not available")

        test_image = Image.new('RGB', (1000, 800), color='white')

        # perf_counter is monotonic, so a system clock adjustment cannot
        # distort the measured duration.
        start_time = time.perf_counter()
        result = preprocess_image_for_ocr(test_image)
        processing_time = time.perf_counter() - start_time

        # Should process reasonably quickly (under 5 seconds)
        assert processing_time < 5.0
        assert result is not None

    def test_text_cleaning_speed(self):
        """Check that 1000 noisy lines are cleaned in under 1 second.

        Skipped when ``clean_extracted_text`` cannot be imported.
        """
        try:
            from app import clean_extracted_text
        except ImportError:
            pytest.skip("clean_extracted_text not available")

        # Create large text with lots of cleaning needed
        dirty_text = " \n ".join(
            f"Line {i} with | pipes and _ underscores " for i in range(1000)
        )

        start_time = time.perf_counter()
        clean_text = clean_extracted_text(dirty_text)
        processing_time = time.perf_counter() - start_time

        # Should clean text quickly (under 1 second)
        assert processing_time < 1.0
        assert len(clean_text) > 0
        assert "|" not in clean_text
# tests/test_configuration_real.py
import pytest
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
class TestRealConfiguration:
    """Sanity checks on environment and model configuration values."""

    def test_environment_variables(self):
        """Validate the Hugging Face token when one is set.

        Reads ``HF_TOKEN`` and falls back to ``HUGGINGFACE_TOKEN``. A
        missing token is acceptable here, so nothing is asserted then.
        """
        hf_token = (
            os.environ.get('HF_TOKEN') or os.environ.get('HUGGINGFACE_TOKEN')
        )

        if hf_token:
            assert isinstance(hf_token, str)
            assert len(hf_token) > 10  # Tokens should be substantial

    def test_cuda_device_configuration(self):
        """Check the device chosen from CUDA_VISIBLE_DEVICES and torch."""
        import torch

        cuda_visible = os.environ.get("CUDA_VISIBLE_DEVICES", "")

        # An unset or empty CUDA_VISIBLE_DEVICES selects the CPU without
        # querying torch; otherwise CUDA is used only when torch reports it.
        use_cuda = cuda_visible != "" and torch.cuda.is_available()
        expected_device = "cuda" if use_cuda else "cpu"

        assert expected_device in ["cuda", "cpu"]

    def test_model_configuration(self):
        """Check the shape of the model id and generation parameters."""
        model_id = "google/gemma-3n-e2b-it"
        assert isinstance(model_id, str)
        assert "/" in model_id  # Should be in format "org/model"

        # Test model parameters that would be used
        max_new_tokens = 512
        temperature = 0.3

        assert isinstance(max_new_tokens, int)
        assert max_new_tokens > 0
        assert isinstance(temperature, float)
        assert 0 <= temperature <= 1.0