Spaces:
Sleeping
Sleeping
# tests/test_real_functionality.py | |
import pytest | |
import sys | |
import os | |
from PIL import Image | |
import numpy as np | |
# Add parent directory to import app | |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
# Import actual functions from app.py | |
# Optional import of the functions under test. APP_AVAILABLE records whether
# the import succeeded so the test classes below can decide to skip instead
# of failing on undefined names.
try:
    from app import (
        preprocess_image_for_ocr,
        clean_extracted_text,
        gradio_generate_soap,
    )
except ImportError:
    APP_AVAILABLE = False
else:
    APP_AVAILABLE = True
class TestRealFunctionality:
    """Exercise the real functions imported from ``app.py`` (no mocks).

    Every test is skipped when the module-level import of ``app`` failed
    (``APP_AVAILABLE`` is false); otherwise the imported names would be
    undefined and each test would error with ``NameError``.
    """

    def setup_method(self, method):
        """Skip the upcoming test when ``app`` could not be imported."""
        if not APP_AVAILABLE:
            pytest.skip("app.py could not be imported")

    def test_clean_extracted_text_real(self):
        """Check that clean_extracted_text keeps line content and drops pipes."""
        dirty_text = " Line 1 \n\n Line 2 with | pipes \n \n Line_3 "

        clean_text = clean_extracted_text(dirty_text)

        assert "Line 1" in clean_text
        assert "Line 2 with pipes" in clean_text
        assert "Line 3" in clean_text
        assert "|" not in clean_text

    def test_preprocess_image_for_ocr_real(self):
        """Check the type, rank and minimum size of the preprocessed image."""
        test_image = Image.new('RGB', (400, 300), color='white')

        result = preprocess_image_for_ocr(test_image)

        assert isinstance(result, np.ndarray)
        assert result.ndim == 2  # Should be grayscale (single channel)
        height, width = result.shape
        assert height >= 300  # Height should be at least 300
        assert width >= 300  # Width should be at least 300

    def test_gradio_generate_soap_empty_input_real(self):
        """Check that empty input produces an error / prompt message."""
        result = gradio_generate_soap("", None)

        assert "❌" in result or "Please enter" in result

    def test_gradio_generate_soap_text_input_real(self):
        """Check that text input yields SOAP sections when generation works.

        When the function returns an error message (marked with "❌") the
        test is reported as skipped rather than silently passing, so the
        real state of the app stays visible in the test report.
        """
        medical_text = """
        Patient: John Smith, 45-year-old male
        Chief Complaint: Chest pain for 2 hours
        History: Sharp chest pain, 7/10 intensity
        Physical Exam: VS: BP 150/90, HR 110
        Assessment: Chest pain evaluation needed
        Plan: EKG, cardiac enzymes
        """

        result = gradio_generate_soap(medical_text, None)

        if "❌" in result:
            pytest.skip(
                "gradio_generate_soap returned an error message instead of "
                "a SOAP note"
            )

        # A successful result should mention at least one SOAP section.
        upper_result = result.upper()
        assert any(
            section in upper_result
            for section in ("SUBJECTIVE", "OBJECTIVE", "ASSESSMENT", "PLAN")
        )
# tests/test_integration_real.py | |
import pytest | |
import sys | |
import os | |
from PIL import Image, ImageDraw, ImageFont | |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
class TestRealIntegration:
    """Integration-style checks that run without mocks."""

    def test_image_with_text_creation(self):
        """Draw several text lines on a white image and verify it changed."""
        # Imported locally: this test file's header does not import numpy.
        import numpy as np

        img = Image.new('RGB', (800, 600), color='white')
        draw = ImageDraw.Draw(img)

        medical_text = [
            "Patient: John Doe",
            "Age: 45 years old",
            "Chief Complaint: Chest pain",
            "Vital Signs: BP 140/90",
            "Assessment: Further evaluation needed",
        ]

        # One line every 40 pixels, starting 50 pixels from the top.
        for index, line in enumerate(medical_text):
            draw.text((50, 50 + 40 * index), line, fill='black')

        assert img.size == (800, 600)
        assert img.mode == 'RGB'

        # Drawn text must leave at least one non-white pixel.
        img_array = np.array(img)
        assert not np.all(img_array == 255)

    def test_examples_dictionary_structure(self):
        """Check the keys and content of ``app.examples``.

        Skipped when ``examples`` cannot be imported from ``app``.
        """
        # Keep the try body to the import so that only a missing
        # module/name triggers the skip.
        try:
            from app import examples
        except ImportError:
            pytest.skip("examples not available in app.py")

        for key in ('chest_pain', 'diabetes', 'pediatric'):
            assert key in examples

        expected_words = ("complaint", "history", "exam", "assessment", "plan")
        for content in examples.values():
            assert len(content) > 100  # Substantial content
            assert "Patient:" in content
            lowered = content.lower()
            assert any(word in lowered for word in expected_words)

    def test_file_extensions_validation(self):
        """Check extension allow-list logic on sample file names."""
        allowed_extensions = [
            '.png', '.jpg', '.jpeg', '.txt', '.doc', '.docx', '.pdf',
        ]

        for filename in ('image.png', 'document.pdf', 'notes.txt', 'scan.jpg'):
            file_ext = os.path.splitext(filename.lower())[1]
            assert file_ext in allowed_extensions

        for filename in ('file.xyz', 'document.zip', 'image.bmp'):
            file_ext = os.path.splitext(filename.lower())[1]
            assert file_ext not in allowed_extensions
# tests/test_error_scenarios_real.py | |
import pytest | |
import sys | |
import os | |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
class TestRealErrorScenarios:
    """Environment and dependency checks related to error handling."""

    def test_missing_dependencies_handling(self):
        """Skip when neither OCR backend (easyocr, pytesseract) is importable."""
        try:
            import easyocr  # noqa: F401 - imported only to probe availability
            easyocr_available = True
        except ImportError:
            easyocr_available = False

        try:
            import pytesseract  # noqa: F401 - imported only to probe availability
            tesseract_available = True
        except ImportError:
            tesseract_available = False

        # At least one OCR method should be available for the app to work.
        if not (easyocr_available or tesseract_available):
            pytest.skip("No OCR dependencies available")

    def test_torch_availability(self):
        """Check device detection and tensor creation on the detected device."""
        import torch

        device = "cuda" if torch.cuda.is_available() else "cpu"
        assert device in ["cuda", "cpu"]

        # Actually place the tensor on the detected device and verify it
        # landed there with the expected contents.
        test_tensor = torch.tensor([1, 2, 3], dtype=torch.float32, device=device)
        assert test_tensor.device.type == device
        assert test_tensor.tolist() == [1.0, 2.0, 3.0]

    def test_image_format_handling(self):
        """Check that PIL creates images in the RGB, L and RGBA modes."""
        from PIL import Image

        rgb_img = Image.new('RGB', (100, 100), color='white')
        assert rgb_img.mode == 'RGB'

        gray_img = Image.new('L', (100, 100), color=128)
        assert gray_img.mode == 'L'

        rgba_img = Image.new('RGBA', (100, 100), color=(255, 255, 255, 255))
        assert rgba_img.mode == 'RGBA'
# tests/test_performance_real.py | |
import pytest | |
import time | |
import sys | |
import os | |
from PIL import Image | |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
class TestRealPerformance:
    """Coarse wall-clock limits for the real app functions."""

    def test_image_preprocessing_speed(self):
        """Preprocessing a 1000x800 image should take under 5 seconds.

        Skipped when ``preprocess_image_for_ocr`` cannot be imported.
        """
        # Only the import is guarded, so an ImportError raised inside the
        # function under test is reported as a failure, not a skip.
        try:
            from app import preprocess_image_for_ocr
        except ImportError:
            pytest.skip("preprocess_image_for_ocr not available")

        test_image = Image.new('RGB', (1000, 800), color='white')

        # perf_counter is monotonic, unlike time.time(), so the measured
        # interval cannot be distorted by system clock adjustments.
        start_time = time.perf_counter()
        result = preprocess_image_for_ocr(test_image)
        processing_time = time.perf_counter() - start_time

        assert processing_time < 5.0
        assert result is not None

    def test_text_cleaning_speed(self):
        """Cleaning 1000 noisy lines should take under 1 second.

        Skipped when ``clean_extracted_text`` cannot be imported.
        """
        try:
            from app import clean_extracted_text
        except ImportError:
            pytest.skip("clean_extracted_text not available")

        # Large input with pipes and underscores on every line.
        dirty_text = " \n ".join(
            f"Line {i} with | pipes and _ underscores " for i in range(1000)
        )

        start_time = time.perf_counter()
        clean_text = clean_extracted_text(dirty_text)
        processing_time = time.perf_counter() - start_time

        assert processing_time < 1.0
        assert len(clean_text) > 0
        assert "|" not in clean_text
# tests/test_configuration_real.py | |
import pytest | |
import sys | |
import os | |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
class TestRealConfiguration:
    """Checks on environment-driven and hard-coded configuration values."""

    def test_environment_variables(self):
        """Validate the Hugging Face token when one is set.

        Reads ``HF_TOKEN`` and falls back to ``HUGGINGFACE_TOKEN``. A missing
        token is accepted; a present token must be a string longer than 10
        characters.
        """
        hf_token = os.environ.get('HF_TOKEN') or os.environ.get('HUGGINGFACE_TOKEN')

        # No token is fine for testing; only validate one that exists.
        if hf_token:
            assert isinstance(hf_token, str)
            assert len(hf_token) > 10  # Tokens should be substantial

    def test_cuda_device_configuration(self):
        """Derive the expected device from the environment and torch."""
        import torch

        # Distinguish "unset" (None: all GPUs remain visible) from an
        # explicit empty string (all GPUs hidden); only the latter forces CPU.
        cuda_visible = os.environ.get("CUDA_VISIBLE_DEVICES")

        if cuda_visible == "":
            expected_device = "cpu"
        elif torch.cuda.is_available():
            expected_device = "cuda"
        else:
            expected_device = "cpu"

        assert expected_device in ["cuda", "cpu"]

    def test_model_configuration(self):
        """Sanity-check the model id and generation parameters used here."""
        model_id = "google/gemma-3n-e2b-it"
        assert isinstance(model_id, str)
        assert "/" in model_id  # Should be in format "org/model"

        max_new_tokens = 512
        temperature = 0.3

        assert isinstance(max_new_tokens, int)
        assert max_new_tokens > 0
        assert isinstance(temperature, float)
        assert 0 <= temperature <= 1.0