"""Tests that exercise the real ``app`` module and its environment (no mocks).

This file concatenates five originally separate test modules; each section
below is introduced by a comment naming the file it came from.  Tests that
need symbols from ``app`` are skipped when those symbols cannot be imported.
"""

import os
import sys
import time

import numpy as np
import pytest
from PIL import Image, ImageDraw, ImageFont

# Add parent directory to import app
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, _PROJECT_ROOT)

# Import actual functions from app.py
try:
    from app import (
        preprocess_image_for_ocr,
        clean_extracted_text,
        gradio_generate_soap,
    )
    APP_AVAILABLE = True
except ImportError:
    APP_AVAILABLE = False


# ---------------------------------------------------------------------------
# tests/test_real_functionality.py
# ---------------------------------------------------------------------------

@pytest.mark.skipif(not APP_AVAILABLE, reason="app.py not available")
class TestRealFunctionality:
    """Test actual app functions (not mocks)"""

    def test_clean_extracted_text_real(self):
        """Test the actual clean_extracted_text function"""
        dirty_text = " Line 1 \n\n Line 2 with | pipes \n \n Line_3 "
        clean_text = clean_extracted_text(dirty_text)

        assert "Line 1" in clean_text
        assert "Line 2 with pipes" in clean_text
        assert "Line 3" in clean_text
        assert "|" not in clean_text

    def test_preprocess_image_for_ocr_real(self):
        """Test actual image preprocessing"""
        # Create test image
        test_image = Image.new('RGB', (400, 300), color='white')

        # Test the real function
        result = preprocess_image_for_ocr(test_image)

        assert isinstance(result, np.ndarray)
        assert len(result.shape) == 2  # Should be grayscale
        assert result.shape[0] >= 300  # Height should be at least 300
        assert result.shape[1] >= 300  # Width should be at least 300

    def test_gradio_generate_soap_empty_input_real(self):
        """Test actual Gradio function with empty input"""
        result = gradio_generate_soap("", None)
        assert "❌" in result or "Please enter" in result

    def test_gradio_generate_soap_text_input_real(self):
        """Test actual Gradio function with text input"""
        medical_text = """
        Patient: John Smith, 45-year-old male
        Chief Complaint: Chest pain for 2 hours
        History: Sharp chest pain, 7/10 intensity
        Physical Exam: VS: BP 150/90, HR 110
        Assessment: Chest pain evaluation needed
        Plan: EKG, cardiac enzymes
        """

        # This will fail if generate_soap_note function isn't loaded
        # but that's good - it tells us the real state of the app
        result = gradio_generate_soap(medical_text, None)

        # Check if it's an error message or actual SOAP content
        if "❌" not in result:
            # Should contain SOAP sections if successful
            upper_result = result.upper()
            assert any(
                section in upper_result
                for section in ["SUBJECTIVE", "OBJECTIVE", "ASSESSMENT", "PLAN"]
            )


# ---------------------------------------------------------------------------
# tests/test_integration_real.py
# ---------------------------------------------------------------------------

class TestRealIntegration:
    """Test real integration without mocks"""

    def test_image_with_text_creation(self):
        """Test creating an image with text for OCR testing"""
        # Create a real image with text
        img = Image.new('RGB', (800, 600), color='white')
        draw = ImageDraw.Draw(img)

        # Add medical text to image
        medical_text = [
            "Patient: John Doe",
            "Age: 45 years old",
            "Chief Complaint: Chest pain",
            "Vital Signs: BP 140/90",
            "Assessment: Further evaluation needed",
        ]

        # Lines start at y=50 and are spaced 40 pixels apart.
        for index, line in enumerate(medical_text):
            draw.text((50, 50 + 40 * index), line, fill='black')

        # Verify image was created
        assert img.size == (800, 600)
        assert img.mode == 'RGB'

        # Convert to array to verify content
        img_array = np.array(img)
        # Should not be all white (has text)
        assert not np.all(img_array == 255)

    def test_examples_dictionary_structure(self):
        """Test that examples dictionary has proper structure"""
        # Test the actual examples from the app.  Only the import is guarded,
        # so an ImportError raised anywhere else is not mistaken for a skip.
        try:
            from app import examples
        except ImportError:
            pytest.skip("examples not available in app.py")

        # Check required keys exist
        required_keys = ['chest_pain', 'diabetes', 'pediatric']
        for key in required_keys:
            assert key in examples

        # Check content quality
        expected_words = ["complaint", "history", "exam", "assessment", "plan"]
        for key, content in examples.items():
            assert len(content) > 100  # Substantial content
            assert "Patient:" in content
            lowered = content.lower()
            assert any(word in lowered for word in expected_words)

    def test_file_extensions_validation(self):
        """Test file extension validation logic"""
        allowed_extensions = ['.png', '.jpg', '.jpeg', '.txt', '.doc', '.docx', '.pdf']

        # Test valid extensions
        valid_files = ['image.png', 'document.pdf', 'notes.txt', 'scan.jpg']
        for filename in valid_files:
            file_ext = os.path.splitext(filename.lower())[1]
            assert file_ext in allowed_extensions

        # Test invalid extensions
        invalid_files = ['file.xyz', 'document.zip', 'image.bmp']
        for filename in invalid_files:
            file_ext = os.path.splitext(filename.lower())[1]
            assert file_ext not in allowed_extensions


# ---------------------------------------------------------------------------
# tests/test_error_scenarios_real.py
# ---------------------------------------------------------------------------

class TestRealErrorScenarios:
    """Test real error handling scenarios"""

    def test_missing_dependencies_handling(self):
        """Test what happens when dependencies are missing"""
        # Probe each optional OCR backend by attempting to import it.
        try:
            import easyocr  # noqa: F401 - imported only to probe availability
            easyocr_available = True
        except ImportError:
            easyocr_available = False

        try:
            import pytesseract  # noqa: F401 - imported only to probe availability
            tesseract_available = True
        except ImportError:
            tesseract_available = False

        # At least one OCR method should be available for the app to work
        if not easyocr_available and not tesseract_available:
            pytest.skip("No OCR dependencies available")

    def test_torch_availability(self):
        """Test PyTorch availability and device detection"""
        import torch

        # Test device detection logic
        device = "cuda" if torch.cuda.is_available() else "cpu"
        assert device in ["cuda", "cpu"]

        # Test tensor creation on detected device: pass the device explicitly
        # so the tensor is actually placed there, then confirm the placement.
        test_tensor = torch.tensor([1, 2, 3], dtype=torch.float32, device=device)
        assert test_tensor is not None
        assert test_tensor.device.type == device

    def test_image_format_handling(self):
        """Test handling of different image formats"""
        # Test RGB image
        rgb_img = Image.new('RGB', (100, 100), color='white')
        assert rgb_img.mode == 'RGB'

        # Test grayscale image
        gray_img = Image.new('L', (100, 100), color=128)
        assert gray_img.mode == 'L'

        # Test RGBA image
        rgba_img = Image.new('RGBA', (100, 100), color=(255, 255, 255, 255))
        assert rgba_img.mode == 'RGBA'


# ---------------------------------------------------------------------------
# tests/test_performance_real.py
# ---------------------------------------------------------------------------

class TestRealPerformance:
    """Test actual performance characteristics"""

    def test_image_preprocessing_speed(self):
        """Test image preprocessing performance"""
        # Guard only the import; failures inside the call must surface.
        try:
            from app import preprocess_image_for_ocr
        except ImportError:
            pytest.skip("preprocess_image_for_ocr not available")

        # Create test image
        test_image = Image.new('RGB', (1000, 800), color='white')

        # Time the preprocessing with a monotonic, high-resolution clock so
        # system clock adjustments cannot distort the measurement.
        start_time = time.perf_counter()
        result = preprocess_image_for_ocr(test_image)
        processing_time = time.perf_counter() - start_time

        # Should process reasonably quickly (under 5 seconds)
        assert processing_time < 5.0
        assert result is not None

    def test_text_cleaning_speed(self):
        """Test text cleaning performance"""
        # Guard only the import; failures inside the call must surface.
        try:
            from app import clean_extracted_text
        except ImportError:
            pytest.skip("clean_extracted_text not available")

        # Create large text with lots of cleaning needed
        dirty_text = " \n ".join(
            [f"Line {i} with | pipes and _ underscores " for i in range(1000)]
        )

        start_time = time.perf_counter()
        clean_text = clean_extracted_text(dirty_text)
        processing_time = time.perf_counter() - start_time

        # Should clean text quickly (under 1 second)
        assert processing_time < 1.0
        assert len(clean_text) > 0
        assert "|" not in clean_text


# ---------------------------------------------------------------------------
# tests/test_configuration_real.py
# ---------------------------------------------------------------------------

class TestRealConfiguration:
    """Test actual app configuration"""

    def test_environment_variables(self):
        """Test environment variable handling"""
        # Test HF_TOKEN detection
        hf_token = os.environ.get('HF_TOKEN') or os.environ.get('HUGGINGFACE_TOKEN')

        # A missing token is fine for testing, so there is nothing to check
        # in that case.  If a token exists, it should be a substantial string.
        if hf_token:
            assert isinstance(hf_token, str)
            assert len(hf_token) > 10  # Tokens should be substantial

    def test_cuda_device_configuration(self):
        """Test CUDA device configuration"""
        import torch

        # Test device selection logic: an unset or empty CUDA_VISIBLE_DEVICES
        # falls back to CPU; otherwise CUDA is used only when torch reports it.
        cuda_visible = os.environ.get("CUDA_VISIBLE_DEVICES", "")
        if cuda_visible and torch.cuda.is_available():
            expected_device = "cuda"
        else:
            expected_device = "cpu"

        # Verify device is valid
        assert expected_device in ["cuda", "cpu"]

    def test_model_configuration(self):
        """Test model configuration parameters"""
        # Test model ID
        model_id = "google/gemma-3n-e2b-it"
        assert isinstance(model_id, str)
        assert "/" in model_id  # Should be in format "org/model"

        # Test model parameters that would be used
        max_new_tokens = 512
        temperature = 0.3

        assert isinstance(max_new_tokens, int)
        assert max_new_tokens > 0
        assert isinstance(temperature, float)
        assert 0 <= temperature <= 1.0