Spaces:

Bonosa2
/

Scribbled-docs-notes

Sleeping

File size: 11,303 Bytes

79adcde

# tests/test_real_functionality.py
import pytest
import sys
import os
from PIL import Image
import numpy as np

# Add parent directory to import app
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Import the real functions under test from app.py.
# NOTE: any ImportError raised while importing app (including one caused by
# a module that app itself fails to import) lands in the except branch, so
# APP_AVAILABLE becomes False and TestRealFunctionality is skipped.
try:
    from app import (
        preprocess_image_for_ocr,
        clean_extracted_text,
        gradio_generate_soap
    )
    APP_AVAILABLE = True
except ImportError:
    APP_AVAILABLE = False

@pytest.mark.skipif(not APP_AVAILABLE, reason="app.py not available")
class TestRealFunctionality:
    """Exercise the real functions imported from app.py (no mocks).

    The whole class is skipped when ``app`` could not be imported.
    """

    def test_clean_extracted_text_real(self):
        """clean_extracted_text keeps line content and drops pipe characters."""
        dirty_text = "  Line 1  \n\n  Line 2 with | pipes  \n   \n  Line_3  "
        clean_text = clean_extracted_text(dirty_text)

        assert "Line 1" in clean_text
        # Expected output keeps both spaces that surrounded the removed pipe.
        assert "Line 2 with  pipes" in clean_text
        # Expected output has the underscore of "Line_3" turned into a space.
        assert "Line 3" in clean_text
        assert "|" not in clean_text

    def test_preprocess_image_for_ocr_real(self):
        """preprocess_image_for_ocr returns a 2-D array of at least 300x300."""
        # PIL sizes are (width, height): this image is 400 wide, 300 high.
        test_image = Image.new('RGB', (400, 300), color='white')

        result = preprocess_image_for_ocr(test_image)

        assert isinstance(result, np.ndarray)
        assert result.ndim == 2  # Should be grayscale (no channel axis)
        height, width = result.shape
        assert height >= 300  # Height should be at least 300
        assert width >= 300  # Width should be at least 300

    def test_gradio_generate_soap_empty_input_real(self):
        """Empty text with no file must produce an error/prompt message."""
        result = gradio_generate_soap("", None)
        assert "❌" in result or "Please enter" in result

    def test_gradio_generate_soap_text_input_real(self):
        """Plain-text input must yield a note with at least one SOAP section.

        When the app answers with an error message (marked by "❌"), the test
        is reported as skipped with that message instead of silently passing
        without having asserted anything.
        """
        medical_text = """
        Patient: John Smith, 45-year-old male
        Chief Complaint: Chest pain for 2 hours
        History: Sharp chest pain, 7/10 intensity
        Physical Exam: VS: BP 150/90, HR 110
        Assessment: Chest pain evaluation needed
        Plan: EKG, cardiac enzymes
        """

        result = gradio_generate_soap(medical_text, None)

        # An error reply says nothing about SOAP formatting; surface it in the
        # test report so the real state of the app stays visible.
        if "❌" in result:
            pytest.skip(f"SOAP generation unavailable: {str(result)[:200]}")

        upper_result = result.upper()
        assert any(section in upper_result for section in
                   ["SUBJECTIVE", "OBJECTIVE", "ASSESSMENT", "PLAN"])

# tests/test_integration_real.py
import pytest
import sys
import os
from PIL import Image, ImageDraw, ImageFont

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

class TestRealIntegration:
    """Integration-style checks that run against real objects (no mocks)."""

    def test_image_with_text_creation(self):
        """Draw text lines on a blank image and verify pixels were changed."""
        # Imported here because this test module's import section does not
        # import numpy; keeps the test runnable on its own.
        import numpy as np

        img = Image.new('RGB', (800, 600), color='white')
        draw = ImageDraw.Draw(img)

        medical_text = [
            "Patient: John Doe",
            "Age: 45 years old",
            "Chief Complaint: Chest pain",
            "Vital Signs: BP 140/90",
            "Assessment: Further evaluation needed"
        ]

        # One line every 40 px, starting 50 px from the top-left corner.
        for index, line in enumerate(medical_text):
            draw.text((50, 50 + 40 * index), line, fill='black')

        # Verify image was created
        assert img.size == (800, 600)
        assert img.mode == 'RGB'

        # Drawing black text must leave at least one non-white value.
        img_array = np.array(img)
        assert not np.all(img_array == 255)

    def test_examples_dictionary_structure(self):
        """The app's ``examples`` mapping has the expected keys and content.

        Skipped when ``examples`` cannot be imported from app.py.
        """
        # Only the import is guarded, so the skip can only be triggered by
        # the import itself and never by the checks below.
        try:
            from app import examples
        except ImportError:
            pytest.skip("examples not available in app.py")

        # Check required keys exist
        for key in ['chest_pain', 'diabetes', 'pediatric']:
            assert key in examples

        # Check content quality
        section_words = ["complaint", "history", "exam", "assessment", "plan"]
        for key, content in examples.items():
            assert len(content) > 100  # Substantial content
            assert "Patient:" in content
            lowered = content.lower()
            assert any(word in lowered for word in section_words)

    def test_file_extensions_validation(self):
        """Lower-cased extensions are matched against the allowed list."""
        allowed_extensions = ['.png', '.jpg', '.jpeg', '.txt', '.doc', '.docx', '.pdf']

        # Test valid extensions
        valid_files = ['image.png', 'document.pdf', 'notes.txt', 'scan.jpg']
        for filename in valid_files:
            file_ext = os.path.splitext(filename.lower())[1]
            assert file_ext in allowed_extensions

        # Test invalid extensions
        invalid_files = ['file.xyz', 'document.zip', 'image.bmp']
        for filename in invalid_files:
            file_ext = os.path.splitext(filename.lower())[1]
            assert file_ext not in allowed_extensions

# tests/test_error_scenarios_real.py
import pytest
import sys
import os

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

class TestRealErrorScenarios:
    """Checks around optional dependencies and basic runtime capabilities."""

    def test_missing_dependencies_handling(self):
        """Skip when neither OCR backend (easyocr, pytesseract) is importable."""
        try:
            import easyocr  # noqa: F401 - imported only to probe availability
            easyocr_available = True
        except ImportError:
            easyocr_available = False

        try:
            import pytesseract  # noqa: F401 - imported only to probe availability
            tesseract_available = True
        except ImportError:
            tesseract_available = False

        # At least one OCR method should be available for the app to work
        if not (easyocr_available or tesseract_available):
            pytest.skip("No OCR dependencies available")

    def test_torch_availability(self):
        """PyTorch imports and can allocate a tensor on the detected device."""
        import torch

        # Test device detection logic
        device = "cuda" if torch.cuda.is_available() else "cpu"
        assert device in ["cuda", "cpu"]

        # Actually place the tensor on the detected device and verify it
        # landed there; device.type drops the index ("cuda:0" -> "cuda").
        test_tensor = torch.tensor([1, 2, 3], dtype=torch.float32, device=device)
        assert test_tensor.device.type == device
        assert test_tensor.numel() == 3

    def test_image_format_handling(self):
        """PIL creates images in the RGB, L (grayscale) and RGBA modes."""
        from PIL import Image

        # Test RGB image
        rgb_img = Image.new('RGB', (100, 100), color='white')
        assert rgb_img.mode == 'RGB'

        # Test grayscale image
        gray_img = Image.new('L', (100, 100), color=128)
        assert gray_img.mode == 'L'

        # Test RGBA image
        rgba_img = Image.new('RGBA', (100, 100), color=(255, 255, 255, 255))
        assert rgba_img.mode == 'RGBA'

# tests/test_performance_real.py
import pytest
import time
import sys
import os
from PIL import Image

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

class TestRealPerformance:
    """Coarse timing checks on the app's real helper functions."""

    def test_image_preprocessing_speed(self):
        """Preprocessing a 1000x800 image should finish in under 5 seconds.

        Skipped when ``preprocess_image_for_ocr`` cannot be imported.
        """
        # Only the import is guarded, so the skip can only be triggered by
        # the import itself and never by the timed call below.
        try:
            from app import preprocess_image_for_ocr
        except ImportError:
            pytest.skip("preprocess_image_for_ocr not available")

        test_image = Image.new('RGB', (1000, 800), color='white')

        # perf_counter is monotonic and high-resolution; time.time() can jump
        # when the system clock is adjusted and distort the measurement.
        start_time = time.perf_counter()
        result = preprocess_image_for_ocr(test_image)
        processing_time = time.perf_counter() - start_time

        # Should process reasonably quickly (under 5 seconds)
        assert processing_time < 5.0
        assert result is not None

    def test_text_cleaning_speed(self):
        """Cleaning 1000 noisy lines should finish in under 1 second.

        Skipped when ``clean_extracted_text`` cannot be imported.
        """
        try:
            from app import clean_extracted_text
        except ImportError:
            pytest.skip("clean_extracted_text not available")

        # Create large text with lots of cleaning needed
        dirty_text = "  \n  ".join(
            f"Line {i} with | pipes and _ underscores  " for i in range(1000)
        )

        start_time = time.perf_counter()
        clean_text = clean_extracted_text(dirty_text)
        processing_time = time.perf_counter() - start_time

        # Should clean text quickly (under 1 second)
        assert processing_time < 1.0
        assert len(clean_text) > 0
        assert "|" not in clean_text

# tests/test_configuration_real.py
import pytest
import sys
import os

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

class TestRealConfiguration:
    """Sanity checks for configuration values used by the app."""

    def test_environment_variables(self):
        """A Hugging Face token, when set, must be a string longer than 10."""
        # HF_TOKEN takes precedence; HUGGINGFACE_TOKEN is the fallback.
        token = None
        for variable in ('HF_TOKEN', 'HUGGINGFACE_TOKEN'):
            token = os.environ.get(variable)
            if token:
                break

        # A missing token is acceptable here; there is nothing to validate.
        if not token:
            return

        assert isinstance(token, str)
        assert len(token) > 10  # Tokens should be substantial

    def test_cuda_device_configuration(self):
        """Device selection resolves to either "cuda" or "cpu"."""
        import torch

        visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "")

        # An empty/unset CUDA_VISIBLE_DEVICES selects CPU without probing
        # torch; otherwise CUDA is chosen only when torch reports it.
        wants_cuda = visible_devices != "" and torch.cuda.is_available()
        expected_device = "cuda" if wants_cuda else "cpu"

        assert expected_device in ["cuda", "cpu"]

    def test_model_configuration(self):
        """Model id and generation parameters have sane types and ranges."""
        model_id = "google/gemma-3n-e2b-it"
        max_new_tokens, temperature = 512, 0.3

        # Model id should be in "org/model" format.
        assert isinstance(model_id, str)
        assert "/" in model_id

        # Generation parameters that would be used.
        assert isinstance(max_new_tokens, int)
        assert max_new_tokens > 0
        assert isinstance(temperature, float)
        assert 0 <= temperature <= 1.0