Spaces:

VOIDER
/

image-evaluation-tool

Sleeping

File size: 2,988 Bytes

51aec78

import logging
import math

import numpy as np

logger = logging.getLogger(__name__)


def _clamp_score(name: str, value: float, upper: float) -> float:
    """Clamp *value* into ``[0.0, upper]``, rejecting NaN.

    NaN must be rejected explicitly: every comparison with NaN is false, so
    ``max(0.0, min(upper, nan))`` evaluates to ``upper`` and a missing
    measurement would silently be counted as a perfect score.

    Args:
        name: Parameter name, used only in the error message.
        value: Raw score to validate.
        upper: Upper bound of the valid range.

    Returns:
        ``value`` limited to the range ``[0.0, upper]``.

    Raises:
        ValueError: If ``value`` is NaN.
        TypeError: If ``value`` is not a real number.
    """
    if math.isnan(value):
        raise ValueError(f"{name} is NaN")
    return max(0.0, min(upper, value))


def calculate_final_score(
    quality_score: float,
    aesthetics_score: float,
    prompt_score: float,
    ai_detection_score: float,
    has_prompt: bool = True
) -> float:
    """
    Calculate weighted composite score for image evaluation.

    Out-of-range inputs are clamped to their valid range. The AI detection
    probability is inverted and rescaled to 0-10 so that a low probability
    of AI generation contributes a high score.

    Weights with a prompt: quality 25%, aesthetics 35%, prompt 25%,
    authenticity 15%. Without a prompt the 25% prompt weight is split evenly
    between quality (37.5%) and aesthetics (47.5%); authenticity stays at 15%.

    Args:
        quality_score: Technical image quality (0-10)
        aesthetics_score: Visual appeal score (0-10)
        prompt_score: Prompt adherence score (0-10). Ignored (and not
            validated) when ``has_prompt`` is False.
        ai_detection_score: AI generation probability (0-1)
        has_prompt: Whether prompt metadata is available

    Returns:
        Final composite score (0-10) as a built-in ``float``. Returns 0.0 if
        any score that is used is invalid (NaN or non-numeric); the error is
        logged and no exception is raised.
    """
    try:
        quality = _clamp_score("quality_score", quality_score, 10.0)
        aesthetics = _clamp_score("aesthetics_score", aesthetics_score, 10.0)
        ai_probability = _clamp_score(
            "ai_detection_score", ai_detection_score, 1.0
        )

        # Invert and rescale to 0-10: low AI probability -> high score.
        authenticity = (1 - ai_probability) * 10

        if has_prompt:
            # Only validate the prompt score when it is actually used, so a
            # placeholder (e.g. None) for a missing prompt does not zero the
            # result in the no-prompt branch.
            prompt = _clamp_score("prompt_score", prompt_score, 10.0)
            score = (
                quality * 0.25          # 25% - Technical quality
                + aesthetics * 0.35     # 35% - Visual appeal (highest weight)
                + prompt * 0.25         # 25% - Prompt following
                + authenticity * 0.15   # 15% - Authenticity
            )
        else:
            score = (
                quality * 0.375         # 25% + 12.5% from prompt
                + aesthetics * 0.475    # 35% + 12.5% from prompt
                + authenticity * 0.15   # 15% - Authenticity
            )

        # Weights sum to 1.0, so this only guards against rounding drift.
        # float() turns NumPy scalars into a built-in float as annotated.
        final_score = float(max(0.0, min(10.0, score)))

        logger.debug("Score calculation - Final: %.2f", final_score)

        return final_score

    except Exception as e:
        # Deliberately broad: callers rely on this function never raising
        # and on 0.0 signalling a failed calculation.
        logger.error("Error calculating final score: %s", e)
        return 0.0