Spaces:

VOIDER
/

image-evaluation-tool

Sleeping

App Files Files Community

VOIDER committed 21 days ago

Commit

ceb655b

verified ·

1 Parent(s): 4cc53ed

Upload 3 files

Browse files

Files changed (1) hide show

utils/scoring.py +77 -0

utils/scoring.py ADDED Viewed

	@@ -0,0 +1,77 @@

import logging

import numpy as np

logger = logging.getLogger(__name__)

# Component weights used when a prompt is available. The values sum to 1.0,
# so a weighted sum of 0-10 inputs stays on the 0-10 scale.
_WEIGHTS_WITH_PROMPT = {
    'quality': 0.25,       # technical quality
    'aesthetics': 0.35,    # visual appeal (highest weight)
    'prompt': 0.25,        # prompt adherence
    'ai_detection': 0.15,  # authenticity (inverted detection probability)
}

# Component weights used when no prompt is available: the 0.25 prompt weight
# is split evenly between quality and aesthetics. The values sum to 1.0.
_WEIGHTS_WITHOUT_PROMPT = {
    'quality': 0.375,      # 0.25 + 0.125 taken from the prompt weight
    'aesthetics': 0.475,   # 0.35 + 0.125 taken from the prompt weight
    'ai_detection': 0.15,  # authenticity (inverted detection probability)
}


def _clamp(value: float, lower: float, upper: float, name: str) -> float:
    """Return *value* limited to ``[lower, upper]``, rejecting NaN.

    Raises:
        ValueError: If *value* is NaN.
    """
    # NaN is the only value that is not equal to itself. It must be rejected
    # explicitly: ``max(lower, min(upper, nan))`` evaluates to ``upper``
    # because every ordering comparison against NaN is False.
    if value != value:
        raise ValueError(f"{name} is NaN")
    return max(lower, min(upper, value))


def calculate_final_score(
    quality_score: float,
    aesthetics_score: float,
    prompt_score: float,
    ai_detection_score: float,
    has_prompt: bool = True
) -> float:
    """
    Calculate weighted composite score for image evaluation.

    Each input is clamped to its documented range before weighting. The AI
    detection probability is inverted and rescaled to 0-10, so a low
    probability of AI generation yields a high contribution.

    Args:
        quality_score: Technical image quality (0-10)
        aesthetics_score: Visual appeal score (0-10)
        prompt_score: Prompt adherence score (0-10). Ignored (and not
            validated) when ``has_prompt`` is False.
        ai_detection_score: AI generation probability (0-1)
        has_prompt: Whether prompt metadata is available

    Returns:
        Final composite score (0-10). Returns 0.0 if any score that is
        actually used is NaN or is not comparable with a number; the
        error is logged rather than raised.
    """
    try:
        quality = _clamp(quality_score, 0.0, 10.0, "quality_score")
        aesthetics = _clamp(aesthetics_score, 0.0, 10.0, "aesthetics_score")
        ai_probability = _clamp(
            ai_detection_score, 0.0, 1.0, "ai_detection_score"
        )

        # Invert and rescale: probability 0 (not AI) -> 10, probability 1 -> 0.
        inverted_ai_score = (1 - ai_probability) * 10

        if has_prompt:
            weights = _WEIGHTS_WITH_PROMPT
            prompt = _clamp(prompt_score, 0.0, 10.0, "prompt_score")
            score = (
                quality * weights['quality'] +
                aesthetics * weights['aesthetics'] +
                prompt * weights['prompt'] +
                inverted_ai_score * weights['ai_detection']
            )
        else:
            # prompt_score is deliberately not touched here, so a placeholder
            # such as None for the missing prompt cannot fail the calculation.
            weights = _WEIGHTS_WITHOUT_PROMPT
            score = (
                quality * weights['quality'] +
                aesthetics * weights['aesthetics'] +
                inverted_ai_score * weights['ai_detection']
            )

        # Guard against floating-point drift just outside the 0-10 range.
        final_score = max(0.0, min(10.0, score))
        logger.debug("Score calculation - Final: %.2f", final_score)
        return final_score
    except Exception as e:
        # Deliberate catch-all: callers rely on this function never raising
        # and treat 0.0 as the failure value.
        logger.error("Error calculating final score: %s", e)
        return 0.0