""" Scoring Module Handles normalization and composite scoring for SQL evaluation results. """ import math import numpy as np from typing import Dict, Any, List from dataclasses import dataclass @dataclass class Metrics: """Evaluation metrics for a SQL query.""" correctness_exact: float # 0.0 or 1.0 result_match_f1: float # 0.0 to 1.0 exec_success: float # 0.0 or 1.0 latency_ms: float # milliseconds readability: float # 0.0 to 1.0 (based on SQL structure) dialect_ok: float # 0.0 or 1.0 class ScoringEngine: """Engine for computing composite scores from evaluation metrics.""" def __init__(self): # Weights for composite scoring (sum should be 1.0) self.weights = { 'correctness_exact': 0.4, # Most important 'exec_success': 0.25, # Very important 'result_match_f1': 0.15, # Important for partial credit 'dialect_ok': 0.1, # Important for dialect compliance 'readability': 0.05, # Minor factor 'latency': 0.05 # Minor factor (normalized) } # Latency normalization parameters self.latency_min_ms = 10.0 # Minimum expected latency self.latency_max_ms = 10000.0 # Maximum expected latency def normalize_latency(self, latency_ms: float) -> float: """Normalize latency using log scale.""" if latency_ms <= 0: return 0.0 # Clamp to reasonable bounds latency_ms = max(self.latency_min_ms, min(latency_ms, self.latency_max_ms)) # Log normalization: log(latency) / log(max_latency) normalized = math.log(latency_ms) / math.log(self.latency_max_ms) # Invert so lower latency = higher score return 1.0 - normalized def compute_readability_score(self, sql: str) -> float: """Compute readability score based on SQL structure.""" if not sql or not sql.strip(): return 0.0 sql = sql.strip().upper() score = 0.0 # Basic structure checks if 'SELECT' in sql: score += 0.2 if 'FROM' in sql: score += 0.2 if sql.count('(') == sql.count(')'): # Balanced parentheses score += 0.1 # Formatting checks if '\n' in sql: # Multi-line formatting score += 0.1 if sql.count(' ') > 5: # Proper spacing score += 0.1 # Complexity checks (more complex = slightly lower readability) complexity_penalty = 0.0 if sql.count('JOIN') > 2: complexity_penalty += 0.1 if sql.count('CASE') > 0: complexity_penalty += 0.05 if sql.count('(') > 3: complexity_penalty += 0.05 score = max(0.0, score - complexity_penalty) return min(1.0, score) def compute_composite_score(self, metrics: Metrics) -> float: """Compute composite score from individual metrics.""" # Normalize latency normalized_latency = self.normalize_latency(metrics.latency_ms) # Compute readability if not provided if metrics.readability == 0.0: # This would need the actual SQL, but for now we'll use a default metrics.readability = 0.8 # Default reasonable readability # Weighted sum composite_score = ( self.weights['correctness_exact'] * metrics.correctness_exact + self.weights['exec_success'] * metrics.exec_success + self.weights['result_match_f1'] * metrics.result_match_f1 + self.weights['dialect_ok'] * metrics.dialect_ok + self.weights['readability'] * metrics.readability + self.weights['latency'] * normalized_latency ) return round(composite_score, 4) def compute_composite_score_from_dict(self, metrics_dict: Dict[str, Any]) -> float: """Compute composite score from metrics dictionary.""" metrics = Metrics( correctness_exact=metrics_dict.get('correctness_exact', 0.0), result_match_f1=metrics_dict.get('result_match_f1', 0.0), exec_success=metrics_dict.get('exec_success', 0.0), latency_ms=metrics_dict.get('latency_ms', 0.0), readability=metrics_dict.get('readability', 0.0), 
dialect_ok=metrics_dict.get('dialect_ok', 0.0) ) return self.compute_composite_score(metrics) def get_score_breakdown(self, metrics: Metrics) -> Dict[str, float]: """Get detailed breakdown of how the composite score was computed.""" normalized_latency = self.normalize_latency(metrics.latency_ms) breakdown = { 'correctness_exact': self.weights['correctness_exact'] * metrics.correctness_exact, 'exec_success': self.weights['exec_success'] * metrics.exec_success, 'result_match_f1': self.weights['result_match_f1'] * metrics.result_match_f1, 'dialect_ok': self.weights['dialect_ok'] * metrics.dialect_ok, 'readability': self.weights['readability'] * metrics.readability, 'latency': self.weights['latency'] * normalized_latency, 'composite_score': self.compute_composite_score(metrics) } return breakdown # Global scoring engine instance scoring_engine = ScoringEngine()
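

# A minimal usage sketch. The metric values below are made-up illustrations,
# not outputs from a real evaluation run.
if __name__ == "__main__":
    example = Metrics(
        correctness_exact=1.0,
        result_match_f1=1.0,
        exec_success=1.0,
        latency_ms=120.0,
        readability=0.0,  # unset; scoring falls back to default_readability
        dialect_ok=1.0,
    )
    print("composite:", scoring_engine.compute_composite_score(example))
    print("breakdown:", scoring_engine.get_score_breakdown(example))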