from  .vers import calc_vers 
import librosa
import numpy as np
import math
from .filler_analyzer import detect_fillers
from .find_valence import  get_valence_score

# Offset added before taking log10 so a zero amplitude never yields -inf.
_DB_EPSILON = 1e-6
# dB value reported when the measured amplitude is not strictly positive.
_SILENCE_FLOOR_DB = -80.0
# A gap must be strictly longer than this to be counted as a long pause.
# NOTE(review): segment timestamps are compared against len(y) / sr, so they
# are presumably in seconds -- confirm against the transcription backend.
_LONG_PAUSE_SECONDS = 1.0


def _amplitude_to_db(amplitude) -> float:
    """Convert a linear amplitude to dB, or return the silence floor if it is not > 0."""
    if amplitude > 0:
        return 20 * math.log10(amplitude + _DB_EPSILON)
    return _SILENCE_FLOOR_DB


def _count_long_pauses(segments, duration: float) -> int:
    """Count gaps longer than the threshold between, before and after segments."""
    if not segments:
        return 0

    # Gaps between each pair of consecutive segments.
    count = sum(
        1
        for previous, following in zip(segments, segments[1:])
        if following["start"] - previous["end"] > _LONG_PAUSE_SECONDS
    )
    # Gap before the first segment starts.
    if segments[0]["start"] > _LONG_PAUSE_SECONDS:
        count += 1
    # Gap between the end of the last segment and the end of the audio.
    if duration - segments[-1]["end"] > _LONG_PAUSE_SECONDS:
        count += 1
    return count


def compute_vers_score(file_path: str, whisper_model) -> dict:
    """
    Compute VERS (Vocal Emotional Regulation Score) and its components from a speech sample.

    The audio file is transcribed with ``whisper_model`` and loaded with
    librosa; filler count, long-pause count, pitch variation, volume
    statistics, words per minute and valence scores are then passed to
    ``calc_vers``.

    Args:
        file_path: Path of the audio file to analyse.
        whisper_model: Object exposing ``transcribe(file_path)`` that returns
            a mapping with a ``"text"`` entry and a ``"segments"`` entry,
            where every segment provides ``"start"`` and ``"end"``.

    Returns:
        The value returned by ``calc_vers`` with the stripped transcript
        stored under the ``"transcript"`` key.
    """
    result = whisper_model.transcribe(file_path)
    transcript = result.get("text", "").strip()
    segments = result.get("segments", [])

    # Filler count (the second return value of detect_fillers is not needed).
    filler_count, _ = detect_fillers(transcript)

    # Load audio at its native sample rate.
    y, sr = librosa.load(file_path, sr=None)
    duration = len(y) / sr if sr else 0.0

    # Volume: mean level and spread of the frame-wise RMS, both in dB.
    rms = librosa.feature.rms(y=y)[0]
    mean_rms = float(np.mean(rms))
    mean_volume_db = _amplitude_to_db(mean_rms)
    # Cast to a builtin float so every metric handed to calc_vers is a plain
    # Python number, like mean_rms and pitch_variation.
    volume_std = float(np.std(20 * np.log10(rms + _DB_EPSILON)))

    # Max volume: peak absolute sample value in dB.
    vol_max = np.max(np.abs(y)) if y.size > 0 else 0.0
    vol_max_db = _amplitude_to_db(vol_max)

    # Pitch variation: standard deviation, in semitones relative to the median
    # f0, over the frames for which pyin returned a pitch estimate.
    f0, _, _ = librosa.pyin(
        y, sr=sr, fmin=80, fmax=400, frame_length=1024, hop_length=256, fill_na=np.nan)
    voiced_f0 = f0[~np.isnan(f0)]
    pitch_variation = 0.0
    if voiced_f0.size > 0:
        # NaNs were filtered out above, so the plain median/std are sufficient.
        median_f0 = max(np.median(voiced_f0), 1e-6)  # guard the division below
        semitone_diffs = 12 * np.log2(voiced_f0 / median_f0)
        pitch_variation = float(np.std(semitone_diffs))

    # Pause analysis
    long_pause_count = _count_long_pauses(segments, duration)

    # WPM, measured over the full audio duration.
    word_count = len(transcript.split())
    words_per_min = (word_count / duration) * 60.0 if duration > 0 else 0.0

    valence_scores = get_valence_score(file_path)

    # Calculate VERS
    vers_result = calc_vers(
        filler_count=filler_count,
        long_pause_count=long_pause_count,
        pitch_variation=pitch_variation,
        mean_volume_db=mean_volume_db,
        vol_max_db=vol_max_db,
        wpm=words_per_min,
        volume_std=volume_std,
        valence_scores=valence_scores
    )

    # Attach the transcript to the result for the caller's convenience.
    vers_result["transcript"] = transcript
    return vers_result