# Source: Fast_api/vcs/compute_vcs.py
# Author: mulasagg — commit aef3b1e ("API optimizations"), 2.18 kB
"""
Compute Voice Clarity Score from audio file
"""
import librosa
import numpy as np
from typing import Dict, Any
from .vcs import calculate_voice_clarity_score, get_clarity_insight
def compute_voice_clarity_score(file_path: str, whisper_model) -> Dict[str, Any]:
    """
    Compute Voice Clarity Score and its components from a speech sample.

    Args:
        file_path (str): Path to the audio file.
        whisper_model: Transcription model (e.g., OpenAI Whisper or faster-whisper).

    Returns:
        dict: Voice Clarity Score and component scores, with the transcript
        word count and audio duration added under the "components" key.

    Raises:
        ValueError: If the transcript or segments are empty, or the audio
        has a non-positive duration.
    """
    # Transcribe audio; fp16=False keeps inference safe on CPU-only hosts.
    result = whisper_model.transcribe(file_path, word_timestamps=False, fp16=False)
    transcript = result.get("text", "").strip()
    segments = result.get("segments", [])

    # Fail fast on unusable transcription output.
    if not transcript or not segments:
        raise ValueError("Empty transcript or segments from Whisper.")

    # Load audio at its native sample rate (sr=None avoids resampling).
    y, sr = librosa.load(file_path, sr=None)
    duration = len(y) / sr if sr else 0.0
    if duration <= 0:
        raise ValueError("Audio duration invalid or zero.")

    # Calculate Voice Clarity Score from the raw signal and segment timings.
    clarity_result = calculate_voice_clarity_score(y, sr, segments)

    # Attach word count and duration for reference. setdefault guards against
    # a missing "components" key, which would otherwise raise a bare KeyError.
    components = clarity_result.setdefault("components", {})
    components["word_count"] = len(transcript.split())
    components["duration"] = duration
    return clarity_result
def analyze_voice_quality(file_path: str, whisper_model) -> Dict[str, Any]:
    """
    Run a comprehensive voice quality analysis (currently clarity only).

    Args:
        file_path (str): Path to the audio file.
        whisper_model: Transcription model.

    Returns:
        Dict[str, Any]: Analysis results keyed by metric name ("VCS").
    """
    clarity = compute_voice_clarity_score(file_path, whisper_model)
    return {"VCS": clarity["VCS"]}
# Public API of this module: explicitly exported names for `from ... import *`.
__all__ = ['compute_voice_clarity_score', 'analyze_voice_quality']