import re
import whisper
from pydub import AudioSegment  # For accurate duration calculation

def analyze_fillers(file_path: str, model_size: str = "base", transcript =  None ) -> dict:
    """
    Analyzes English filler words in audio with proper duration handling.
    """
    try:
        FILLER_WORDS = [
            "um", "uh", "hmm", "ah", "er", "eh", 
            "umm", "uhh", "mmm", "ahh", "err",
            "like", "you know", "well", "so", "actually", "basically",
            "right", "okay", "sort of", "kind of"
        ]
        
        # First get accurate duration using pydub
        audio = AudioSegment.from_file(file_path)
        duration = len(audio) / 1000  # Convert ms to seconds
        
        if transcript is None:
            # Then run Whisper transcription
            model = whisper.load_model(model_size)
            result = model.transcribe(file_path, word_timestamps=False, fp16=False)
            transcript = result["text"]
        
        # Case-insensitive regex matching
        pattern = r"(?<!\w)(" + "|".join(map(re.escape, FILLER_WORDS)) + r")(?!\w)"
        matches = re.findall(pattern, transcript, re.IGNORECASE)
        
        # Count occurrences
        filler_counts = {}
        for word in matches:
            key = word.lower()
            filler_counts[key] = filler_counts.get(key, 0) + 1
        total_fillers = sum(filler_counts.values())
        
        # Calculate rate per minute
        filler_per_min = (total_fillers / duration) * 60 if duration > 0 else 0
        
        # Scoring
        if total_fillers == 0:
            filler_score = 100
        elif filler_per_min < 1:
            filler_score = 90
        elif filler_per_min < 3:
            filler_score = 80
        elif filler_per_min < 5:
            filler_score = 60
        elif filler_per_min < 10:
            filler_score = 40
        else:
            filler_score = 20
        
        # Generate insight
        top_fillers = sorted(filler_counts.items(), key=lambda x: x[1], reverse=True)[:2]
        
        if total_fillers == 0:
            insight = "Excellent! No filler words detected."
        elif total_fillers <= 2:
            insight = f"Minimal fillers ({total_fillers} total), mostly '{top_fillers[0][0]}'."
        elif total_fillers <= 5:
            examples = ", ".join(f"'{f[0]}'" for f in top_fillers)
            insight = f"Moderate fillers ({total_fillers} total), mainly {examples}."
        else:
            examples = ", ".join(f"'{f[0]}'" for f in top_fillers)
            insight = f"Excessive fillers ({total_fillers} total), dominated by {examples}."
        
        return {
            "filler_counts": filler_counts,
            "total_fillers": total_fillers,
            "filler_score": filler_score,
            "filler_rate_per_min": round(filler_per_min, 1),
            "insight": insight,
        }
        
    except Exception as e:
        raise RuntimeError(f"Analysis failed: {e}") from e
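

# Example usage (a minimal sketch: "interview.wav" is a placeholder path,
# not a file that ships with this module).
if __name__ == "__main__":
    report = analyze_fillers("interview.wav", model_size="base")
    print(f"Filler score: {report['filler_score']}/100 "
          f"({report['filler_rate_per_min']} fillers/min)")
    for word, count in sorted(report["filler_counts"].items(), key=lambda x: -x[1]):
        print(f"  {word}: {count}")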