import re
import whisper
from pydub import AudioSegment  # For accurate duration calculation

def analyze_fillers(file_path: str, model_size: str = "base", transcript =  None ) -> dict:
    """
    Analyzes English filler words in audio with proper duration handling.
    """
    try:
        FILLER_WORDS = [
            "um", "uh", "hmm", "ah", "er", "eh", 
            "umm", "uhh", "mmm", "ahh", "err",
            "like", "you know", "well", "so", "actually", "basically",
            "right", "okay", "sort of", "kind of"
        ]
        
        # First get accurate duration using pydub
        audio = AudioSegment.from_file(file_path)
        duration = len(audio) / 1000  # Convert ms to seconds
        
        if transcript is None:
            # Then run Whisper transcription
            model = whisper.load_model(model_size)
            result = model.transcribe(file_path, word_timestamps=False, fp16=False)
            transcript = result["text"]
        
        # Case-insensitive regex matching
        pattern = r"(?<!\w)(" + "|".join(map(re.escape, FILLER_WORDS)) + r")(?!\w)"
        matches = re.findall(pattern, transcript, re.IGNORECASE)
        
        # Count occurrences
        filler_counts = {}
        for word in matches:
            key = word.lower()
            filler_counts[key] = filler_counts.get(key, 0) + 1
        total_fillers = sum(filler_counts.values())
        
        # Calculate rate per minute
        filler_per_min = (total_fillers / duration) * 60 if duration > 0 else 0
        
        # Scoring
        if total_fillers == 0:
            filler_score = 100
        elif filler_per_min < 1:
            filler_score = 90
        elif filler_per_min < 3:
            filler_score = 80
        elif filler_per_min < 5:
            filler_score = 60
        elif filler_per_min < 10:
            filler_score = 40
        else:
            filler_score = 20
        
        # Generate insight
        top_fillers = sorted(filler_counts.items(), key=lambda x: x[1], reverse=True)[:2]
        
        if total_fillers == 0:
            insight = "Excellent! No filler words detected."
        elif total_fillers <= 2:
            insight = f"Minimal fillers ({total_fillers} total), mostly '{top_fillers[0][0]}'."
        elif total_fillers <= 5:
            examples = ", ".join(f"'{f[0]}'" for f in top_fillers)
            insight = f"Moderate fillers ({total_fillers} total), mainly {examples}."
        else:
            examples = ", ".join(f"'{f[0]}'" for f in top_fillers)
            insight = f"Excessive fillers ({total_fillers} total), dominated by {examples}."
        
        return {
            "filler_counts": filler_counts,
            "total_fillers": total_fillers,
            "filler_score": filler_score,
            "filler_rate_per_min": round(filler_per_min, 1),
            "insight": insight,
        }
        
    except Exception as e:
        raise RuntimeError(f"Analysis failed: {e}") from e
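

# Example usage (a minimal sketch: "interview.wav" is a placeholder path,
# not a file that ships with this module).
if __name__ == "__main__":
    report = analyze_fillers("interview.wav", model_size="base")
    print(f"Filler score: {report['filler_score']}/100 "
          f"({report['filler_rate_per_min']} fillers/min)")
    for word, count in sorted(report["filler_counts"].items(), key=lambda x: -x[1]):
        print(f"  {word}: {count}")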