import re
from collections import Counter
from typing import Optional

import whisper
from pydub import AudioSegment  # For accurate duration calculation

# English filler words and phrases counted in a transcript.
_FILLER_WORDS = (
    "um", "uh", "hmm", "ah", "er", "eh",
    "umm", "uhh", "mmm", "ahh", "err",
    "like", "you know", "well", "so",
    "actually", "basically", "right", "okay",
    "sort of", "kind of",
)


def _compile_filler_pattern(fillers):
    """Build one case-insensitive regex matching any filler as a whole word/phrase."""
    alternatives = [
        # Words of a multi-word filler may be separated by any whitespace run.
        r"\s+".join(re.escape(token) for token in phrase.split())
        # Longest first so a phrase is never shadowed by a shorter alternative.
        for phrase in sorted(fillers, key=len, reverse=True)
    ]
    # The lookarounds keep "so" from matching inside "sort" or "also".
    return re.compile(
        r"(?<!\w)(?:" + "|".join(alternatives) + r")(?!\w)",
        re.IGNORECASE,
    )


# NOTE(review): the matching/counting section of the original file was
# truncated (text between "(?<" and "duration >" was lost). The pattern and
# counting below are a reconstruction -- confirm against the intended behavior.
_FILLER_PATTERN = _compile_filler_pattern(_FILLER_WORDS)


def _count_fillers(transcript: str) -> Counter:
    """Count each filler occurrence, keyed by its lowercase, single-spaced form."""
    return Counter(
        " ".join(match.group(0).lower().split())
        for match in _FILLER_PATTERN.finditer(transcript)
    )


def _score_from_rate(total_fillers: int, filler_per_min: float) -> int:
    """Map the filler count and per-minute rate to a score between 20 and 100."""
    if total_fillers == 0:
        return 100
    for upper_bound, score in ((1, 90), (3, 80), (5, 60), (10, 40)):
        if filler_per_min < upper_bound:
            return score
    return 20


def _build_insight(filler_counts: Counter, total_fillers: int) -> str:
    """Summarize filler usage in one sentence, naming the most frequent fillers."""
    if total_fillers == 0:
        return "Excellent! No filler words detected."
    top_fillers = filler_counts.most_common(2)
    if total_fillers <= 2:
        return f"Minimal fillers ({total_fillers} total), mostly '{top_fillers[0][0]}'."
    examples = ", ".join(f"'{name}'" for name, _ in top_fillers)
    if total_fillers <= 5:
        return f"Moderate fillers ({total_fillers} total), mainly {examples}."
    return f"Excessive fillers ({total_fillers} total), dominated by {examples}."


def analyze_fillers(
    file_path: str,
    model_size: str = "base",
    transcript: Optional[str] = None,
) -> dict:
    """Analyze English filler words in an audio recording.

    The duration is always read from the audio file. The file is transcribed
    with Whisper only when no transcript is supplied.

    Args:
        file_path: Path to the audio file; used for the duration and, when
            needed, for transcription.
        model_size: Whisper model name passed to ``whisper.load_model``.
        transcript: Existing transcript text. When ``None``, one is produced
            by transcribing ``file_path``.

    Returns:
        A dict with the keys:
            ``filler_counts``: mapping of each detected filler to its count.
            ``total_fillers``: total number of filler occurrences.
            ``filler_score``: 100 for no fillers, otherwise 90/80/60/40/20
                for rates below 1/3/5/10 per minute and above.
            ``filler_rate_per_min``: fillers per minute, rounded to 1 decimal.
            ``insight``: one-sentence summary of the result.

    Raises:
        RuntimeError: If loading the audio, transcribing or analyzing fails;
            the underlying exception is chained as ``__cause__``.
    """
    try:
        # First get accurate duration using pydub (len() is in milliseconds).
        audio = AudioSegment.from_file(file_path)
        duration = len(audio) / 1000

        if transcript is None:
            # Only the plain text is used, so word timestamps are not requested.
            model = whisper.load_model(model_size)
            result = model.transcribe(file_path, word_timestamps=False, fp16=False)
            transcript = result["text"]

        filler_counts = _count_fillers(transcript)
        total_fillers = sum(filler_counts.values())
        # Guard against zero-length audio to avoid a division by zero.
        filler_per_min = total_fillers / (duration / 60) if duration > 0 else 0.0

        return {
            "filler_counts": dict(filler_counts),
            "total_fillers": total_fillers,
            "filler_score": _score_from_rate(total_fillers, filler_per_min),
            "filler_rate_per_min": round(filler_per_min, 1),
            "insight": _build_insight(filler_counts, total_fillers),
        }
    except Exception as e:
        # Keep the original exception type/message contract, but chain the
        # cause so the real traceback is not lost.
        raise RuntimeError(f"Analysis failed: {str(e)}") from e