Spaces:

Curify
/

studio_V1

Sleeping

qqwjq1981 committed on Feb 25

Commit

e84d196

verified ·

1 Parent(s): 15a8996

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import numpy as np
 import concurrent.futures
 import gradio as gr
 from datetime import datetime
@@ -113,15 +114,37 @@ def transcribe_video(video_path):
     # Transcribe with Whisper
     result = model.transcribe(audio_path, word_timestamps=True)
-    # Extract timestamps and text
-    transcript_with_timestamps = [
-        {
-            "start": segment["start"],
-            "end": segment["end"],
-            "text": segment["text"]
-        }
-        for segment in result["segments"]
-    ]
     # Get the detected language
     detected_language = result["language"]
     logger.debug(f"Detected language:\n{detected_language}")

 import numpy as np
+import re
 import concurrent.futures
 import gradio as gr
 from datetime import datetime
     # Transcribe with Whisper
     result = model.transcribe(audio_path, word_timestamps=True)
+    # Extract timestamps, text, and compute word count
+    total_words = 0
+    total_duration = 0
+    transcript_with_timestamps = []
+    for segment in result["segments"]:
+        start = segment["start"]
+        end = segment["end"]
+        text = segment["text"]
+        # Count words in the segment
+        word_count = len(re.findall(r'\w+', text))
+        transcript_with_timestamps.append({
+            "start": start,
+            "end": end,
+            "text": text,
+            "word_count": word_count
+        })
+        total_words += word_count
+        total_duration += (end - start)
+    # Compute average words per second
+    avg_words_per_second = total_words / total_duration if total_duration > 0 else 0
+    # Add total statistics to the result
+    transcript_stats = {
+        "total_words": total_words,
+        "total_duration": total_duration,
+        "avg_words_per_second": avg_words_per_second
+    }
+    logger.debug(f"Transcription stats:\n{transcript_stats}")
     # Get the detected language
     detected_language = result["language"]
     logger.debug(f"Detected language:\n{detected_language}")