Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import numpy as np
|
|
|
|
| 2 |
import concurrent.futures
|
| 3 |
import gradio as gr
|
| 4 |
from datetime import datetime
|
|
@@ -113,15 +114,37 @@ def transcribe_video(video_path):
|
|
| 113 |
# Transcribe with Whisper
|
| 114 |
result = model.transcribe(audio_path, word_timestamps=True)
|
| 115 |
|
| 116 |
-
# Extract timestamps and text
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
# Get the detected language
|
| 126 |
detected_language = result["language"]
|
| 127 |
logger.debug(f"Detected language:\n{detected_language}")
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
+
import re
|
| 3 |
import concurrent.futures
|
| 4 |
import gradio as gr
|
| 5 |
from datetime import datetime
|
|
|
|
| 114 |
# Transcribe with Whisper
|
| 115 |
result = model.transcribe(audio_path, word_timestamps=True)
|
| 116 |
|
| 117 |
+
# Extract timestamps, text, and compute word count
|
| 118 |
+
total_words = 0
|
| 119 |
+
total_duration = 0
|
| 120 |
+
transcript_with_timestamps = []
|
| 121 |
+
|
| 122 |
+
for segment in result["segments"]:
|
| 123 |
+
start = segment["start"]
|
| 124 |
+
end = segment["end"]
|
| 125 |
+
text = segment["text"]
|
| 126 |
+
# Count words in the segment
|
| 127 |
+
word_count = len(re.findall(r'\w+', text))
|
| 128 |
+
transcript_with_timestamps.append({
|
| 129 |
+
"start": start,
|
| 130 |
+
"end": end,
|
| 131 |
+
"text": text,
|
| 132 |
+
"word_count": word_count
|
| 133 |
+
})
|
| 134 |
+
|
| 135 |
+
total_words += word_count
|
| 136 |
+
total_duration += (end - start)
|
| 137 |
+
|
| 138 |
+
# Compute average words per second
|
| 139 |
+
avg_words_per_second = total_words / total_duration if total_duration > 0 else 0
|
| 140 |
+
|
| 141 |
+
# Add total statistics to the result
|
| 142 |
+
transcript_stats = {
|
| 143 |
+
"total_words": total_words,
|
| 144 |
+
"total_duration": total_duration,
|
| 145 |
+
"avg_words_per_second": avg_words_per_second
|
| 146 |
+
}
|
| 147 |
+
logger.debug(f"Transcription stats:\n{transcript_stats}")
|
| 148 |
# Get the detected language
|
| 149 |
detected_language = result["language"]
|
| 150 |
logger.debug(f"Detected language:\n{detected_language}")
|