Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import numpy as np
|
|
|
2 |
import concurrent.futures
|
3 |
import gradio as gr
|
4 |
from datetime import datetime
|
@@ -113,15 +114,37 @@ def transcribe_video(video_path):
|
|
113 |
# Transcribe with Whisper
|
114 |
result = model.transcribe(audio_path, word_timestamps=True)
|
115 |
|
116 |
-
# Extract timestamps and
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
# Get the detected language
|
126 |
detected_language = result["language"]
|
127 |
logger.debug(f"Detected language:\n{detected_language}")
|
|
|
1 |
import numpy as np
|
2 |
+
import re
|
3 |
import concurrent.futures
|
4 |
import gradio as gr
|
5 |
from datetime import datetime
|
|
|
114 |
# Transcribe with Whisper
|
115 |
result = model.transcribe(audio_path, word_timestamps=True)
|
116 |
|
117 |
+
# Extract timestamps, text, and compute word count
|
118 |
+
total_words = 0
|
119 |
+
total_duration = 0
|
120 |
+
transcript_with_timestamps = []
|
121 |
+
|
122 |
+
for segment in result["segments"]:
|
123 |
+
start = segment["start"]
|
124 |
+
end = segment["end"]
|
125 |
+
text = segment["text"]
|
126 |
+
# Count words in the segment
|
127 |
+
word_count = len(re.findall(r'\w+', text))
|
128 |
+
transcript_with_timestamps.append({
|
129 |
+
"start": start,
|
130 |
+
"end": end,
|
131 |
+
"text": text,
|
132 |
+
"word_count": word_count
|
133 |
+
})
|
134 |
+
|
135 |
+
total_words += word_count
|
136 |
+
total_duration += (end - start)
|
137 |
+
|
138 |
+
# Compute average words per second over the summed segment durations
|
139 |
+
avg_words_per_second = total_words / total_duration if total_duration > 0 else 0
|
140 |
+
|
141 |
+
# Collect the overall transcript statistics in a separate dict
|
142 |
+
transcript_stats = {
|
143 |
+
"total_words": total_words,
|
144 |
+
"total_duration": total_duration,
|
145 |
+
"avg_words_per_second": avg_words_per_second
|
146 |
+
}
|
147 |
+
logger.debug(f"Transcription stats:\n{transcript_stats}")
|
148 |
# Get the detected language
|
149 |
detected_language = result["language"]
|
150 |
logger.debug(f"Detected language:\n{detected_language}")
|