qqwjq1981 committed on
Commit
1a7629c
·
verified ·
1 Parent(s): 5a5be24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -9
app.py CHANGED
@@ -113,15 +113,37 @@ def transcribe_video(video_path):
113
  # Transcribe with Whisper
114
  result = model.transcribe(audio_path, word_timestamps=True)
115
 
116
- # Extract timestamps and text
117
- transcript_with_timestamps = [
118
- {
119
- "start": segment["start"],
120
- "end": segment["end"],
121
- "text": segment["text"]
122
- }
123
- for segment in result["segments"]
124
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  # Get the detected language
126
  detected_language = result["language"]
127
  logger.debug(f"Detected language:\n{detected_language}")
 
113
  # Transcribe with Whisper
114
  result = model.transcribe(audio_path, word_timestamps=True)
115
 
116
+ # Extract timestamps, text, and compute word count
117
+ total_words = 0
118
+ total_duration = 0
119
+ transcript_with_timestamps = []
120
+
121
+ for segment in result["segments"]:
122
+ start = segment["start"]
123
+ end = segment["end"]
124
+ text = segment["text"]
125
+ # Count words in the segment
126
+ word_count = len(re.findall(r'\w+', text))
127
+ transcript_with_timestamps.append({
128
+ "start": start,
129
+ "end": end,
130
+ "text": text,
131
+ "word_count": word_count
132
+ })
133
+
134
+ total_words += word_count
135
+ total_duration += (end - start)
136
+
137
+ # Compute average words per second
138
+ avg_words_per_second = total_words / total_duration if total_duration > 0 else 0
139
+
140
+ # Add total statistics to the result
141
+ transcript_stats = {
142
+ "total_words": total_words,
143
+ "total_duration": total_duration,
144
+ "avg_words_per_second": avg_words_per_second
145
+ }
146
+ logger.debug(f"Transcription stats:\n{transcript_stats}")
147
  # Get the detected language
148
  detected_language = result["language"]
149
  logger.debug(f"Detected language:\n{detected_language}")