Update app.py
app.py CHANGED
@@ -1,5 +1,5 @@
-import re
 import numpy as np
+import re
 import concurrent.futures
 import gradio as gr
 from datetime import datetime
@@ -40,6 +40,15 @@ def silence(duration, fps=44100):
     Returns a silent AudioClip of the specified duration.
     """
     return AudioArrayClip(np.zeros((int(fps*duration), 2)), fps=fps)
+
+def count_words_or_characters(text):
+    # Count non-Chinese words
+    non_chinese_words = len(re.findall(r'\b[a-zA-Z0-9]+\b', text))
+
+    # Count Chinese characters
+    chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', text))
+
+    return non_chinese_words + chinese_chars
 
 # Define the passcode
 PASSCODE = "show_feedback_db"
@@ -78,7 +87,6 @@ css = """
 }
 """
 
-
 # Function to save feedback or provide access to the database file
 def handle_feedback(feedback):
     feedback = feedback.strip()  # Clean up leading/trailing whitespace
@@ -123,15 +131,14 @@ def transcribe_video(video_path):
         start = segment["start"]
         end = segment["end"]
         text = segment["text"]
-
-        word_count = len(re.findall(r'\w+', text))
+
         transcript_with_timestamps.append({
            "start": start,
            "end": end,
-            "text": text
-            "word_count": word_count
+            "text": text
         })
-
+
+        word_count = count_words_or_characters(text)
         total_words += word_count
         total_duration += (end - start)
 
@@ -270,13 +277,13 @@ def process_entry(entry, i, video_width, video_height, add_voiceover, target_lan
     audio_segment = None
     if add_voiceover:
         segment_audio_path = f"segment_{i}_voiceover.wav"
-
+        desired_duration = entry["end"] - entry["start"]
+        generate_voiceover_OpenAI([entry], target_language, desired_duration, segment_audio_path)
         audio_clip = AudioFileClip(segment_audio_path)
         # Get and log all methods in AudioFileClip
         logger.info("Methods in AudioFileClip:")
         for method in dir(audio_clip):
             logger.info(method)
-        desired_duration = entry["end"] - entry["start"]
 
         # Log duration of the audio clip and the desired duration for debugging.
         logger.debug(f"Audio clip duration: {audio_clip.duration}, Desired duration: {desired_duration}")
@@ -355,7 +362,26 @@ def generate_voiceover(translated_json, language, output_audio_path):
     except Exception as e:
         raise ValueError(f"Error generating voiceover: {e}")
 
-def generate_voiceover_OpenAI(translated_json, language, output_audio_path):
+def truncated_linear(x):
+    if x < 15:
+        return 1
+    elif x > 25:
+        return 1.2
+    else:
+        slope = (1.2 - 1) / (25 - 15)
+        return 1 + slope * (x - 15)
+
+def calculate_speed(text, desired_duration):
+    # Calculate characters per second
+    char_count = len(text)
+    chars_per_second = char_count / (desired_duration + 0.001)
+
+    # Apply truncated linear function to get speed
+    speed = truncated_linear(chars_per_second)
+
+    return speed
+
+def generate_voiceover_OpenAI(translated_json, language, desired_duration, output_audio_path):
     """
     Generate voiceover from translated text for a given language using OpenAI TTS API.
     """
@@ -373,11 +399,13 @@ def generate_voiceover_OpenAI(translated_json, language, output_audio_path):
 
     while retry_count < max_retries:
         try:
+            speed_tts = calculate_speed(full_text, desired_duration)
             # Create the speech using OpenAI TTS API
             response = client.audio.speech.create(
                 model=model,
                 voice=voice,
-                input=full_text
+                input=full_text,
+                speed=speed_tts
             )
             # Save the audio to the specified path
             with open(output_audio_path, 'wb') as f:
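Note on the pacing logic introduced above: count_words_or_characters measures mixed English/Chinese text as ASCII words plus CJK characters, and calculate_speed converts a segment's character count per second of desired duration into a TTS speed factor that truncated_linear keeps between 1.0 and 1.2. The following is a minimal, self-contained sketch of that behaviour; the helper bodies are adapted from the diff above, while the sample inputs and the expected outputs in the comments are illustrative assumptions, not values taken from the app.

# Sketch only: helper bodies adapted from the diff above; sample inputs
# and the expected results in the comments are illustrative.
import re

def count_words_or_characters(text):
    # ASCII words plus CJK characters, as in the diff above
    non_chinese_words = len(re.findall(r'\b[a-zA-Z0-9]+\b', text))
    chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', text))
    return non_chinese_words + chinese_chars

def truncated_linear(x):
    # 1.0x speed below 15 chars/sec, 1.2x above 25, linear ramp in between
    if x < 15:
        return 1
    elif x > 25:
        return 1.2
    else:
        slope = (1.2 - 1) / (25 - 15)
        return 1 + slope * (x - 15)

def calculate_speed(text, desired_duration):
    # Characters per second of the desired segment duration
    chars_per_second = len(text) / (desired_duration + 0.001)
    return truncated_linear(chars_per_second)

print(count_words_or_characters("Hello world 你好"))  # 4: two ASCII words + two CJK characters
print(calculate_speed("Hello world", 5.0))            # 1: sparse text, no speed-up
print(calculate_speed("a" * 100, 2.0))                # 1.2: dense text, speed-up capped
print(round(truncated_linear(20), 2))                 # 1.1: midpoint of the ramp

The resulting factor is what the commit passes as speed=speed_tts to client.audio.speech.create for each segment.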