qqwjq1981 committed on
Commit ba43985 · verified · 1 Parent(s): fa4e626

Update app.py

Files changed (1): app.py +39 -11
app.py CHANGED
@@ -1,5 +1,5 @@
-import re
 import numpy as np
+import re
 import concurrent.futures
 import gradio as gr
 from datetime import datetime
@@ -40,6 +40,15 @@ def silence(duration, fps=44100):
     Returns a silent AudioClip of the specified duration.
     """
     return AudioArrayClip(np.zeros((int(fps*duration), 2)), fps=fps)
+
+def count_words_or_characters(text):
+    # Count non-Chinese words
+    non_chinese_words = len(re.findall(r'\b[a-zA-Z0-9]+\b', text))
+
+    # Count Chinese characters
+    chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', text))
+
+    return non_chinese_words + chinese_chars
 
 # Define the passcode
 PASSCODE = "show_feedback_db"
@@ -78,7 +87,6 @@ css = """
 }
 """
 
-
 # Function to save feedback or provide access to the database file
 def handle_feedback(feedback):
     feedback = feedback.strip()  # Clean up leading/trailing whitespace
@@ -123,15 +131,14 @@ def transcribe_video(video_path):
         start = segment["start"]
         end = segment["end"]
         text = segment["text"]
-        # Count words in the segment
-        word_count = len(re.findall(r'\w+', text))
+
         transcript_with_timestamps.append({
             "start": start,
             "end": end,
-            "text": text,
-            "word_count": word_count
+            "text": text
         })
-
+
+        word_count = count_words_or_characters(text)
         total_words += word_count
         total_duration += (end - start)
 
@@ -270,13 +277,13 @@ def process_entry(entry, i, video_width, video_height, add_voiceover, target_lan
     audio_segment = None
     if add_voiceover:
         segment_audio_path = f"segment_{i}_voiceover.wav"
-        generate_voiceover_OpenAI([entry], target_language, segment_audio_path)
+        desired_duration = entry["end"] - entry["start"]
+        generate_voiceover_OpenAI([entry], target_language, desired_duration, segment_audio_path)
         audio_clip = AudioFileClip(segment_audio_path)
         # Get and log all methods in AudioFileClip
         logger.info("Methods in AudioFileClip:")
         for method in dir(audio_clip):
             logger.info(method)
-        desired_duration = entry["end"] - entry["start"]
 
         # Log duration of the audio clip and the desired duration for debugging.
         logger.debug(f"Audio clip duration: {audio_clip.duration}, Desired duration: {desired_duration}")
@@ -355,7 +362,26 @@ def generate_voiceover(translated_json, language, output_audio_path):
     except Exception as e:
         raise ValueError(f"Error generating voiceover: {e}")
 
-def generate_voiceover_OpenAI(translated_json, language, output_audio_path):
+def truncated_linear(x):
+    if x < 15:
+        return 1
+    elif x > 25:
+        return 1.2
+    else:
+        slope = (1.2 - 1) / (25 - 15)
+        return 1 + slope * (x - 15)
+
+def calculate_speed(text, desired_duration):
+    # Calculate characters per second
+    char_count = len(text)
+    chars_per_second = char_count / (desired_duration + 0.001)
+
+    # Apply truncated linear function to get speed
+    speed = truncated_linear(chars_per_second)
+
+    return speed
+
+def generate_voiceover_OpenAI(translated_json, language, desired_duration, output_audio_path):
     """
     Generate voiceover from translated text for a given language using OpenAI TTS API.
     """
@@ -373,11 +399,13 @@ def generate_voiceover_OpenAI(translated_json, language, output_audio_path):
 
     while retry_count < max_retries:
         try:
+            speed_tts = calculate_speed(full_text, desired_duration)
             # Create the speech using OpenAI TTS API
             response = client.audio.speech.create(
                 model=model,
                 voice=voice,
-                input=full_text
+                input=full_text,
+                speed=speed_tts
             )
             # Save the audio to the specified path
             with open(output_audio_path, 'wb') as f:
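
With the duration now plumbed through, each attempt recomputes the speed and passes it to the API; OpenAI's speech endpoint documents a speed range of 0.25 to 4.0, so the 1.0-1.2 values produced by calculate_speed stay well within bounds. A minimal standalone sketch of the per-segment flow after this change (model, voice, and the output path are placeholders, and calculate_speed is the helper added above; app.py wires these up with its own values):

from openai import OpenAI

client = OpenAI()                                   # reads OPENAI_API_KEY from the environment
full_text = "Example sentence to synthesize."       # hypothetical segment text
desired_duration = 2.0                              # hypothetical segment length in seconds

speed_tts = calculate_speed(full_text, desired_duration)    # 1.0-1.2 multiplier
response = client.audio.speech.create(
    model="tts-1",           # placeholder model name
    voice="alloy",           # placeholder voice name
    input=full_text,
    speed=speed_tts,
)
with open("segment_0_voiceover.wav", "wb") as f:    # placeholder output path
    f.write(response.content)                       # write the returned audio bytes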
 
 