Update app.py
Browse files
app.py
CHANGED
@@ -298,33 +298,46 @@ def replace_audio_in_video(video_path: str, new_audio_path: str, final_video_pat
|
|
def upload_and_manage(file, target_language, mode="transcription"):
    """Transcribe an uploaded media file, translate it, and burn subtitles into the video.

    Args:
        file: Uploaded file object (expects a ``.name`` path attribute, e.g. a Gradio file).
        target_language: Language to translate the transcript into.
        mode: If ``"Transcription with Voiceover"``, additionally synthesizes a voiceover
            and muxes it into the output video; any other value produces subtitles only.

    Returns:
        A 4-tuple ``(translated_json, editable_table, output_video_path, status_message)``.
        On any failure returns ``(None, [], None, error_message)`` instead of raising.
    """
    if file is None:
        return None, [], None, "No file uploaded. Please upload a video/audio file."

    try:
        start_time = time.time()  # Start the timer

        # Define paths for audio and output files
        audio_path = "audio.wav"
        output_video_path = "output_video.mp4"
        voiceover_path = "voiceover.wav"

        list_available_fonts()

        # Step 1: Transcribe audio from uploaded media file and get timestamps
        transcription_json, source_language = transcribe_video(file.name)

        # Step 2: Translate the transcription
        translated_json = translate_text(
            transcription_json, source_language, target_language
        )

        # Step 3: Add transcript to video based on timestamps
        add_transcript_to_video(file.name, translated_json, output_video_path)

        # Step 4 (Optional): Generate voiceover if mode is "Transcription with Voiceover"
        if mode == "Transcription with Voiceover":
            generate_voiceover(translated_json, target_language, voiceover_path)
            replace_audio_in_video(output_video_path, voiceover_path, output_video_path)

        # Convert translated JSON into a format for the editable table
        editable_table = [
            [float(seg["start"]), seg["original"], seg["translated"], float(seg["end"])]
            for seg in translated_json
        ]

        # Calculate elapsed time
        elapsed_time = time.time() - start_time
        elapsed_time_display = f"Processing completed in {elapsed_time:.2f} seconds."

        return translated_json, editable_table, output_video_path, elapsed_time_display

    except Exception as e:
        return None, [], None, f"An error occurred: {str(e)}"
|
342 |
# Gradio Interface with Tabs
|
343 |
def build_interface():
|
344 |
with gr.Blocks(css=css) as demo:
|
|
|
def upload_and_manage(file, target_language, mode="transcription"):
    """Transcribe an uploaded media file, translate it, and burn subtitles into the video.

    Args:
        file: Uploaded file object (expects a ``.name`` path attribute, e.g. a Gradio file).
        target_language: Language to translate the transcript into.
        mode: If ``"Transcription with Voiceover"``, additionally synthesizes a voiceover
            and muxes it into the output video; any other value produces subtitles only.

    Returns:
        A 4-tuple ``(translated_json, editable_table, output_video_path, status_message)``.
        On any failure returns ``(None, [], None, error_message)`` instead of raising.
    """
    import os  # local import: only needed for the voiceover temp-file swap below

    if file is None:
        logger.info("No file uploaded. Please upload a video/audio file.")
        return None, [], None, "No file uploaded. Please upload a video/audio file."

    try:
        start_time = time.time()  # Start the timer
        # Lazy %-style args keep logging cheap when the level is disabled.
        logger.info("Started processing file: %s", file.name)

        # Define paths for output files.
        # (The previous "audio.wav" path was assigned but never used here — removed.)
        output_video_path = "output_video.mp4"
        voiceover_path = "voiceover.wav"
        logger.info(
            "Using output video path: %s, voiceover path: %s",
            output_video_path,
            voiceover_path,
        )

        list_available_fonts()

        # Step 1: Transcribe audio from uploaded media file and get timestamps
        logger.info("Transcribing audio...")
        transcription_json, source_language = transcribe_video(file.name)
        logger.info(
            "Transcription completed. Detected source language: %s", source_language
        )

        # Step 2: Translate the transcription
        logger.info(
            "Translating transcription from %s to %s...",
            source_language,
            target_language,
        )
        translated_json = translate_text(
            transcription_json, source_language, target_language
        )
        logger.info(
            "Translation completed. Number of translated segments: %d",
            len(translated_json),
        )

        # Step 3: Add transcript to video based on timestamps
        logger.info("Adding translated transcript to video...")
        add_transcript_to_video(file.name, translated_json, output_video_path)
        logger.info("Transcript added to video. Output video saved at %s", output_video_path)

        # Step 4 (Optional): Generate voiceover if mode is "Transcription with Voiceover"
        if mode == "Transcription with Voiceover":
            logger.info("Generating voiceover for video...")
            generate_voiceover(translated_json, target_language, voiceover_path)
            logger.info("Voiceover generated. Replacing audio in video...")
            # Mux into a temporary file first: reading from and writing to the SAME
            # path (the previous code passed output_video_path as both input and
            # output) fails with encoders such as ffmpeg, which cannot edit in place.
            muxed_path = "output_video_voiceover.mp4"
            replace_audio_in_video(output_video_path, voiceover_path, muxed_path)
            os.replace(muxed_path, output_video_path)  # atomic on the same filesystem
            logger.info("Audio replaced in video.")

        # Convert translated JSON into a format for the editable table
        logger.info("Converting translated JSON into editable table format...")
        editable_table = [
            [float(seg["start"]), seg["original"], seg["translated"], float(seg["end"])]
            for seg in translated_json
        ]

        # Calculate elapsed time
        elapsed_time = time.time() - start_time
        elapsed_time_display = f"Processing completed in {elapsed_time:.2f} seconds."
        logger.info("Processing completed in %.2f seconds.", elapsed_time)

        return translated_json, editable_table, output_video_path, elapsed_time_display

    except Exception as e:
        # logger.exception records the full traceback, not just the message text,
        # which logger.error(f"...") discarded.
        logger.exception("An error occurred while processing the upload.")
        return None, [], None, f"An error occurred: {str(e)}"
|
|
|
356 |
# Gradio Interface with Tabs
|
357 |
def build_interface():
|
358 |
with gr.Blocks(css=css) as demo:
|