# sohaibchachar's picture
# Update app.py
# 5cb2941
# -*- coding: utf-8 -*-
"""Untitled1.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1OxX4WwJL-ZQPL79F5LBrHOxsXcePSiWN
"""
#install gradio
#sudo apt update && sudo apt install ffmpeg
#pip install ffmpeg
#pip install git+https://github.com/openai/whisper.git
#pip install pytube transformers moviepy TTS youtube_transcript_api pydub SentencePiece pysubs2
import os
import gradio as gr
import re
import nltk
from pytube import YouTube
from transformers import MarianMTModel, MarianTokenizer
from moviepy.editor import VideoFileClip, concatenate_audioclips, AudioFileClip
from whisper import load_model
from TTS.api import TTS
from pydub import AudioSegment, silence
import pysubs2
import subprocess
nltk.download('punkt')

# English -> French MarianMT translation model.
model_name = 'Helsinki-NLP/opus-mt-en-fr'
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# French text-to-speech (Coqui) and Whisper speech-to-text models.
tts = TTS(model_name="tts_models/fr/css10/vits")
whisper_model = load_model("base")

# Working directories: downloaded videos, extracted audio, synthesized audio.
save_path = "videos6"
audio_folder = os.path.join(save_path, "audio")
tts_audio_folder = os.path.join(save_path, "tts_audio")
for _folder in (save_path, audio_folder, tts_audio_folder):
    os.makedirs(_folder, exist_ok=True)
def cleaned_video(video_name):
    """Return *video_name* with characters that are illegal in file names removed."""
    forbidden = set('\\/*?:"<>|')
    return ''.join(ch for ch in video_name if ch not in forbidden)
def read_subtitles(subtitles_file):
    """Load the entire contents of a UTF-8 subtitle file into a string."""
    with open(subtitles_file, encoding='utf-8') as handle:
        contents = handle.read()
    return contents
def translate(text):
    """Translate English *text* to French, one sentence at a time.

    Sentences are split with NLTK, run through the MarianMT model
    individually (truncated at 512 tokens), and re-joined with spaces.
    """
    pieces = []
    for sentence in nltk.tokenize.sent_tokenize(text):
        encoded = tokenizer(sentence, return_tensors="pt", padding=True,
                            truncation=True, max_length=512)
        output_ids = model.generate(**encoded)
        decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
        pieces.append(decoded[0])
    return ' '.join(pieces)
def generate_tts_audio(text, start, end, tts_audio_path):
    """Synthesize French speech for *text* and pad it to the segment length.

    Args:
        text: text to synthesize.
        start, end: segment boundaries in seconds; the output is padded with
            trailing silence so it lasts at least (end - start) seconds.
        tts_audio_path: destination path (written as WAV).

    Returns:
        True (kept for backward compatibility with existing callers).
    """
    tts.tts_to_file(text=text, file_path=tts_audio_path)
    # BUG FIX: the synthesized file is WAV (callers pass a .wav path and we
    # re-export as WAV below), but AudioSegment.from_mp3 forced an MP3 decode
    # of WAV data. from_file lets pydub detect the actual container.
    tts_audio = AudioSegment.from_file(tts_audio_path)
    expected_duration = (end - start) * 1000  # pydub works in milliseconds
    actual_duration = len(tts_audio)
    if actual_duration < expected_duration:
        # Pad with silence so the clip covers the whole original segment.
        tts_audio += AudioSegment.silent(duration=expected_duration - actual_duration)
    tts_audio.export(tts_audio_path, format='wav')
    return True
def create_subtitles(segments, subtitles_file):
    """Write (start, end, text) segments (times in seconds) to a subtitle file."""
    subs_doc = pysubs2.SSAFile()
    for seg_start, seg_end, seg_text in segments:
        event = pysubs2.SSAEvent(
            start=int(seg_start * 1000),  # pysubs2 expects milliseconds
            end=int(seg_end * 1000),
            text=seg_text,
        )
        subs_doc.append(event)
    subs_doc.save(subtitles_file)
def embed_subtitles(video_path, subtitles_path, output_path):
    """Burn the subtitles file into the video with ffmpeg (re-encodes video,
    copies audio).

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits non-zero (check=True).
    """
    # The ffmpeg `subtitles` filter treats ':' and '\' as syntax, so a raw
    # path (e.g. a Windows drive letter) breaks the filter graph — escape it.
    escaped_path = subtitles_path.replace('\\', '\\\\').replace(':', '\\:')
    command = [
        'ffmpeg',
        '-y',  # overwrite output; without it ffmpeg prompts and hangs
        '-i', video_path,
        '-vf', f"subtitles={escaped_path}",
        '-c:a', 'copy',
        output_path,
    ]
    subprocess.run(command, check=True)
def process_video(url):
    """End-to-end dubbing pipeline: download a YouTube video, transcribe it
    with Whisper, translate each segment to French, synthesize French speech,
    and write a dubbed video plus original/translated .srt subtitle files.

    Args:
        url: YouTube video URL accepted by pytube.

    Returns:
        (final_video_path, original_subtitles_text, translated_subtitles_text)
        on success, or None when no progressive MP4 stream is available.
    """
    yt = YouTube(url)
    video_id = yt.video_id  # NOTE(review): unused below — confirm before removing
    yt_title_cleaned = cleaned_video(yt.title)
    # Progressive streams bundle audio+video in one file; take the highest resolution.
    video_stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
    if not video_stream:
        print("No suitable video stream found.")
        return None
    video_path = os.path.join(save_path, yt_title_cleaned + ".mp4")
    video_stream.download(output_path=save_path, filename=yt_title_cleaned + ".mp4")
    video_clip = VideoFileClip(video_path)
    # Extract the audio track so Whisper can transcribe it.
    audio_path = os.path.join(audio_folder, yt_title_cleaned + ".mp3")
    video_clip.audio.write_audiofile(audio_path)
    print("Converting Audio to Text.")
    result = whisper_model.transcribe(audio_path)
    # Collect (start_seconds, end_seconds, text) triples from Whisper's output.
    segments = []
    for segment in result["segments"]:
        start_time, end_time, text = segment["start"], segment["end"], segment["text"]
        segments.append((start_time, end_time, text))
    print("Translating text into segments and making an Audio file for each segment.")
    translated_segments = []
    tts_clips = []
    for start, end, text in segments:
        translated_text = translate(text)
        translated_segments.append((start, end, translated_text))
        tts_audio_path = os.path.join(tts_audio_folder, f"tts_{start}_{end}.wav")
        # generate_tts_audio pads the clip with silence up to (end - start)
        # seconds, so the subclip below always has the segment's full duration.
        generate_tts_audio(translated_text, start, end, tts_audio_path)
        tts_clip = AudioFileClip(tts_audio_path).subclip(0, end - start)
        tts_clips.append(tts_clip)
    # NOTE(review): back-to-back concatenation assumes Whisper segments are
    # contiguous; any gap between segments would make the dub drift — confirm.
    combined_tts_audio = concatenate_audioclips(tts_clips)
    final_video = video_clip.set_audio(combined_tts_audio)
    final_video_path = os.path.join(save_path, yt_title_cleaned + "_translated.mp4")
    final_video.write_videofile(final_video_path)
    # Save the original and translated subtitles
    original_subtitles_file = os.path.join(save_path, yt_title_cleaned + "_original.srt")
    create_subtitles(segments, original_subtitles_file)
    translated_subtitles_file = os.path.join(save_path, yt_title_cleaned + "_translated.srt")
    create_subtitles(translated_segments, translated_subtitles_file)
    # Read and return subtitles text
    original_subtitles_text = read_subtitles(original_subtitles_file)
    translated_subtitles_text = read_subtitles(translated_subtitles_file)
    return final_video_path, original_subtitles_text, translated_subtitles_text
#url = 'https://youtu.be/AlhELuRMJ_s?si=r2la5DQlOU49QDPW'
#processed_video_path = process_video(url)
#if processed_video_path:
#print(f"Processed video saved at {processed_video_path}")
#else:
#print("Failed to process video.")
# Gradio front-end: a URL box with a submit button, the dubbed video, and the
# original/translated subtitle transcripts shown side by side.
with gr.Blocks() as demo:
    with gr.Row():
        url_input = gr.Textbox(label="Enter YouTube Video Link", placeholder="Text box for link")
        run_button = gr.Button("Submit")
    output_video = gr.Video()
    with gr.Row():
        original_subs = gr.Textbox(label="Original Subs", placeholder="Original Subs", interactive=False)
        translated_subs = gr.Textbox(label="Translated Subs", placeholder="Translated Subs", interactive=False)
    run_button.click(
        fn=process_video,
        inputs=url_input,
        outputs=[output_video, original_subs, translated_subs],
    )
demo.launch()