# -*- coding: utf-8 -*-
"""Dub an English YouTube video into French, with subtitles.

Pipeline: download the video (pytube) -> transcribe English speech
(whisper) -> translate each segment to French (MarianMT) -> synthesize
French speech per segment (Coqui TTS) -> rebuild the video with the new
audio track (moviepy) and write .srt subtitles for both languages.
A Gradio UI wraps the whole pipeline.

One-time setup (originally Colab cells):
    sudo apt update && sudo apt install ffmpeg
    pip install gradio ffmpeg pytube transformers moviepy TTS \
        youtube_transcript_api pydub SentencePiece pysubs2
    pip install git+https://github.com/openai/whisper.git

Original notebook:
https://colab.research.google.com/drive/1OxX4WwJL-ZQPL79F5LBrHOxsXcePSiWN
"""

import os
import re
import subprocess

import gradio as gr
import nltk
import pysubs2
from moviepy.editor import AudioFileClip, VideoFileClip, concatenate_audioclips
from pydub import AudioSegment
from pytube import YouTube
from transformers import MarianMTModel, MarianTokenizer
from TTS.api import TTS
from whisper import load_model

# Sentence tokenizer data used by translate().
nltk.download('punkt')

# English -> French translation model.
model_name = 'Helsinki-NLP/opus-mt-en-fr'
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# French speech synthesis and English speech recognition.
tts = TTS(model_name="tts_models/fr/css10/vits")
whisper_model = load_model("base")

# Working directories: downloaded videos, extracted audio, per-segment TTS.
save_path = "videos6"
os.makedirs(save_path, exist_ok=True)
audio_folder = os.path.join(save_path, "audio")
os.makedirs(audio_folder, exist_ok=True)
tts_audio_folder = os.path.join(save_path, "tts_audio")
os.makedirs(tts_audio_folder, exist_ok=True)


def cleaned_video(video_name):
    """Return *video_name* with characters illegal in filenames removed."""
    return re.sub(r'[\\/*?:"<>|]', "", video_name)


def read_subtitles(subtitles_file):
    """Return the entire text content of *subtitles_file* (UTF-8)."""
    with open(subtitles_file, 'r', encoding='utf-8') as file:
        return file.read()


def translate(text):
    """Translate English *text* to French, one sentence at a time.

    Sentence-level translation keeps each input under the model's
    512-token limit and improves MarianMT output quality.
    """
    sentences = nltk.tokenize.sent_tokenize(text)
    translations = []
    for sentence in sentences:
        batch = tokenizer(sentence, return_tensors="pt", padding=True,
                          truncation=True, max_length=512)
        gen = model.generate(**batch)
        translations.append(
            tokenizer.batch_decode(gen, skip_special_tokens=True)[0])
    return ' '.join(translations)


def generate_tts_audio(text, start, end, tts_audio_path):
    """Synthesize French *text* to *tts_audio_path* as WAV.

    Pads the clip with trailing silence so it lasts at least
    (end - start) seconds; longer clips are trimmed later by the caller.
    Returns True on completion.
    """
    tts.tts_to_file(text=text, file_path=tts_audio_path)
    # Fix: the file written above is WAV, so decode it by format detection
    # rather than forcing MP3 decoding with AudioSegment.from_mp3().
    tts_audio = AudioSegment.from_file(tts_audio_path)
    expected_duration = (end - start) * 1000  # pydub works in milliseconds
    actual_duration = len(tts_audio)
    if actual_duration < expected_duration:
        silence_duration = expected_duration - actual_duration
        tts_audio += AudioSegment.silent(duration=silence_duration)
    tts_audio.export(tts_audio_path, format='wav')
    return True


def create_subtitles(segments, subtitles_file):
    """Write (start, end, text) *segments* (times in seconds) as subtitles.

    Output format is chosen by pysubs2 from the file extension.
    """
    subs = pysubs2.SSAFile()
    for start, end, text in segments:
        subs.append(pysubs2.SSAEvent(start=int(start * 1000),
                                     end=int(end * 1000),
                                     text=text))
    subs.save(subtitles_file)


def embed_subtitles(video_path, subtitles_path, output_path):
    """Burn *subtitles_path* into *video_path* with ffmpeg (audio copied).

    NOTE(review): defined but not called anywhere in this script.
    """
    command = [
        'ffmpeg', '-i', video_path,
        '-vf', f"subtitles={subtitles_path}",
        '-c:a', 'copy',
        output_path,
    ]
    subprocess.run(command, check=True)


def process_video(url):
    """Run the full dubbing pipeline on the YouTube video at *url*.

    Returns (final_video_path, original_subtitles_text,
    translated_subtitles_text), or (None, "", "") when no progressive
    MP4 stream is available — three values either way, matching the
    three Gradio outputs wired to this function.
    """
    yt = YouTube(url)
    yt_title_cleaned = cleaned_video(yt.title)
    video_stream = (yt.streams
                    .filter(progressive=True, file_extension='mp4')
                    .order_by('resolution').desc().first())
    if not video_stream:
        print("No suitable video stream found.")
        # Fix: return one value per Gradio output instead of a bare None.
        return None, "", ""

    video_path = os.path.join(save_path, yt_title_cleaned + ".mp4")
    video_stream.download(output_path=save_path,
                          filename=yt_title_cleaned + ".mp4")
    video_clip = VideoFileClip(video_path)
    audio_path = os.path.join(audio_folder, yt_title_cleaned + ".mp3")
    video_clip.audio.write_audiofile(audio_path)

    print("Converting Audio to Text.")
    result = whisper_model.transcribe(audio_path)
    segments = [(s["start"], s["end"], s["text"])
                for s in result["segments"]]

    print("Translating text into segments and making an Audio file for each segment.")
    translated_segments = []
    tts_clips = []
    for start, end, text in segments:
        translated_text = translate(text)
        translated_segments.append((start, end, translated_text))
        tts_audio_path = os.path.join(tts_audio_folder,
                                      f"tts_{start}_{end}.wav")
        generate_tts_audio(translated_text, start, end, tts_audio_path)
        # Trim to the segment length so the dubbed track stays in sync.
        tts_clips.append(AudioFileClip(tts_audio_path).subclip(0, end - start))

    combined_tts_audio = concatenate_audioclips(tts_clips)
    final_video = video_clip.set_audio(combined_tts_audio)
    final_video_path = os.path.join(save_path,
                                    yt_title_cleaned + "_translated.mp4")
    final_video.write_videofile(final_video_path)

    # Fix: release the file handles moviepy keeps open on each clip.
    final_video.close()
    video_clip.close()
    for clip in tts_clips:
        clip.close()

    # Save the original and translated subtitles.
    original_subtitles_file = os.path.join(
        save_path, yt_title_cleaned + "_original.srt")
    create_subtitles(segments, original_subtitles_file)
    translated_subtitles_file = os.path.join(
        save_path, yt_title_cleaned + "_translated.srt")
    create_subtitles(translated_segments, translated_subtitles_file)

    # Read and return subtitles text for display in the UI.
    original_subtitles_text = read_subtitles(original_subtitles_file)
    translated_subtitles_text = read_subtitles(translated_subtitles_file)
    return final_video_path, original_subtitles_text, translated_subtitles_text


# Gradio UI: link box + submit button, dubbed video, both subtitle tracks.
with gr.Blocks() as demo:
    with gr.Row():
        text_box = gr.Textbox(label="Enter YouTube Video Link",
                              placeholder="Text box for link")
        submit_btn = gr.Button("Submit")
    video = gr.Video()
    with gr.Row():
        original_subs_output = gr.Textbox(label="Original Subs",
                                          placeholder="Original Subs",
                                          interactive=False)
        translated_subs_output = gr.Textbox(label="Translated Subs",
                                            placeholder="Translated Subs",
                                            interactive=False)
    submit_btn.click(fn=process_video, inputs=text_box,
                     outputs=[video, original_subs_output,
                              translated_subs_output])

demo.launch()