File size: 6,232 Bytes
f255ea5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c171fd
f255ea5
 
 
 
 
5cb2941
f255ea5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# -*- coding: utf-8 -*-
"""Untitled1.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1OxX4WwJL-ZQPL79F5LBrHOxsXcePSiWN
"""


#install gradio

#sudo apt update && sudo apt install ffmpeg

#pip install ffmpeg

#pip install git+https://github.com/openai/whisper.git
#pip install pytube transformers moviepy TTS youtube_transcript_api pydub SentencePiece pysubs2

import os
import gradio as gr
import re
import nltk
from pytube import YouTube
from transformers import MarianMTModel, MarianTokenizer
from moviepy.editor import VideoFileClip, concatenate_audioclips, AudioFileClip
from whisper import load_model
from TTS.api import TTS
from pydub import AudioSegment, silence
import pysubs2
import subprocess

# One-time resource initialization (models are downloaded on first run).
nltk.download('punkt')

# English -> French translation model and its tokenizer.
model_name = 'Helsinki-NLP/opus-mt-en-fr'
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# French text-to-speech model and Whisper speech-to-text model.
tts = TTS(model_name="tts_models/fr/css10/vits")
whisper_model = load_model("base")

# Working directories: downloaded videos, extracted audio, synthesized speech.
save_path = "videos6"
audio_folder = os.path.join(save_path, "audio")
tts_audio_folder = os.path.join(save_path, "tts_audio")
for _folder in (save_path, audio_folder, tts_audio_folder):
    os.makedirs(_folder, exist_ok=True)

def cleaned_video(video_name):
    """Return *video_name* with characters illegal in file names removed."""
    forbidden = re.compile(r'[\\/*?:"<>|]')
    return forbidden.sub("", video_name)
    
def read_subtitles(subtitles_file):
    """Return the entire contents of a UTF-8 encoded subtitle file."""
    with open(subtitles_file, 'r', encoding='utf-8') as handle:
        return handle.read()
        
def translate(text):
    """Translate English *text* to French, one sentence at a time.

    The text is split into sentences with NLTK so each MarianMT call
    stays within the model's input limit; the translated sentences are
    rejoined with single spaces.
    """
    translated = []
    for sentence in nltk.tokenize.sent_tokenize(text):
        encoded = tokenizer(
            sentence,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )
        output_ids = model.generate(**encoded)
        decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
        translated.append(decoded[0])
    return ' '.join(translated)

def generate_tts_audio(text, start, end, tts_audio_path):
    """Synthesize *text* to *tts_audio_path* and pad it to the segment length.

    Args:
        text: French text to synthesize.
        start: Segment start time in seconds.
        end: Segment end time in seconds.
        tts_audio_path: Destination path for the synthesized audio (WAV).

    Returns:
        True (kept for backward compatibility with existing callers).
    """
    tts.tts_to_file(text=text, file_path=tts_audio_path)
    # BUG FIX: the synthesized file is WAV (the path ends in .wav and the
    # padded result is exported with format='wav'), so it must not be
    # decoded with AudioSegment.from_mp3(); from_file() auto-detects.
    tts_audio = AudioSegment.from_file(tts_audio_path)
    expected_duration = (end - start) * 1000  # pydub works in milliseconds
    actual_duration = len(tts_audio)
    if actual_duration < expected_duration:
        # Pad with trailing silence so the clip fills the video segment.
        # NOTE(review): audio longer than the segment is left untrimmed here;
        # the caller truncates via subclip().
        silence_duration = expected_duration - actual_duration
        silence_audio = AudioSegment.silent(duration=silence_duration)
        tts_audio += silence_audio
        tts_audio.export(tts_audio_path, format='wav')
    return True

def create_subtitles(segments, subtitles_file):
    """Write (start, end, text) segments to a subtitle file.

    Segment times are given in seconds and converted to the millisecond
    units pysubs2 expects; the output format is inferred from the file
    extension by pysubs2.
    """
    subs = pysubs2.SSAFile()
    for seg_start, seg_end, caption in segments:
        event = pysubs2.SSAEvent(
            start=int(seg_start * 1000),
            end=int(seg_end * 1000),
            text=caption,
        )
        subs.append(event)
    subs.save(subtitles_file)

def embed_subtitles(video_path, subtitles_path, output_path):
    """Burn the subtitles into the video with ffmpeg.

    The subtitle track is rendered into the video frames via the
    `subtitles` filter; the audio stream is copied unchanged. Raises
    CalledProcessError if ffmpeg exits non-zero.
    """
    command = [
        'ffmpeg',
        '-i', video_path,
        '-vf', f"subtitles={subtitles_path}",
        '-c:a', 'copy',
        output_path,
    ]
    subprocess.run(command, check=True)

def process_video(url):
    """Download a YouTube video, dub it into French, and build subtitles.

    Pipeline: download video -> extract audio -> Whisper transcription ->
    per-segment MarianMT translation -> per-segment TTS synthesis ->
    replace the video's audio with the concatenated TTS clips -> save
    original and translated .srt files.

    Args:
        url: YouTube video URL.

    Returns:
        (final_video_path, original_subtitles_text, translated_subtitles_text),
        or None when no progressive mp4 stream is available.
    """
    yt = YouTube(url)
    video_id = yt.video_id
    yt_title_cleaned = cleaned_video(yt.title)
    # Highest-resolution progressive stream (video+audio muxed in one mp4).
    video_stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()

    if not video_stream:
        print("No suitable video stream found.")
        return None

    video_path = os.path.join(save_path, yt_title_cleaned + ".mp4")
    video_stream.download(output_path=save_path, filename=yt_title_cleaned + ".mp4")

    video_clip = VideoFileClip(video_path)
    audio_path = os.path.join(audio_folder, yt_title_cleaned + ".mp3")
    video_clip.audio.write_audiofile(audio_path)
    print("Converting Audio to Text.")
    # Whisper returns timed segments; keep (start, end, text) triples in seconds.
    result = whisper_model.transcribe(audio_path)
    segments = []
    for segment in result["segments"]:
        start_time, end_time, text = segment["start"], segment["end"], segment["text"]
        segments.append((start_time, end_time, text))
    print("Translating text into segments and making an Audio file for each segment.")
    translated_segments = []
    tts_clips = []
    for start, end, text in segments:
        translated_text = translate(text)
        translated_segments.append((start, end, translated_text))

        tts_audio_path = os.path.join(tts_audio_folder, f"tts_{start}_{end}.wav")
        generate_tts_audio(translated_text, start, end, tts_audio_path)

        # Truncate each clip to the segment length so the dub stays in sync.
        # NOTE(review): this assumes Whisper segments are contiguous — gaps
        # between segments would cause the dubbed audio to drift; confirm.
        tts_clip = AudioFileClip(tts_audio_path).subclip(0, end - start)
        tts_clips.append(tts_clip)

    combined_tts_audio = concatenate_audioclips(tts_clips)
    final_video = video_clip.set_audio(combined_tts_audio)
    final_video_path = os.path.join(save_path, yt_title_cleaned + "_translated.mp4")
    final_video.write_videofile(final_video_path)

    # Save the original and translated subtitles
    original_subtitles_file = os.path.join(save_path, yt_title_cleaned + "_original.srt")
    create_subtitles(segments, original_subtitles_file)

    translated_subtitles_file = os.path.join(save_path, yt_title_cleaned + "_translated.srt")
    create_subtitles(translated_segments, translated_subtitles_file)

    # Read and return subtitles text
    original_subtitles_text = read_subtitles(original_subtitles_file)
    translated_subtitles_text = read_subtitles(translated_subtitles_file)

    return final_video_path, original_subtitles_text, translated_subtitles_text


#url = 'https://youtu.be/AlhELuRMJ_s?si=r2la5DQlOU49QDPW'

#processed_video_path = process_video(url)
#if processed_video_path:
    #print(f"Processed video saved at {processed_video_path}")
#else:
    #print("Failed to process video.")

# Gradio front end: paste a YouTube link, get back the dubbed video plus
# both subtitle tracks.
with gr.Blocks() as demo:
    # Input row: link field and trigger button.
    with gr.Row():
        url_box = gr.Textbox(label="Enter YouTube Video Link", placeholder="Text box for link")
        run_button = gr.Button("Submit")

    dubbed_video = gr.Video()

    # Read-only subtitle panes, side by side.
    with gr.Row():
        original_subs_box = gr.Textbox(label="Original Subs", placeholder="Original Subs", interactive=False)
        translated_subs_box = gr.Textbox(label="Translated Subs", placeholder="Translated Subs", interactive=False)

    run_button.click(
        fn=process_video,
        inputs=url_box,
        outputs=[dubbed_video, original_subs_box, translated_subs_box],
    )

demo.launch()