File size: 5,788 Bytes
6d31a7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156

import json

from altair import value
from matplotlib.streamplot import OutOfBounds
from sympy import substitution, viete
from extract_audio import VideoHelper
from helpers.srt_generator import SRTGenerator
from moderator import DetoxifyModerator
from shorts_generator import ShortsGenerator
from subtitles import SubtitlesRenderer
from transcript_detect import *
from translation import *
import gradio as gr
from dotenv import load_dotenv



def translate_segments(segments,translator: TranslationModel,from_lang,to_lang):
    """Translate the text of every segment while keeping its timing and id.

    Args:
        segments: Iterable of mappings that each provide the keys
            'text', 'start', 'end' and 'id'.
        translator: Object whose ``translate_text(text, from_lang, to_lang)``
            result is stored as the new 'text' value.
        from_lang: Source language, passed through to the translator.
        to_lang: Target language, passed through to the translator.

    Returns:
        A new list with one dict per input segment, in input order. Only the
        four keys listed above are carried over; the input is not modified.
    """
    return [
        {
            'text': translator.translate_text(item['text'], from_lang, to_lang),
            'start': item['start'],
            'end': item['end'],
            'id': item['id'],
        }
        for item in segments
    ]


def main(file,translate_to_lang):
    """Run the complete processing pipeline for one video.

    Stages, in order: extract the audio track, transcribe it, translate the
    transcript, write transcript / translation / segments files, score the
    transcript for toxicity, render subtitles onto the video, and cut short
    clips from selected scenes. All output files use fixed relative names
    ('extracted_audio.mp3', 'transcript.txt', 'translation.txt',
    'segments.json', 'subtitled_video.mp4', 'subtitles.srt').

    Args:
        file: The input video, handed to the audio extractor, the subtitle
            renderer and the shorts generator as the video file path.
        translate_to_lang: A key of ``supported_languages``; a missing key
            raises ``KeyError``.

    Returns:
        A 7-tuple: transcript text, translated text, the formatted moderation
        results, the value returned by the subtitle renderer, and three
        short-clip entries (padded with "" when fewer than three exist).
    """
    video_file_path = file
    audio_file_path = 'extracted_audio.mp3'

    # --- Audio extraction -------------------------------------------------
    extractor = VideoHelper()
    print('Extracting audio from video...')
    extractor.extract_audio(video_file_path, audio_file_path)

    # --- Transcription ----------------------------------------------------
    # The audio is transcribed once; text, language and segments are all
    # derived from that single transcription result.
    whisper = WhisperModel('base')

    print('Transcriping audio file....')
    transcription = whisper.transcribe_audio(audio_file_path)

    print('Generating transctipt text...')
    transcript_text = whisper.get_text(transcription)

    print('Detecting audio language....')
    detected_language = whisper.get_detected_language(transcription)

    print('Generating transcript segments...')
    transcript_segments = whisper.get_segments(transcription)

    print('Writing transcript into text file...')
    with open("transcript.txt", "w", encoding="utf-8") as transcript_out:
        transcript_out.write(transcript_text)

    # --- Translation ------------------------------------------------------
    # Only the full transcript text is translated here; per-segment
    # translation is not performed by this function.
    translator = TranslationModel()
    target_language = supported_languages[translate_to_lang]

    print(f'Translating transcript text from {detected_language} to {target_language}...')
    translated_text = translator.translate_text(
        transcript_text, detected_language, target_language
    )

    print('Writing translation text file...')
    with open("translation.txt", "w", encoding="utf-8") as translation_out:
        translation_out.write(translated_text)

    print('Writing transcsript segments and translated segments to json file...')
    segments_file_path = "segments.json"
    with open(segments_file_path, "w", encoding="utf-8") as segments_out:
        json.dump(transcript_segments, segments_out, ensure_ascii=False)

    # --- Moderation -------------------------------------------------------
    print('Analyzing and detecing toxicity levels...')
    moderator = DetoxifyModerator()
    toxicity = moderator.detect_toxicity(transcript_text)
    df = moderator.format_results(toxicity)

    # --- Subtitled video --------------------------------------------------
    # The renderer reads the segments JSON file written above.
    subtitle_renderer = SubtitlesRenderer()
    subtitled_video = subtitle_renderer.add_subtitles(
        video_file_path, segments_file_path, 'subtitled_video.mp4'
    )

    # --- Shorts -----------------------------------------------------------
    output_srt_file = 'subtitles.srt'
    print('Generating SRT file...')
    SRTGenerator.generate_srt(transcript_segments, output_srt_file)

    shorts = ShortsGenerator()
    print('Generating shorts from important scenes...')
    selection = shorts.execute(output_srt_file)
    scenes = shorts.extract_scenes(selection.content)
    shorts_path_list = shorts.extract_video_scenes(video_file_path, scenes)

    # Exactly three short slots are returned: pad with "" when there are
    # fewer, and take only the first three when there are more.
    padded_shorts = shorts_path_list + [""] * (3 - len(shorts_path_list))

    return (transcript_text, translated_text, df, subtitled_video, *padded_shorts[:3])



def interface_function(file,translate_to_lang,with_transcript=False,with_translations=False,with_subtitles=False,with_shorts=False):
    """Callback for the Gradio interface: process the given video.

    The four ``with_*`` flags are accepted but currently ignored; the whole
    pipeline in ``main`` always runs and its result is returned unchanged.

    Args:
        file: Input video, forwarded to ``main``.
        translate_to_lang: Target-language name, forwarded to ``main``.
        with_transcript: Unused.
        with_translations: Unused.
        with_subtitles: Unused.
        with_shorts: Unused.

    Returns:
        Whatever ``main(file, translate_to_lang)`` returns.
    """
    pipeline_result = main(file, translate_to_lang)
    return pipeline_result

# Display name of each selectable target language -> the language code that
# is passed to the translation model. Insertion order is the order in which
# the names are listed in the UI dropdown.
supported_languages = {
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Russian": "ru",
    "Arabic": "ar",
    "Hindi": "hi",
}


# Load environment variables from the .env file before the UI is built.
load_dotenv()


# Input components, in the positional order of interface_function's
# parameters: video, target language, then the four option checkboxes.
inputs = [
    gr.Video(label='Content Video'),
    gr.Dropdown(list(supported_languages), label="Target Language"),
    gr.Checkbox(label="Generate Transcript"),
    gr.Checkbox(label="Translate Transcript"),
    gr.Checkbox(label="Generate Subtitles"),
    gr.Checkbox(label="Generate Shorts"),
]

# Output components, in the order of the 7-tuple returned by main():
# transcript, translation, moderation table, subtitled video, three shorts.
outputs = [
    gr.Textbox(label="Transcript"),
    gr.Textbox(label="Translation"),
    gr.DataFrame(label="Moderation Results"),
    gr.Video(label='Output Video with Subtitles'),
]
short_outputs = [gr.Video(label=f"Short {slot}") for slot in range(1, 4)]
outputs += short_outputs

demo = gr.Interface(
    fn=interface_function,
    inputs=inputs,
    outputs=outputs,
    title="Rosetta AI",
    description="Content Creation Customization",
)


# Start the web UI as soon as this module is executed.
demo.launch()