commit 2
- extract_audio.py +18 -0
- helpers/srt_generator.py +40 -0
- moderator.py +29 -0
- requirements.txt +16 -0
- shorts_generator.py +105 -0
- subtitles.py +67 -0
- transcript_detect.py +47 -0
- translation.py +48 -0
extract_audio.py
ADDED
@@ -0,0 +1,18 @@
from moviepy.editor import VideoFileClip


class VideoHelper(object):
    def extract_audio(self, video_path, audio_path):
        # Load the video file
        video = VideoFileClip(video_path)

        # Extract the audio
        audio = video.audio

        # Write the audio to a file
        audio.write_audiofile(audio_path)

        # Close the video clip
        video.close()
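A minimal usage sketch for the VideoHelper class above; the video.mp4 and audio.mp3 paths are placeholders for illustration:

from extract_audio import VideoHelper

helper = VideoHelper()
# Extracts the audio track of video.mp4 and writes it to audio.mp3
helper.extract_audio("video.mp4", "audio.mp3")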
helpers/srt_generator.py
ADDED
@@ -0,0 +1,40 @@
from datetime import timedelta
import json

# Sample list of subtitle objects
subtitles = [
    {
        "text": " The entire world has been suffering due to the climate change dilemma.",
        "start": 0.0,
        "end": 9.88,
        "id": 0
    },
    # Add more subtitle objects here
]

class SRTGenerator(object):

    @classmethod
    def format_time(cls, seconds):
        """Convert seconds to SRT time format (hh:mm:ss,ms)"""
        ms = int((seconds - int(seconds)) * 1000)
        # Zero-pad the hours so the timestamp is a valid hh:mm:ss SRT time
        td = str(timedelta(seconds=int(seconds))).zfill(8)
        return f"{td},{ms:03d}"

    @classmethod
    def generate_srt(cls, subtitles, output_file):
        with open(output_file, 'w') as f:
            for sub in subtitles:
                start_time = cls.format_time(sub['start'])
                end_time = cls.format_time(sub['end'])
                text = sub['text'].strip()
                srt_entry = f"{sub['id'] + 1}\n{start_time} --> {end_time}\n{text}\n\n"
                f.write(srt_entry)


if __name__ == "__main__":
    segments_file = "segments.json"
    with open(segments_file, 'r') as f:
        segments = json.load(f)
    output_srt_file = "subtitles.srt"
    SRTGenerator.generate_srt(segments, output_srt_file)
moderator.py
ADDED
@@ -0,0 +1,29 @@
from pprint import pprint
from detoxify import Detoxify
import pandas as pd

class DetoxifyModerator(object):

    def detect_toxicity(self, text):
        results = Detoxify('original').predict(text)
        return results

    # def get_toxicity_report(self, toxicity_result):
    #     for key in toxicity_result:
    #         toxicity_result[key] = round(toxicity_result[key] * 100, 2)
    #     return toxicity_result

    def format_results(self, results):
        # Convert the dictionary to a pandas DataFrame
        df = pd.DataFrame(list(results.items()), columns=["Category", "Percentage"])
        df["Percentage"] = df["Percentage"].apply(lambda x: f"{x:.2%}")  # Format as percentage
        return df

if __name__ == '__main__':
    detoxify_moderator = DetoxifyModerator()
    result = detoxify_moderator.detect_toxicity('To let the user select the target language for translation, you can add a dropdown menu in the Gradio interface. This will allow users to choose the target language before processing the video. Here\'s how you can modify the script to include this feature')
    # get_toxicity_report is commented out above, so build the report with format_results
    report = detoxify_moderator.format_results(result)
    pprint(report)
requirements.txt
ADDED
@@ -0,0 +1,16 @@
openai
torch
torchvision
torchaudio
openai-whisper
transformers
sentencepiece
sacremoses
pydub
moviepy
gradio
detoxify
ffmpeg-python
opencv-python
pysrt
python-dotenv
shorts_generator.py
ADDED
@@ -0,0 +1,105 @@
import pysrt
from openai import OpenAI
import os
import re
import subprocess


class ShortsGenerator(object):

    def read_srt(self, file_path):
        subtitles = pysrt.open(file_path)
        return subtitles

    def extract_text(self, subtitles):
        text = ''
        for subtitle in subtitles:
            text += subtitle.text + ' '
        return text.strip()

    def get_important_scenes(self, text):
        # Load OpenAI API key
        client = OpenAI(api_key=os.getenv('OPEN_AI_API_KEY'))
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful video editing assistant."},
                {"role": "user", "content": "Identify the important scenes from the following subtitles text and return them with start and end times. Each scene should be at least 30s and at most 2 min, formatted like this: \"1. Arrival of Raymond Reddington at the FBI office - Start time: 00:00:39, End time: 00:01:17\":\n" + text}
            ],
            max_tokens=1500
        )
        # print(f"Model output: {response.choices[0].message.content}")
        important_scenes = response.choices[0].message
        return important_scenes

    def execute(self, srt_file_path):
        subtitles = self.read_srt(srt_file_path)
        text = self.extract_text(subtitles)
        important_scenes = self.get_important_scenes(text)
        return important_scenes

    def extract_scenes(self, input_text):
        scenes = []

        pattern = r'(?P<scene>\d+)\. (?P<description>.*?) - Start time: (?P<start>\d{2}:\d{2}:\d{2}), End time: (?P<end>\d{2}:\d{2}:\d{2})'

        matches = re.finditer(pattern, input_text)
        for match in matches:
            scene_data = match.groupdict()
            scenes.append(scene_data)

        return scenes

    def extract_video_scenes(self, video_file, scenes):

        shorts_files_path_list = []

        # Output directory
        output_dir = "output/"

        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)

        # Process each scene
        for scene in scenes:
            start_time = scene['start']
            end_time = scene['end']
            description = scene['description']
            output_filename = os.path.join(output_dir, f"{description}.mp4")
            shorts_files_path_list.append(output_filename)

            # ffmpeg command to extract the scene
            cmd = [
                'ffmpeg',
                '-y',  # Overwrite the output file if it exists
                '-i', video_file,
                '-ss', start_time,
                '-to', end_time,
                '-c:v', 'libx264',
                '-c:a', 'aac',
                '-strict', 'experimental',
                '-b:a', '192k',
                output_filename
            ]

            subprocess.run(cmd, capture_output=True)

        return shorts_files_path_list


if __name__ == "__main__":
    srt_file_path = 's1.srt'
    path_video = '1.mp4'
    shorts_generator = ShortsGenerator()
    important_scenes = shorts_generator.execute(srt_file_path)
    scenes = shorts_generator.extract_scenes(important_scenes.content)
    print("Important Scenes:\n", scenes)
    shorts_generator.extract_video_scenes(path_video, scenes)
    print("Well Done")
subtitles.py
ADDED
@@ -0,0 +1,67 @@
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
import json


class SubtitlesRenderer(object):

    def add_subtitles(self, video_file, subtitle_file, output_file):
        # Load subtitle data from JSON
        with open(subtitle_file, 'r', encoding='utf-8') as f:
            subtitles = json.load(f)

        # Load the video
        video = VideoFileClip(video_file)

        # Initialize a list to store TextClips
        text_clips_list = []

        # Define the maximum width for the subtitles
        max_width = video.size[0] - 40  # Adjust as needed, leaving some padding on the sides

        # Create TextClips for each subtitle
        for subtitle in subtitles:
            text = subtitle['text']
            start_time = subtitle['start']
            end_time = subtitle['end']

            # Create TextClip with subtitle text
            txt_clip = TextClip(text, fontsize=28, color='white', font='Arial', method='caption',
                                size=(max_width, None), stroke_color='black',
                                stroke_width=0.5, bg_color='black')

            # Set the duration of the subtitle
            txt_clip = txt_clip.set_duration(end_time - start_time)

            # Position the subtitle at the bottom
            txt_clip = txt_clip.set_position(('center', 'bottom'))

            # Add the TextClip to the list
            text_clips_list.append(txt_clip.set_start(start_time))

        # Composite all TextClips onto the video
        # final_clip = video.fl(compose_text, text_clips_list)
        final_clip = CompositeVideoClip([video] + text_clips_list)

        # Write the result to a file
        final_clip.write_videofile(output_file, codec='libx264', fps=video.fps, audio_codec='aac',
                                   ffmpeg_params=["-vf", "format=yuv420p"])  # yuv420p for player compatibility

        return output_file

    # def compose_text(self, frame, t, text_clips):
    #     # Select the appropriate TextClips for the current time t
    #     current_clips = [text_clip for text_clip in text_clips if text_clip.start < t < text_clip.end]
    #
    #     # Composite the selected TextClips onto the frame
    #     for clip in current_clips:
    #         frame = frame.blit(clip.get_frame(t - clip.start), clip.pos)
    #     return frame


if __name__ == '__main__':
    video_file = 'video.mp4'
    subtitle_file = 'segments.json'
    output_file = 'output_video_with_subtitles.mp4'

    renderer = SubtitlesRenderer()
    renderer.add_subtitles(video_file, subtitle_file, output_file)
transcript_detect.py
ADDED
@@ -0,0 +1,47 @@
import whisper

class WhisperModel(object):

    def __init__(self, model_type):
        self.model = whisper.load_model(model_type)

    # Transcribe an audio file
    def transcribe_audio(self, file_path):
        try:
            result = self.model.transcribe(file_path)
            return result
        except Exception as e:
            print(f"Error {e}")
            raise Exception(f'Error transcribing audio file {e}')

    def get_text(self, transcription):
        return transcription['text']

    def get_detected_language(self, transcription):
        return transcription['language']

    def get_segments(self, transcription):
        text_segments = []
        for segment in transcription['segments']:
            text_segments.append({
                "text": segment['text'],
                "start": segment['start'],
                "end": segment['end'],
                "id": segment['id'],
            })
        return text_segments

    def detect_language(self, file_path):
        try:
            audio = whisper.load_audio(file_path)
            audio = whisper.pad_or_trim(audio)
            # Make a log-Mel spectrogram and move it to the same device as the model
            mel = whisper.log_mel_spectrogram(audio).to(self.model.device)
            # Detect the spoken language
            _, probs = self.model.detect_language(mel)
            print(f"Detected language: {max(probs, key=probs.get)}")
            return max(probs, key=probs.get)
        except Exception as e:
            print(f"Error {e}")
            raise Exception(f'Error detecting language {e}')
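A minimal usage sketch for the WhisperModel class above, assuming an audio file audio.mp3 (a placeholder path, e.g. one produced by VideoHelper):

from transcript_detect import WhisperModel

whisper_model = WhisperModel("base")
transcription = whisper_model.transcribe_audio("audio.mp3")
print(whisper_model.get_detected_language(transcription))  # e.g. 'en'
for segment in whisper_model.get_segments(transcription):
    print(segment["start"], segment["end"], segment["text"])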
translation.py
ADDED
@@ -0,0 +1,48 @@
from transformers import MarianMTModel, MarianTokenizer

class TranslationModel(object):
    def __init__(self):
        pass

    def translate_chunk(self, chunk, src_lang, tgt_lang):
        try:
            model_name = f'Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}'
            model = MarianMTModel.from_pretrained(model_name)
            tokenizer = MarianTokenizer.from_pretrained(model_name)

            inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=512)
            translated_tokens = model.generate(**inputs)
            translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)

            return translated_text

        except Exception as e:
            print(e)
            raise Exception(f"Error translating text {e}")

    def translate_text(self, text, src_lang, tgt_lang):
        max_length = 512
        chunks = self.split_text(text, max_length)
        translated_chunks = [self.translate_chunk(chunk, src_lang, tgt_lang) for chunk in chunks]
        return ' '.join(translated_chunks)

    def split_text(self, text, max_length):
        # Split the text into sentences
        sentences = text.split('. ')
        chunks = []
        current_chunk = ""

        for sentence in sentences:
            if len(current_chunk) + len(sentence) + 1 > max_length:
                chunks.append(current_chunk.strip())
                current_chunk = sentence + ". "
            else:
                current_chunk += sentence + ". "

        if current_chunk:
            chunks.append(current_chunk.strip())

        return chunks
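A minimal usage sketch for the TranslationModel class above, assuming an English-to-French run (the Helsinki-NLP/opus-mt-en-fr checkpoint is downloaded on first use):

from translation import TranslationModel

translator = TranslationModel()
translated = translator.translate_text(
    "The entire world has been suffering due to the climate change dilemma.", "en", "fr"
)
print(translated)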