adeeb-khoja committed · verified
Commit 9c1d41d · 1 Parent(s): 6d31a7a
extract_audio.py ADDED
@@ -0,0 +1,18 @@
+from moviepy.editor import VideoFileClip
+
+
+class VideoHelper(object):
+
+    def extract_audio(self, video_path, audio_path):
+        # Load the video file
+        video = VideoFileClip(video_path)
+
+        # Extract the audio
+        audio = video.audio
+
+        # Write the audio to a file
+        audio.write_audiofile(audio_path)
+
+        # Close the video clip
+        video.close()
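
Usage sketch (illustrative, not part of the commit; the file names are placeholders):

    from extract_audio import VideoHelper

    helper = VideoHelper()
    # Writes the audio track of input.mp4 to audio.wav
    helper.extract_audio("input.mp4", "audio.wav")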
helpers/srt_generator.py ADDED
@@ -0,0 +1,40 @@
+import json
+
+# Sample list of subtitle objects
+subtitles = [
+    {
+        "text": " The entire world has been suffering due to the climate change dilemma.",
+        "start": 0.0,
+        "end": 9.88,
+        "id": 0
+    },
+    # Add more subtitle objects here
+]
+
+
+class SRTGenerator(object):
+
+    @classmethod
+    def format_time(cls, seconds):
+        """Convert seconds to SRT time format (hh:mm:ss,ms)"""
+        # Zero-pad each field explicitly; str(timedelta(...)) yields a
+        # single-digit hour ("0:00:00"), which is not valid SRT.
+        ms = int((seconds - int(seconds)) * 1000)
+        hours, remainder = divmod(int(seconds), 3600)
+        minutes, secs = divmod(remainder, 60)
+        return f"{hours:02d}:{minutes:02d}:{secs:02d},{ms:03d}"
+
+    @classmethod
+    def generate_srt(cls, subtitles, output_file):
+        with open(output_file, 'w') as f:
+            for sub in subtitles:
+                start_time = cls.format_time(sub['start'])
+                end_time = cls.format_time(sub['end'])
+                text = sub['text'].strip()
+                srt_entry = f"{sub['id'] + 1}\n{start_time} --> {end_time}\n{text}\n\n"
+                f.write(srt_entry)
+
+
+if __name__ == "__main__":
+    segments_file = "segments.json"
+    with open(segments_file, 'r') as f:
+        segments = json.load(f)
+    output_srt_file = "subtitles.srt"
+    SRTGenerator.generate_srt(segments, output_srt_file)
moderator.py ADDED
@@ -0,0 +1,29 @@
+from pprint import pprint
+from detoxify import Detoxify
+import pandas as pd
+
+
+class DetoxifyModerator(object):
+
+    def detect_toxicity(self, text):
+        results = Detoxify('original').predict(text)
+        return results
+
+    # def get_toxicity_report(self, toxicity_result):
+    #     for key in toxicity_result:
+    #         toxicity_result[key] = round(toxicity_result[key] * 100, 2)
+    #     return toxicity_result
+
+    def format_results(self, results):
+        # Convert the dictionary to a pandas DataFrame
+        df = pd.DataFrame(list(results.items()), columns=["Category", "Percentage"])
+        df["Percentage"] = df["Percentage"].apply(lambda x: f"{x:.2%}")  # Format as percentage
+        return df
+
+
+if __name__ == '__main__':
+    detoxify_moderator = DetoxifyModerator()
+    result = detoxify_moderator.detect_toxicity('To let the user select the target language for translation, you can add a dropdown menu in the Gradio interface. This will allow users to choose the target language before processing the video. Here\'s how you can modify the script to include this feature')
+    # get_toxicity_report is commented out above, so use format_results instead
+    report = detoxify_moderator.format_results(result)
+    pprint(report)
requirements.txt ADDED
@@ -0,0 +1,16 @@
+openai
+torch
+torchvision
+torchaudio
+openai-whisper
+transformers
+sentencepiece
+sacremoses
+pydub
+moviepy
+gradio
+detoxify
+ffmpeg-python
+opencv-python
+pysrt
+python-dotenv
shorts_generator.py ADDED
@@ -0,0 +1,105 @@
+import pysrt
+from openai import OpenAI
+import os
+import re
+import subprocess
+
+
+class ShortsGenerator(object):
+
+    def read_srt(self, file_path):
+        subtitles = pysrt.open(file_path)
+        return subtitles
+
+    def extract_text(self, subtitles):
+        text = ''
+        for subtitle in subtitles:
+            text += subtitle.text + ' '
+        return text.strip()
+
+    def get_important_scenes(self, text):
+        # Load OpenAI API key
+        client = OpenAI(api_key=os.getenv('OPEN_AI_API_KEY'))
+        response = client.chat.completions.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a helpful video editing assistant."},
+                {"role": "user", "content": "Identify the important scenes in the following subtitle text and return them with start and end times. Each scene should be at least 30s and at most 2 min long, formatted like this: \"1. Arrival of Raymond Reddington at the FBI office - Start time: 00:00:39, End time: 00:01:17\":\n" + text}
+            ],
+            max_tokens=1500
+        )
+        # print(f"Model output: {response.choices[0].message.content}")
+        important_scenes = response.choices[0].message
+        return important_scenes
+
+    def execute(self, srt_file_path):
+        subtitles = self.read_srt(srt_file_path)
+        text = self.extract_text(subtitles)
+        important_scenes = self.get_important_scenes(text)
+        return important_scenes
+
+    def extract_scenes(self, input_text):
+        scenes = []
+
+        # Match entries like "1. Description - Start time: 00:00:39, End time: 00:01:17"
+        pattern = r'(?P<scene>\d+)\. (?P<description>.*?) - Start time: (?P<start>\d{2}:\d{2}:\d{2}), End time: (?P<end>\d{2}:\d{2}:\d{2})'
+
+        matches = re.finditer(pattern, input_text)
+        for match in matches:
+            scene_data = match.groupdict()
+            scenes.append(scene_data)
+
+        return scenes
+
+    def extract_video_scenes(self, video_file, scenes):
+        shorts_files_path_list = []
+
+        # Ensure output directory exists
+        output_dir = "output/"
+        os.makedirs(output_dir, exist_ok=True)
+
+        # Process each scene
+        for scene in scenes:
+            start_time = scene['start']
+            end_time = scene['end']
+            description = scene['description']
+            # Note: the scene description is used directly as the file name,
+            # so characters such as "/" or ":" in it would break the path.
+            output_filename = os.path.join(output_dir, f"{description}.mp4")
+            shorts_files_path_list.append(output_filename)
+
+            # ffmpeg command to extract the scene
+            cmd = [
+                'ffmpeg',
+                '-y',  # Overwrite output file if it exists
+                '-i', video_file,
+                '-ss', start_time,
+                '-to', end_time,
+                '-c:v', 'libx264',
+                '-c:a', 'aac',
+                '-strict', 'experimental',
+                '-b:a', '192k',
+                output_filename,
+            ]
+
+            subprocess.run(cmd, capture_output=True)
+
+        return shorts_files_path_list
+
+
+if __name__ == "__main__":
+    srt_file_path = 's1.srt'
+    path_video = '1.mp4'
+    shorts_generator = ShortsGenerator()
+    important_scenes = shorts_generator.execute(srt_file_path)
+    # Parse the model output once and reuse it
+    scenes = shorts_generator.extract_scenes(important_scenes.content)
+    print("Important Scenes:\n", scenes)
+    shorts_generator.extract_video_scenes(path_video, scenes)
+    print("Well Done")
subtitles.py ADDED
@@ -0,0 +1,67 @@
+from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
+import json
+
+
+class SubtitlesRenderer(object):
+
+    def add_subtitles(self, video_file, subtitle_file, output_file):
+        # Load subtitle data from JSON
+        with open(subtitle_file, 'r', encoding='utf-8') as f:
+            subtitles = json.load(f)
+
+        # Load the video
+        video = VideoFileClip(video_file)
+
+        # Initialize a list to store TextClips
+        text_clips_list = []
+
+        # Define the maximum width for the subtitles, leaving some padding on the sides
+        max_width = video.size[0] - 40  # Adjust as needed
+
+        # Create a TextClip for each subtitle
+        for subtitle in subtitles:
+            text = subtitle['text']
+            start_time = subtitle['start']
+            end_time = subtitle['end']
+
+            # Create TextClip with the subtitle text
+            txt_clip = TextClip(text, fontsize=28, color='white', font='Arial',
+                                method='caption', size=(max_width, None),
+                                stroke_color='black', stroke_width=0.5, bg_color='black')
+
+            # Set the duration of the subtitle
+            txt_clip = txt_clip.set_duration(end_time - start_time)
+
+            # Position the subtitle at the bottom
+            txt_clip = txt_clip.set_position(('center', 'bottom'))
+
+            # Add the TextClip to the list
+            text_clips_list.append(txt_clip.set_start(start_time))
+
+        # Composite all TextClips onto the video
+        # final_clip = video.fl(compose_text, text_clips_list)
+        final_clip = CompositeVideoClip([video] + text_clips_list)
+
+        # Write the result to a file
+        final_clip.write_videofile(output_file, codec='libx264', fps=video.fps, audio_codec='aac',
+                                   ffmpeg_params=["-vf", "format=yuv420p"])  # yuv420p for player compatibility
+
+        return output_file
+
+    # def compose_text(self, frame, t, text_clips):
+    #     # Select the appropriate TextClips for the current time t
+    #     current_clips = [text_clip for text_clip in text_clips if text_clip.start < t < text_clip.end]
+    #     # Composite the selected TextClips onto the frame
+    #     for clip in current_clips:
+    #         frame = frame.blit(clip.get_frame(t - clip.start), clip.pos)
+    #     return frame
+
+
+if __name__ == '__main__':
+    video_file = 'video.mp4'
+    subtitle_file = 'segments.json'
+    output_file = 'output_video_with_subtitles.mp4'
+
+    renderer = SubtitlesRenderer()
+    renderer.add_subtitles(video_file, subtitle_file, output_file)
transcript_detect.py ADDED
@@ -0,0 +1,47 @@
+import whisper
+
+
+class WhisperModel(object):
+
+    def __init__(self, model_type):
+        # Load the requested model; the original hardcoded "base" and ignored model_type
+        self.model = whisper.load_model(model_type)
+
+    # Transcribe an audio file
+    def transcribe_audio(self, file_path):
+        try:
+            result = self.model.transcribe(file_path)
+            return result
+        except Exception as e:
+            print(f"Error {e}")
+            raise Exception(f'Error transcribing audio file {e}')
+
+    def get_text(self, transcription):
+        return transcription['text']
+
+    def get_detected_language(self, transcription):
+        return transcription['language']
+
+    def get_segments(self, transcription):
+        text_segments = []
+        for segment in transcription['segments']:
+            text_segments.append({
+                "text": segment['text'],
+                "start": segment['start'],
+                "end": segment['end'],
+                "id": segment['id'],
+            })
+        return text_segments
+
+    def detect_language(self, file_path):
+        try:
+            audio = whisper.load_audio(file_path)
+            audio = whisper.pad_or_trim(audio)
+            # Make a log-Mel spectrogram and move it to the same device as the model
+            mel = whisper.log_mel_spectrogram(audio).to(self.model.device)
+            # Detect the spoken language
+            _, probs = self.model.detect_language(mel)
+            print(f"Detected language: {max(probs, key=probs.get)}")
+            return max(probs, key=probs.get)
+        except Exception as e:
+            print(f"Error {e}")
+            raise Exception(f'Error detecting language {e}')
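
Usage sketch (illustrative, not part of the commit; "base" and "audio.wav" are placeholder arguments):

    from transcript_detect import WhisperModel

    model = WhisperModel("base")  # model size is passed through to whisper.load_model
    transcription = model.transcribe_audio("audio.wav")
    print(model.get_detected_language(transcription))
    print(model.get_text(transcription))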
translation.py ADDED
@@ -0,0 +1,48 @@
+from transformers import MarianMTModel, MarianTokenizer
+
+
+class TranslationModel(object):
+
+    def __init__(self):
+        pass
+
+    def translate_chunk(self, chunk, src_lang, tgt_lang):
+        try:
+            # Note: the model and tokenizer are reloaded for every chunk;
+            # caching them per language pair would be much faster.
+            model_name = f'Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}'
+            model = MarianMTModel.from_pretrained(model_name)
+            tokenizer = MarianTokenizer.from_pretrained(model_name)
+
+            inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=512)
+            translated_tokens = model.generate(**inputs)
+            translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
+
+            return translated_text
+
+        except Exception as e:
+            print(e)
+            raise Exception(f"Error translating text {e}")
+
+    def translate_text(self, text, src_lang, tgt_lang):
+        max_length = 512
+        chunks = self.split_text(text, max_length)
+        translated_chunks = [self.translate_chunk(chunk, src_lang, tgt_lang) for chunk in chunks]
+        return ' '.join(translated_chunks)
+
+    def split_text(self, text, max_length):
+        # Split text into sentences
+        sentences = text.split('. ')
+        chunks = []
+        current_chunk = ""
+
+        for sentence in sentences:
+            # Only close the current chunk if it is non-empty; otherwise a single
+            # over-long sentence would append an empty chunk.
+            if current_chunk and len(current_chunk) + len(sentence) + 1 > max_length:
+                chunks.append(current_chunk.strip())
+                current_chunk = sentence + ". "
+            else:
+                current_chunk += sentence + ". "
+
+        if current_chunk:
+            chunks.append(current_chunk.strip())
+
+        return chunks
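
Usage sketch (illustrative, not part of the commit; "en" and "fr" are example codes resolving to the Helsinki-NLP/opus-mt-en-fr checkpoint):

    from translation import TranslationModel

    translator = TranslationModel()
    print(translator.translate_text("Hello world. How are you?", "en", "fr"))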