jschwab21 commited on
Commit
90dff8a
·
verified ·
1 Parent(s): dcb53fc

Create video_processing.py

Browse files
Files changed (1) hide show
  1. video_processing.py +91 -0
video_processing.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ from scenedetect import VideoManager, SceneManager
3
+ from scenedetect.detectors import ContentDetector
4
+ from moviepy.editor import VideoFileClip, concatenate_videoclips
5
+ from transformers import CLIPProcessor, CLIPModel
6
+ import torch
7
+ import yt_dlp
8
+ import os
9
+
10
+ def process_video(video_url, description):
11
+ # Download or load the video from the URL
12
+ video_path = download_video(video_url)
13
+
14
+ # Segment video into scenes
15
+ scenes = detect_scenes(video_path)
16
+
17
+ # Extract frames and analyze with CLIP model
18
+ best_scenes = analyze_scenes(scenes, description)
19
+
20
+ # Combine best scenes into a final clip
21
+ final_clip = combine_scenes(best_scenes)
22
+
23
+ # Save and return the final clip
24
+ final_clip_path = "output/final_clip.mp4"
25
+ final_clip.write_videofile(final_clip_path)
26
+ return final_clip_path
27
+
28
+ def detect_scenes(video_path):
29
+ video_manager = VideoManager([video_path])
30
+ scene_manager = SceneManager()
31
+ scene_manager.add_detector(ContentDetector())
32
+ video_manager.start()
33
+
34
+ scene_manager.detect_scenes(frame_source=video_manager)
35
+ scene_list = scene_manager.get_scene_list()
36
+ video_manager.release()
37
+
38
+ return scene_list
39
+
40
+ def analyze_scenes(scenes, description):
41
+ # Load CLIP model and processor
42
+ model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
43
+ processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
44
+
45
+ best_scenes = []
46
+
47
+ for scene in scenes:
48
+ # Extract every 5th frame from the scene
49
+ frames = extract_frames(scene)
50
+
51
+ # Analyze frames with CLIP
52
+ for frame in frames:
53
+ inputs = processor(text=description, images=frame, return_tensors="pt", padding=True)
54
+ outputs = model(**inputs)
55
+ logits_per_image = outputs.logits_per_image
56
+ probs = logits_per_image.softmax(dim=1)
57
+
58
+ # Store scenes with high probabilities for the description
59
+ if max(probs) > 0.5: # Define a suitable threshold
60
+ best_scenes.append(scene)
61
+ break
62
+
63
+ return best_scenes
64
+
65
+ def extract_frames(scene):
66
+ frames = []
67
+ start_frame, end_frame = scene[0].get_frames(), scene[1].get_frames()
68
+ video_clip = VideoFileClip(scene[0].get_filename())
69
+
70
+ for frame_num in range(start_frame, end_frame, 5):
71
+ frame = video_clip.get_frame(frame_num / video_clip.fps)
72
+ frames.append(frame)
73
+
74
+ return frames
75
+
76
+ def combine_scenes(scenes):
77
+ final_clip = concatenate_videoclips([VideoFileClip(scene[0].get_filename()).subclip(scene[0].get_seconds(), scene[1].get_seconds()) for scene in scenes])
78
+ return final_clip
79
+
80
+ def download_video(video_url):
81
+ ydl_opts = {
82
+ 'format': 'bestvideo[height<=1440]+bestaudio/best[height<=1440]',
83
+ 'outtmpl': 'downloaded_video.%(ext)s',
84
+ 'noplaylist': True,
85
+ }
86
+
87
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
88
+ info_dict = ydl.extract_info(video_url, download=True)
89
+ video_file = ydl.prepare_filename(info_dict)
90
+
91
+ return video_file