Update video_processing.py
video_processing.py  (CHANGED, +5 -27)
@@ -11,6 +11,8 @@ from PIL import Image
 import uuid
 from torchvision import models, transforms
 from torch.nn import functional as F
+import numpy as np
+
 
 categories = ["Joy", "Trust", "Fear", "Surprise", "Sadness", "Disgust", "Anger", "Anticipation"]
 
@@ -66,7 +68,7 @@ def download_video(url):
 def sanitize_filename(filename):
     return "".join([c if c.isalnum() or c in " .-_()" else "_" for c in filename])
 
-def find_scenes(video_path, downscale_factor=
+def find_scenes(video_path, downscale_factor=50):
     video_manager = VideoManager([video_path])
     scene_manager = SceneManager()
     scene_manager.add_detector(ContentDetector(threshold=33))  # Adjusted threshold for finer segmentation
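
Only the first three lines of find_scenes appear in this hunk; the visible change is the new default downscale_factor=50 (PySceneDetect downscales frames by that factor before analysis, trading detection accuracy for speed). A sketch of how the rest of the function plausibly continues under PySceneDetect's legacy VideoManager API; everything past the add_detector call is an assumed continuation, not part of this commit:

    from scenedetect import VideoManager, SceneManager
    from scenedetect.detectors import ContentDetector

    def find_scenes(video_path, downscale_factor=50):
        video_manager = VideoManager([video_path])
        scene_manager = SceneManager()
        scene_manager.add_detector(ContentDetector(threshold=33))
        # Assumed continuation: downscale before detection for speed,
        # then return the detected (start, end) timecode pairs.
        video_manager.set_downscale_factor(downscale_factor)
        video_manager.start()
        scene_manager.detect_scenes(frame_source=video_manager)
        scene_list = scene_manager.get_scene_list()
        video_manager.release()
        return [(start.get_timecode(), end.get_timecode()) for start, end in scene_list]
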
@@ -88,13 +90,11 @@ def extract_frames(video_path, start_time, end_time):
     end_seconds = convert_timestamp_to_seconds(end_time)
     video_clip = VideoFileClip(video_path).subclip(start_seconds, end_seconds)
     # Extract more frames: every frame in the scene
-    for frame_time in range(0, int(video_clip.duration * video_clip.fps), int(video_clip.fps /
+    for frame_time in range(0, int(video_clip.duration * video_clip.fps), int(video_clip.fps / 4)):
         frame = video_clip.get_frame(frame_time / video_clip.fps)
         frames.append(frame)
     return frames
 
-import numpy as np
-
 def analyze_scenes(video_path, scenes, description):
     scene_scores = []
     negative_descriptions = [
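
The rewritten loop samples about four frames per second instead of every frame, so the comment above it ("Extract more frames: every frame in the scene") is now stale. The stray mid-function import numpy as np removed here is the one hoisted to the top of the file in the first hunk. A minimal, self-contained illustration of the new stride arithmetic (the fps and duration values below are examples, not from the commit):

    # With fps = 24 and 4 samples per second, the stride is int(24 / 4) = 6
    # frames, i.e. one sampled frame every 0.25 s.
    fps = 24.0
    duration = 3.0  # seconds
    indices = range(0, int(duration * fps), int(fps / 4))
    times = [i / fps for i in indices]
    print(times)  # [0.0, 0.25, 0.5, ..., 2.75]

One caveat: int(video_clip.fps / 4) is 0 for clips below 4 fps, and range() raises ValueError on a zero step, so max(1, int(video_clip.fps / 4)) would be a safer stride.
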
@@ -103,7 +103,7 @@ def analyze_scenes(video_path, scenes, description):
         "dark scene without much contrast",
         "No people are in this scene",
         "A still shot of natural scenery",
-        "Still-camera shot of a person's face"
+        #"Still-camera shot of a person's face"
     ]
 
     text_inputs = processor(text=[description] + negative_descriptions, return_tensors="pt", padding=True).to(device)
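
Commenting out the last negative prompt leaves four negatives competing with the user's description, and since the scores come from a softmax over all prompts, removing one shifts probability mass among the rest. The processor/model pair used here is CLIP-style; a minimal sketch of this kind of scoring with Hugging Face transformers (the checkpoint name and the score_frame helper are assumptions; the file's actual model setup is outside this hunk):

    import torch
    from transformers import CLIPModel, CLIPProcessor

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    def score_frame(frame, description, negatives):
        # Score one frame (PIL image or numpy array) against the positive
        # description and each negative prompt in a single forward pass.
        inputs = processor(text=[description] + negatives, images=frame,
                           return_tensors="pt", padding=True).to(device)
        with torch.no_grad():
            logits = model(**inputs).logits_per_image  # (1, 1 + len(negatives))
        # Index 0 is the positive description's share of the softmax.
        return logits.softmax(dim=1)[0, 0].item()
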
@@ -164,28 +164,6 @@ def extract_best_scene(video_path, scene):
     video_clip = VideoFileClip(video_path).subclip(start_seconds, end_seconds)
     return video_clip
 
-def process_video(video_url, description):
-    video_path = download_video(video_url)
-    scenes = find_scenes(video_path)
-    best_scene = analyze_scenes(video_path, scenes, description)
-    final_clip = extract_best_scene(video_path, best_scene)
-
-    if final_clip:
-        # Assuming final_clip is a MoviePy VideoFileClip object
-        frame = np.array(final_clip.get_frame(0))  # Get the first frame at t=0 seconds
-        frame_classification = classify_frame(frame)  # Classify the frame
-        print("Frame classification probabilities:", frame_classification)
-
-        output_dir = "output"
-        os.makedirs(output_dir, exist_ok=True)
-        final_clip_path = os.path.join(output_dir, f"{uuid.uuid4()}_final_clip.mp4")
-        final_clip.write_videofile(final_clip_path, codec='libx264', audio_codec='aac')
-        cleanup_temp_files()
-        return final_clip_path
-
-    return None
-
-
 def cleanup_temp_files():
     temp_dir = 'temp_videos'
     if os.path.exists(temp_dir):
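
With process_video deleted (along with its classify_frame debug print), video_processing.py no longer owns the end-to-end flow; whatever calls into this module now has to chain the remaining helpers itself. A hypothetical caller-side sketch that reuses only names surviving this commit (run_pipeline and its signature are inventions for illustration):

    import os
    import uuid

    from video_processing import (download_video, find_scenes, analyze_scenes,
                                  extract_best_scene, cleanup_temp_files)

    def run_pipeline(video_url, description, output_dir="output"):
        # Mirror the deleted orchestration, minus the classification debug step.
        video_path = download_video(video_url)
        scenes = find_scenes(video_path)
        best_scene = analyze_scenes(video_path, scenes, description)
        final_clip = extract_best_scene(video_path, best_scene)
        if not final_clip:
            return None
        os.makedirs(output_dir, exist_ok=True)
        out_path = os.path.join(output_dir, f"{uuid.uuid4()}_final_clip.mp4")
        final_clip.write_videofile(out_path, codec="libx264", audio_codec="aac")
        cleanup_temp_files()
        return out_path
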