Spaces:
Sleeping
Sleeping
Update video_processing.py
Browse files- video_processing.py +9 -5
video_processing.py
CHANGED
@@ -59,6 +59,10 @@ def analyze_scenes(video_path, scenes, description):
|
|
59 |
highest_prob = 0.0
|
60 |
best_scene = None
|
61 |
|
|
|
|
|
|
|
|
|
62 |
for scene_num, (start_time, end_time) in enumerate(scenes):
|
63 |
frames = extract_frames(video_path, start_time, end_time)
|
64 |
if not frames:
|
@@ -68,12 +72,11 @@ def analyze_scenes(video_path, scenes, description):
|
|
68 |
scene_prob = 0.0
|
69 |
for frame in frames:
|
70 |
image = Image.fromarray(frame[..., ::-1])
|
71 |
-
|
72 |
with torch.no_grad():
|
73 |
-
|
74 |
-
logits_per_image =
|
75 |
-
|
76 |
-
scene_prob += probs[0][0].item() # Get the probability of the first class
|
77 |
|
78 |
scene_prob /= len(frames)
|
79 |
print(f"Scene {scene_num + 1}: Start={start_time}, End={end_time}, Probability={scene_prob}")
|
@@ -112,3 +115,4 @@ def process_video(video_url, description):
|
|
112 |
final_clip.write_videofile(final_clip_path, codec='libx264', audio_codec='aac')
|
113 |
return final_clip_path
|
114 |
return None
|
|
|
|
59 |
highest_prob = 0.0
|
60 |
best_scene = None
|
61 |
|
62 |
+
# Tokenize and encode the description text
|
63 |
+
text_inputs = processor(text=[description], return_tensors="pt", padding=True).to(device)
|
64 |
+
text_features = model.get_text_features(**text_inputs).detach()
|
65 |
+
|
66 |
for scene_num, (start_time, end_time) in enumerate(scenes):
|
67 |
frames = extract_frames(video_path, start_time, end_time)
|
68 |
if not frames:
|
|
|
72 |
scene_prob = 0.0
|
73 |
for frame in frames:
|
74 |
image = Image.fromarray(frame[..., ::-1])
|
75 |
+
image_input = processor(images=image, return_tensors="pt").to(device)
|
76 |
with torch.no_grad():
|
77 |
+
image_features = model.get_image_features(**image_input).detach()
|
78 |
+
logits_per_image = torch.cosine_similarity(image_features, text_features)
|
79 |
+
scene_prob += logits_per_image.item()
|
|
|
80 |
|
81 |
scene_prob /= len(frames)
|
82 |
print(f"Scene {scene_num + 1}: Start={start_time}, End={end_time}, Probability={scene_prob}")
|
|
|
115 |
final_clip.write_videofile(final_clip_path, codec='libx264', audio_codec='aac')
|
116 |
return final_clip_path
|
117 |
return None
|
118 |
+
|