Spaces:
Sleeping
Sleeping
Update video_processing.py
Browse files- video_processing.py +32 -20
video_processing.py
CHANGED
@@ -104,6 +104,7 @@ def analyze_scenes(video_path, scenes, description):
|
|
104 |
"A still shot of natural scenery",
|
105 |
"Still-camera shot of a person's face"
|
106 |
]
|
|
|
107 |
text_inputs = processor(text=[description] + negative_descriptions, return_tensors="pt", padding=True).to(device)
|
108 |
text_features = model.get_text_features(**text_inputs).detach()
|
109 |
positive_feature, negative_features = text_features[0], text_features[1:]
|
@@ -115,34 +116,45 @@ def analyze_scenes(video_path, scenes, description):
|
|
115 |
continue
|
116 |
|
117 |
scene_prob = 0.0
|
118 |
-
sentiment_distributions = np.zeros(8) # Assuming 8 sentiments
|
119 |
for frame in frames:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
frame_sentiments = classify_frame(frame)
|
121 |
sentiment_distributions += np.array(frame_sentiments)
|
122 |
|
123 |
-
sentiment_distributions /= len(frames) #
|
|
|
124 |
scene_prob /= len(frames)
|
125 |
scene_duration = convert_timestamp_to_seconds(end_time) - convert_timestamp_to_seconds(start_time)
|
126 |
-
|
127 |
-
|
128 |
-
scene_scores.append(
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
|
|
|
|
|
|
142 |
return None
|
143 |
|
144 |
-
start_time =
|
145 |
-
end_time = scene_data['end_time']
|
146 |
start_seconds = convert_timestamp_to_seconds(start_time)
|
147 |
end_seconds = convert_timestamp_to_seconds(end_time)
|
148 |
video_clip = VideoFileClip(video_path).subclip(start_seconds, end_seconds)
|
|
|
104 |
"A still shot of natural scenery",
|
105 |
"Still-camera shot of a person's face"
|
106 |
]
|
107 |
+
|
108 |
text_inputs = processor(text=[description] + negative_descriptions, return_tensors="pt", padding=True).to(device)
|
109 |
text_features = model.get_text_features(**text_inputs).detach()
|
110 |
positive_feature, negative_features = text_features[0], text_features[1:]
|
|
|
116 |
continue
|
117 |
|
118 |
scene_prob = 0.0
|
119 |
+
sentiment_distributions = np.zeros(8) # Assuming there are 8 sentiments
|
120 |
for frame in frames:
|
121 |
+
image = Image.fromarray(frame[..., ::-1])
|
122 |
+
image_input = processor(images=image, return_tensors="pt").to(device)
|
123 |
+
with torch.no_grad():
|
124 |
+
image_features = model.get_image_features(**image_input).detach()
|
125 |
+
positive_similarity = torch.cosine_similarity(image_features, positive_feature.unsqueeze(0)).squeeze().item()
|
126 |
+
negative_similarities = torch.cosine_similarity(image_features, negative_features).squeeze().mean().item()
|
127 |
+
scene_prob += positive_similarity - negative_similarities
|
128 |
+
|
129 |
frame_sentiments = classify_frame(frame)
|
130 |
sentiment_distributions += np.array(frame_sentiments)
|
131 |
|
132 |
+
sentiment_distributions /= len(frames) # Normalize to get average probabilities
|
133 |
+
sentiment_percentages = {category: round(prob * 100, 2) for category, prob in zip(categories, sentiment_distributions)}
|
134 |
scene_prob /= len(frames)
|
135 |
scene_duration = convert_timestamp_to_seconds(end_time) - convert_timestamp_to_seconds(start_time)
|
136 |
+
print(f"Scene {scene_num + 1}: Start={start_time}, End={end_time}, Probability={scene_prob}, Duration={scene_duration}, Sentiments: {sentiment_percentages}")
|
137 |
+
|
138 |
+
scene_scores.append((scene_prob, start_time, end_time, scene_duration, sentiment_percentages))
|
139 |
+
|
140 |
+
# Sort scenes by probability and select the best scene
|
141 |
+
scene_scores.sort(reverse=True, key=lambda x: x[0])
|
142 |
+
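best_scene = max(scene_scores, key=lambda x: x[3])  # NOTE(review): picks the longest scene across ALL scored scenes, not only the top ones; the sort above merely breaks duration ties in favor of higher probability. max() raises ValueError if scene_scores is empty, so the "No suitable scene found" branch is never reached.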
|
143 |
+
|
144 |
+
if best_scene:
|
145 |
+
print(f"Best Scene: Start={best_scene[1]}, End={best_scene[2]}, Probability={best_scene[0]}, Duration={best_scene[3]}, Sentiments: {best_scene[4]}")
|
146 |
+
else:
|
147 |
+
print("No suitable scene found")
|
148 |
+
|
149 |
+
return best_scene[1:3] if best_scene else None
|
150 |
+
|
151 |
+
|
152 |
+
|
153 |
+
def extract_best_scene(video_path, scene):
|
154 |
+
if scene is None:
|
155 |
return None
|
156 |
|
157 |
+
start_time, end_time = scene
|
|
|
158 |
start_seconds = convert_timestamp_to_seconds(start_time)
|
159 |
end_seconds = convert_timestamp_to_seconds(end_time)
|
160 |
video_clip = VideoFileClip(video_path).subclip(start_seconds, end_seconds)
|