Update video_processing.py

video_processing.py CHANGED (+20 -32)
@@ -104,7 +104,6 @@ def analyze_scenes(video_path, scenes, description):
         "A still shot of natural scenery",
         "Still-camera shot of a person's face"
     ]
-
     text_inputs = processor(text=[description] + negative_descriptions, return_tensors="pt", padding=True).to(device)
     text_features = model.get_text_features(**text_inputs).detach()
     positive_feature, negative_features = text_features[0], text_features[1:]
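
The hunk calls processor, model, and device without showing where they are defined. For context, here is a minimal setup sketch consistent with those calls, assuming the Hugging Face transformers CLIP API; the openai/clip-vit-base-patch32 checkpoint is an assumption, not something the commit confirms:

    import torch
    from transformers import CLIPModel, CLIPProcessor

    # Assumed setup; the actual checkpoint and device handling in this Space may differ.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")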
@@ -116,45 +115,34 @@ def analyze_scenes(video_path, scenes, description):
             continue
 
         scene_prob = 0.0
-        sentiment_distributions = np.zeros(8)  # Assuming
+        sentiment_distributions = np.zeros(8)  # Assuming 8 sentiments
         for frame in frames:
-            image = Image.fromarray(frame[..., ::-1])
-            image_input = processor(images=image, return_tensors="pt").to(device)
-            with torch.no_grad():
-                image_features = model.get_image_features(**image_input).detach()
-            positive_similarity = torch.cosine_similarity(image_features, positive_feature.unsqueeze(0)).squeeze().item()
-            negative_similarities = torch.cosine_similarity(image_features, negative_features).squeeze().mean().item()
-            scene_prob += positive_similarity - negative_similarities
-
             frame_sentiments = classify_frame(frame)
             sentiment_distributions += np.array(frame_sentiments)
 
-        sentiment_distributions /= len(frames)  #
-        sentiment_percentages = {category: round(prob * 100, 2) for category, prob in zip(categories, sentiment_distributions)}
+        sentiment_distributions /= len(frames)  # Average probabilities
         scene_prob /= len(frames)
         scene_duration = convert_timestamp_to_seconds(end_time) - convert_timestamp_to_seconds(start_time)
-
-
-        scene_scores.append(
-            ...
-    else
-        ...
-def extract_best_scene(video_path, scene):
-    if scene is None:
+        sentiment_percentages = {categories[i]: round(sentiment_distributions[i] * 100, 2) for i in range(len(categories))}
+
+        scene_scores.append({
+            'probability': scene_prob,
+            'start_time': start_time,
+            'end_time': end_time,
+            'duration': scene_duration,
+            'sentiments': sentiment_percentages
+        })
+
+    best_scene = max(scene_scores, key=lambda x: (x['probability'], x['duration'])) if scene_scores else None
+    return best_scene
+
+
+def extract_best_scene(video_path, scene_data):
+    if not scene_data:
         return None
 
-    start_time
+    start_time = scene_data['start_time']
+    end_time = scene_data['end_time']
     start_seconds = convert_timestamp_to_seconds(start_time)
     end_seconds = convert_timestamp_to_seconds(end_time)
     video_clip = VideoFileClip(video_path).subclip(start_seconds, end_seconds)
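
Both the old and new code lean on helpers defined elsewhere in the file (convert_timestamp_to_seconds, classify_frame, and the categories list) whose bodies the diff does not show. The stand-ins below are purely hypothetical, shaped only by how the hunk calls them; the "HH:MM:SS.mmm" timestamp format and the 8-way sentiment output are assumptions:

    # Hypothetical stand-ins; the real helpers in video_processing.py may differ.
    def convert_timestamp_to_seconds(timestamp):
        # Assumes "HH:MM:SS" or "HH:MM:SS.mmm"; the diff never shows the
        # actual timestamp format.
        hours, minutes, seconds = timestamp.split(":")
        return int(hours) * 3600 + int(minutes) * 60 + float(seconds)

    def classify_frame(frame):
        # np.zeros(8) above only tells us classify_frame yields 8 values;
        # a dummy uniform distribution is enough to exercise the averaging.
        return [1.0 / 8] * 8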
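
After this commit, analyze_scenes returns the single best-scoring scene dict (or None) instead of the raw score list, and extract_best_scene reads start_time and end_time out of that dict. A sketch of how the two functions might now be chained; the file name, description string, and (start, end) scene tuples are illustrative guesses, since the diff does not show how scenes is built:

    # Illustrative only: the scene format and inputs are assumptions.
    scenes = [("00:00:01.000", "00:00:04.000"), ("00:00:06.000", "00:00:11.000")]
    best = analyze_scenes("input.mp4", scenes, "A person speaking to the camera")
    if best:
        # best: {'probability': ..., 'start_time': ..., 'end_time': ...,
        #        'duration': ..., 'sentiments': {...}}
        clip = extract_best_scene("input.mp4", best)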