jschwab21 committed on
Commit
da8565f
·
verified ·
1 Parent(s): f9d282c

Update video_processing.py

Browse files
Files changed (1) hide show
  1. video_processing.py +32 -20
video_processing.py CHANGED
@@ -104,6 +104,7 @@ def analyze_scenes(video_path, scenes, description):
104
  "A still shot of natural scenery",
105
  "Still-camera shot of a person's face"
106
  ]
 
107
  text_inputs = processor(text=[description] + negative_descriptions, return_tensors="pt", padding=True).to(device)
108
  text_features = model.get_text_features(**text_inputs).detach()
109
  positive_feature, negative_features = text_features[0], text_features[1:]
@@ -115,34 +116,45 @@ def analyze_scenes(video_path, scenes, description):
115
  continue
116
 
117
  scene_prob = 0.0
118
- sentiment_distributions = np.zeros(8) # Assuming 8 sentiments
119
  for frame in frames:
 
 
 
 
 
 
 
 
120
  frame_sentiments = classify_frame(frame)
121
  sentiment_distributions += np.array(frame_sentiments)
122
 
123
- sentiment_distributions /= len(frames) # Average probabilities
 
124
  scene_prob /= len(frames)
125
  scene_duration = convert_timestamp_to_seconds(end_time) - convert_timestamp_to_seconds(start_time)
126
- sentiment_percentages = {categories[i]: round(sentiment_distributions[i] * 100, 2) for i in range(len(categories))}
127
-
128
- scene_scores.append({
129
- 'probability': scene_prob,
130
- 'start_time': start_time,
131
- 'end_time': end_time,
132
- 'duration': scene_duration,
133
- 'sentiments': sentiment_percentages
134
- })
135
-
136
- best_scene = max(scene_scores, key=lambda x: (x['probability'], x['duration'])) if scene_scores else None
137
- return best_scene
138
-
139
-
140
- def extract_best_scene(video_path, scene_data):
141
- if not scene_data:
 
 
 
142
  return None
143
 
144
- start_time = scene_data['start_time']
145
- end_time = scene_data['end_time']
146
  start_seconds = convert_timestamp_to_seconds(start_time)
147
  end_seconds = convert_timestamp_to_seconds(end_time)
148
  video_clip = VideoFileClip(video_path).subclip(start_seconds, end_seconds)
 
104
  "A still shot of natural scenery",
105
  "Still-camera shot of a person's face"
106
  ]
107
+
108
  text_inputs = processor(text=[description] + negative_descriptions, return_tensors="pt", padding=True).to(device)
109
  text_features = model.get_text_features(**text_inputs).detach()
110
  positive_feature, negative_features = text_features[0], text_features[1:]
 
116
  continue
117
 
118
  scene_prob = 0.0
119
+ sentiment_distributions = np.zeros(8) # Assuming there are 8 sentiments
120
  for frame in frames:
121
+ image = Image.fromarray(frame[..., ::-1])
122
+ image_input = processor(images=image, return_tensors="pt").to(device)
123
+ with torch.no_grad():
124
+ image_features = model.get_image_features(**image_input).detach()
125
+ positive_similarity = torch.cosine_similarity(image_features, positive_feature.unsqueeze(0)).squeeze().item()
126
+ negative_similarities = torch.cosine_similarity(image_features, negative_features).squeeze().mean().item()
127
+ scene_prob += positive_similarity - negative_similarities
128
+
129
  frame_sentiments = classify_frame(frame)
130
  sentiment_distributions += np.array(frame_sentiments)
131
 
132
+ sentiment_distributions /= len(frames) # Normalize to get average probabilities
133
+ sentiment_percentages = {category: round(prob * 100, 2) for category, prob in zip(categories, sentiment_distributions)}
134
  scene_prob /= len(frames)
135
  scene_duration = convert_timestamp_to_seconds(end_time) - convert_timestamp_to_seconds(start_time)
136
+ print(f"Scene {scene_num + 1}: Start={start_time}, End={end_time}, Probability={scene_prob}, Duration={scene_duration}, Sentiments: {sentiment_percentages}")
137
+
138
+ scene_scores.append((scene_prob, start_time, end_time, scene_duration, sentiment_percentages))
139
+
140
+ # Sort scenes by probability and select the best scene
141
+ scene_scores.sort(reverse=True, key=lambda x: x[0])
142
+ best_scene = max(scene_scores, key=lambda x: x[3]) # Select based on duration among the top scenes
143
+
144
+ if best_scene:
145
+ print(f"Best Scene: Start={best_scene[1]}, End={best_scene[2]}, Probability={best_scene[0]}, Duration={best_scene[3]}, Sentiments: {best_scene[4]}")
146
+ else:
147
+ print("No suitable scene found")
148
+
149
+ return best_scene[1:3] if best_scene else None
150
+
151
+
152
+
153
+ def extract_best_scene(video_path, scene):
154
+ if scene is None:
155
  return None
156
 
157
+ start_time, end_time = scene
 
158
  start_seconds = convert_timestamp_to_seconds(start_time)
159
  end_seconds = convert_timestamp_to_seconds(end_time)
160
  video_clip = VideoFileClip(video_path).subclip(start_seconds, end_seconds)