jschwab21 committed on
Commit
3278cee
·
verified ·
1 Parent(s): 8349e5c

Update video_processing.py

Browse files
Files changed (1) hide show
  1. video_processing.py +20 -32
video_processing.py CHANGED
@@ -104,7 +104,6 @@ def analyze_scenes(video_path, scenes, description):
104
  "A still shot of natural scenery",
105
  "Still-camera shot of a person's face"
106
  ]
107
-
108
  text_inputs = processor(text=[description] + negative_descriptions, return_tensors="pt", padding=True).to(device)
109
  text_features = model.get_text_features(**text_inputs).detach()
110
  positive_feature, negative_features = text_features[0], text_features[1:]
@@ -116,45 +115,34 @@ def analyze_scenes(video_path, scenes, description):
116
  continue
117
 
118
  scene_prob = 0.0
119
- sentiment_distributions = np.zeros(8) # Assuming there are 8 sentiments
120
  for frame in frames:
121
- image = Image.fromarray(frame[..., ::-1])
122
- image_input = processor(images=image, return_tensors="pt").to(device)
123
- with torch.no_grad():
124
- image_features = model.get_image_features(**image_input).detach()
125
- positive_similarity = torch.cosine_similarity(image_features, positive_feature.unsqueeze(0)).squeeze().item()
126
- negative_similarities = torch.cosine_similarity(image_features, negative_features).squeeze().mean().item()
127
- scene_prob += positive_similarity - negative_similarities
128
-
129
  frame_sentiments = classify_frame(frame)
130
  sentiment_distributions += np.array(frame_sentiments)
131
 
132
- sentiment_distributions /= len(frames) # Normalize to get average probabilities
133
- sentiment_percentages = {category: round(prob * 100, 2) for category, prob in zip(categories, sentiment_distributions)}
134
  scene_prob /= len(frames)
135
  scene_duration = convert_timestamp_to_seconds(end_time) - convert_timestamp_to_seconds(start_time)
136
- print(f"Scene {scene_num + 1}: Start={start_time}, End={end_time}, Probability={scene_prob}, Duration={scene_duration}, Sentiments: {sentiment_percentages}")
137
-
138
- scene_scores.append((scene_prob, start_time, end_time, scene_duration, sentiment_percentages))
139
-
140
- # Sort scenes by probability and select the best scene
141
- scene_scores.sort(reverse=True, key=lambda x: x[0])
142
- best_scene = max(scene_scores, key=lambda x: x[3]) # Select based on duration among the top scenes
143
-
144
- if best_scene:
145
- print(f"Best Scene: Start={best_scene[1]}, End={best_scene[2]}, Probability={best_scene[0]}, Duration={best_scene[3]}, Sentiments: {best_scene[4]}")
146
- else:
147
- print("No suitable scene found")
148
-
149
- return best_scene[1:3] if best_scene else None
150
-
151
-
152
-
153
- def extract_best_scene(video_path, scene):
154
- if scene is None:
155
  return None
156
 
157
- start_time, end_time = scene
 
158
  start_seconds = convert_timestamp_to_seconds(start_time)
159
  end_seconds = convert_timestamp_to_seconds(end_time)
160
  video_clip = VideoFileClip(video_path).subclip(start_seconds, end_seconds)
 
104
  "A still shot of natural scenery",
105
  "Still-camera shot of a person's face"
106
  ]
 
107
  text_inputs = processor(text=[description] + negative_descriptions, return_tensors="pt", padding=True).to(device)
108
  text_features = model.get_text_features(**text_inputs).detach()
109
  positive_feature, negative_features = text_features[0], text_features[1:]
 
115
  continue
116
 
117
  scene_prob = 0.0
118
+ sentiment_distributions = np.zeros(8) # Assuming 8 sentiments
119
  for frame in frames:
 
 
 
 
 
 
 
 
120
  frame_sentiments = classify_frame(frame)
121
  sentiment_distributions += np.array(frame_sentiments)
122
 
123
+ sentiment_distributions /= len(frames) # Average probabilities
 
124
  scene_prob /= len(frames)
125
  scene_duration = convert_timestamp_to_seconds(end_time) - convert_timestamp_to_seconds(start_time)
126
+ sentiment_percentages = {categories[i]: round(sentiment_distributions[i] * 100, 2) for i in range(len(categories))}
127
+
128
+ scene_scores.append({
129
+ 'probability': scene_prob,
130
+ 'start_time': start_time,
131
+ 'end_time': end_time,
132
+ 'duration': scene_duration,
133
+ 'sentiments': sentiment_percentages
134
+ })
135
+
136
+ best_scene = max(scene_scores, key=lambda x: (x['probability'], x['duration'])) if scene_scores else None
137
+ return best_scene
138
+
139
+
140
+ def extract_best_scene(video_path, scene_data):
141
+ if not scene_data:
 
 
 
142
  return None
143
 
144
+ start_time = scene_data['start_time']
145
+ end_time = scene_data['end_time']
146
  start_seconds = convert_timestamp_to_seconds(start_time)
147
  end_seconds = convert_timestamp_to_seconds(end_time)
148
  video_clip = VideoFileClip(video_path).subclip(start_seconds, end_seconds)