jschwab21 committed
Commit d4f2cec · verified · 1 Parent(s): c25bcaf

Update video_processing.py

Files changed (1)
  1. video_processing.py +18 -15
video_processing.py CHANGED
@@ -93,7 +93,7 @@ def extract_frames(video, start_time, end_time):
         frames.append(frame)
     return frames
 
-def analyze_scenes(video_path, scenes, description):
+def analyze_scenes(video_path, scenes, description, batch_size=6):
     scene_scores = []
     negative_descriptions = [
         "black screen",
@@ -103,10 +103,9 @@ def analyze_scenes(video_path, scenes, description):
         #"A still shot of natural scenery",
         #"Still-camera shot of a person's face"
     ]
-
-    preprocess = transforms.Compose([
-        transforms.ToTensor(),  # Directly convert numpy arrays to tensors
-        transforms.Resize((224, 224)),  # Resize the tensor
+    preprocess = transforms.Compose([
+        transforms.ToTensor(),  # Convert numpy arrays directly to tensors
+        transforms.Resize((224, 224)),  # Resize the tensor to fit model input
         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize the tensor
     ])
 
@@ -122,19 +121,23 @@ def analyze_scenes(video_path, scenes, description):
             print(f"Scene {scene_num + 1}: Start={start_time}, End={end_time} - No frames extracted")
             continue
 
+        # Create batches of frames for processing
+        batches = [frames[i:i + batch_size] for i in range(0, len(frames), batch_size)]
         scene_prob = 0.0
         sentiment_distributions = np.zeros(8)  # Assuming there are 8 sentiments
-        for frame in frames:
-            # Directly preprocess the frame
-            frame_tensor = preprocess(frame).unsqueeze(0).to(device)  # Add batch dimension and send to device
+
+        for batch in batches:
+            batch_tensors = torch.stack([preprocess(frame) for frame in batch]).to(device)
             with torch.no_grad():
-                image_features = model.get_image_features(pixel_values=frame_tensor).detach()
-                positive_similarity = torch.cosine_similarity(image_features, positive_feature.unsqueeze(0)).squeeze().item()
-                negative_similarities = torch.cosine_similarity(image_features, negative_features).squeeze().mean().item()
-                scene_prob += positive_similarity - negative_similarities
-
-            frame_sentiments = classify_frame(frame)
-            sentiment_distributions += np.array(frame_sentiments)
+                image_features = model.get_image_features(pixel_values=batch_tensors).detach()
+                positive_similarities = torch.cosine_similarity(image_features, positive_feature.unsqueeze(0))
+                negative_similarities = torch.cosine_similarity(image_features, negative_features.unsqueeze(0).mean(dim=0, keepdim=True))
+                scene_prob += positive_similarities.mean().item() - negative_similarities.mean().item()
+
+            # Sum up the sentiments for all frames in the batch
+            for frame in batch:
+                frame_sentiments = classify_frame(frame)
+                sentiment_distributions += np.array(frame_sentiments)
 
         sentiment_distributions /= len(frames)  # Normalize to get average probabilities
         sentiment_percentages = {category: round(prob * 100, 2) for category, prob in zip(categories, sentiment_distributions)}
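
Note on the new batch_size parameter: it only changes how frames are grouped before preprocessing. The list comprehension walks the frame list in strides of batch_size and slices out one chunk per stride, with the final chunk possibly shorter. A standalone illustration (the integers below stand in for decoded frames and are not part of the repository):

# Illustration only: integers stand in for decoded video frames.
frames = list(range(14))
batch_size = 6

# Same comprehension as in analyze_scenes: slice the list in strides of batch_size.
batches = [frames[i:i + batch_size] for i in range(0, len(frames), batch_size)]

print(batches)  # [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11], [12, 13]]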
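
The scoring inside the batch loop compares each image embedding against the positive description and the negative descriptions by cosine similarity. Below is a minimal sketch of that step, assuming positive_feature has shape (D,) and negative_features has shape (N, D); the score_batch helper and the 512-dimensional demo tensors are illustrative assumptions, not code from the repository. The sketch collapses the negatives to a single (1, D) centroid before the similarity call so both comparisons broadcast cleanly against the (B, D) batch; the committed expression negative_features.unsqueeze(0).mean(dim=0, keepdim=True) depends on whatever shape negative_features has upstream.

import torch

def score_batch(image_features, positive_feature, negative_features):
    # image_features:    (B, D) image embeddings, e.g. from model.get_image_features(...)
    # positive_feature:  (D,)   text embedding of the target description
    # negative_features: (N, D) text embeddings of the negative descriptions

    # (B,) cosine similarity of every frame in the batch to the positive description
    pos_sim = torch.cosine_similarity(image_features, positive_feature.unsqueeze(0), dim=1)

    # Average the negative embeddings into one (1, D) vector, then compare each frame to it
    neg_centroid = negative_features.mean(dim=0, keepdim=True)
    neg_sim = torch.cosine_similarity(image_features, neg_centroid, dim=1)

    # Scene contribution for this batch: mean positive minus mean negative similarity
    return (pos_sim.mean() - neg_sim.mean()).item()

# Quick self-contained check with random embeddings (embedding size assumed)
if __name__ == "__main__":
    feats = torch.randn(6, 512)
    pos = torch.randn(512)
    negs = torch.randn(4, 512)
    print(score_batch(feats, pos, negs))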