Spaces:
Sleeping
Sleeping
Update video_processing.py
Browse files
- video_processing.py +18 -15
video_processing.py
CHANGED
@@ -93,7 +93,7 @@ def extract_frames(video, start_time, end_time):
|
|
93 |
frames.append(frame)
|
94 |
return frames
|
95 |
|
96 |
-
def analyze_scenes(video_path, scenes, description):
|
97 |
scene_scores = []
|
98 |
negative_descriptions = [
|
99 |
"black screen",
|
@@ -103,10 +103,9 @@ def analyze_scenes(video_path, scenes, description):
|
|
103 |
#"A still shot of natural scenery",
|
104 |
#"Still-camera shot of a person's face"
|
105 |
]
|
106 |
-
|
107 |
-
|
108 |
-
transforms.
|
109 |
-
transforms.Resize((224, 224)), # Resize the tensor
|
110 |
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Normalize the tensor
|
111 |
])
|
112 |
|
@@ -122,19 +121,23 @@ def analyze_scenes(video_path, scenes, description):
|
|
122 |
print(f"Scene {scene_num + 1}: Start={start_time}, End={end_time} - No frames extracted")
|
123 |
continue
|
124 |
|
|
|
|
|
125 |
scene_prob = 0.0
|
126 |
sentiment_distributions = np.zeros(8) # Assuming there are 8 sentiments
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
with torch.no_grad():
|
131 |
-
image_features = model.get_image_features(pixel_values=
|
132 |
-
|
133 |
-
negative_similarities = torch.cosine_similarity(image_features, negative_features
|
134 |
-
scene_prob +=
|
135 |
-
|
136 |
-
|
137 |
-
|
|
|
|
|
138 |
|
139 |
sentiment_distributions /= len(frames) # Normalize to get average probabilities
|
140 |
sentiment_percentages = {category: round(prob * 100, 2) for category, prob in zip(categories, sentiment_distributions)}
|
|
|
93 |
frames.append(frame)
|
94 |
return frames
|
95 |
|
96 |
+
def analyze_scenes(video_path, scenes, description, batch_size=6):
|
97 |
scene_scores = []
|
98 |
negative_descriptions = [
|
99 |
"black screen",
|
|
|
103 |
#"A still shot of natural scenery",
|
104 |
#"Still-camera shot of a person's face"
|
105 |
]
|
106 |
+
preprocess = transforms.Compose([
|
107 |
+
transforms.ToTensor(), # Convert numpy arrays directly to tensors
|
108 |
+
transforms.Resize((224, 224)), # Resize the tensor to fit model input
|
|
|
109 |
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Normalize the tensor
|
110 |
])
|
111 |
|
|
|
121 |
print(f"Scene {scene_num + 1}: Start={start_time}, End={end_time} - No frames extracted")
|
122 |
continue
|
123 |
|
124 |
+
# Create batches of frames for processing
|
125 |
+
batches = [frames[i:i + batch_size] for i in range(0, len(frames), batch_size)]
|
126 |
scene_prob = 0.0
|
127 |
sentiment_distributions = np.zeros(8) # Assuming there are 8 sentiments
|
128 |
+
|
129 |
+
for batch in batches:
|
130 |
+
batch_tensors = torch.stack([preprocess(frame) for frame in batch]).to(device)
|
131 |
with torch.no_grad():
|
132 |
+
image_features = model.get_image_features(pixel_values=batch_tensors).detach()
|
133 |
+
positive_similarities = torch.cosine_similarity(image_features, positive_feature.unsqueeze(0))
|
134 |
+
negative_similarities = torch.cosine_similarity(image_features, negative_features.unsqueeze(0).mean(dim=0, keepdim=True))
|
135 |
+
scene_prob += positive_similarities.mean().item() - negative_similarities.mean().item()
|
136 |
+
|
137 |
+
# Sum up the sentiments for all frames in the batch
|
138 |
+
for frame in batch:
|
139 |
+
frame_sentiments = classify_frame(frame)
|
140 |
+
sentiment_distributions += np.array(frame_sentiments)
|
141 |
|
142 |
sentiment_distributions /= len(frames) # Normalize to get average probabilities
|
143 |
sentiment_percentages = {category: round(prob * 100, 2) for category, prob in zip(categories, sentiment_distributions)}
|