Update video_processing.py
video_processing.py  +2 -2  CHANGED
@@ -61,7 +61,7 @@ def analyze_scenes(video_path, scenes, description):
     best_scene = None
 
     # Tokenize and encode the description text
-    text_inputs = processor(text=description, return_tensors="pt").to(device)
+    text_inputs = processor(text=[description], return_tensors="pt", padding=True).to(device)
     text_features = model.get_text_features(**text_inputs).detach()
 
     for scene_num, (start_time, end_time) in enumerate(scenes):
@@ -78,7 +78,7 @@ def analyze_scenes(video_path, scenes, description):
             image_features = model.get_image_features(**image_input).detach()
             logits = (image_features @ text_features.T).squeeze()
             probs = logits.softmax(dim=0)
-            scene_prob += max(
+            scene_prob += probs.max().item()
 
         scene_prob /= len(frames)
         print(f"Scene {scene_num + 1}: Start={start_time}, End={end_time}, Probability={scene_prob}")
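
For context, here is a minimal sketch of how the two updated lines fit into the scene-scoring loop. The CLIP checkpoint, device setup, best-scene bookkeeping, and the extract_frames helper are assumptions added for illustration; only the lines shown in the diff above come from this file.

import torch
from transformers import CLIPModel, CLIPProcessor

# Assumed setup (not part of this commit): a standard CLIP checkpoint.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def analyze_scenes(video_path, scenes, description):
    best_scene = None
    best_prob = 0.0  # assumed bookkeeping, not shown in the diff

    # Tokenize and encode the description text.
    # The description is wrapped in a list and padded, as in updated line 64.
    text_inputs = processor(text=[description], return_tensors="pt", padding=True).to(device)
    text_features = model.get_text_features(**text_inputs).detach()

    for scene_num, (start_time, end_time) in enumerate(scenes):
        # extract_frames is a hypothetical helper that samples frames between
        # start_time and end_time; it is not part of this commit.
        frames = extract_frames(video_path, start_time, end_time)
        scene_prob = 0.0

        for frame in frames:
            image_input = processor(images=frame, return_tensors="pt").to(device)
            image_features = model.get_image_features(**image_input).detach()
            logits = (image_features @ text_features.T).squeeze()
            probs = logits.softmax(dim=0)
            # Accumulate the per-frame maximum probability (updated line 81).
            scene_prob += probs.max().item()

        scene_prob /= len(frames)
        print(f"Scene {scene_num + 1}: Start={start_time}, End={end_time}, Probability={scene_prob}")

        if scene_prob > best_prob:
            best_prob, best_scene = scene_prob, (start_time, end_time)

    return best_scene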