jschwab21 committed on
Commit
192d4c3
·
verified ·
1 Parent(s): e579fcc

Update video_processing.py

Browse files
Files changed (1) hide show
  1. video_processing.py +11 -8
video_processing.py CHANGED
@@ -6,6 +6,7 @@ from moviepy.editor import VideoFileClip
6
  from transformers import CLIPProcessor, CLIPModel
7
  import torch
8
  import yt_dlp
 
9
 
10
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
  model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
@@ -61,14 +62,16 @@ def analyze_scenes(video_path, scenes, description):
61
  for start_time, end_time in scenes:
62
  frames = extract_frames(video_path, start_time, end_time)
63
  for frame in frames:
64
- inputs = processor(text=description, images=frame, return_tensors="pt", padding=True)
65
- outputs = model(**inputs)
66
- logits_per_image = outputs.logits_per_image
67
- probs = logits_per_image.softmax(dim=1)
68
- max_prob = max(probs[0]).item()
69
- if max_prob > highest_prob:
70
- highest_prob = max_prob
71
- best_scene = (start_time, end_time)
 
 
72
 
73
  return best_scene
74
 
 
6
  from transformers import CLIPProcessor, CLIPModel
7
  import torch
8
  import yt_dlp
9
+ from PIL import Image
10
 
11
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12
  model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
 
62
  for start_time, end_time in scenes:
63
  frames = extract_frames(video_path, start_time, end_time)
64
  for frame in frames:
65
+ image = Image.fromarray(frame[..., ::-1])
66
+ inputs = processor(text=description, images=image, return_tensors="pt", padding=True).to(device)
67
+ with torch.no_grad():
68
+ outputs = model(**inputs)
69
+ logits_per_image = outputs.logits_per_image
70
+ probs = logits_per_image.softmax(dim=1)
71
+ max_prob = max(probs[0]).item()
72
+ if max_prob > highest_prob:
73
+ highest_prob = max_prob
74
+ best_scene = (start_time, end_time)
75
 
76
  return best_scene
77