jschwab21 committed
Commit: f0f6ac7 (verified)
1 Parent(s): 4a7c9de

Update video_processing.py

Files changed (1):
  1. video_processing.py +36 -1
video_processing.py CHANGED
@@ -8,11 +8,41 @@ import torch
 import yt_dlp
 from PIL import Image
 import uuid
+from torchvision import models, transforms
+from torch.nn import functional as F
+
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 
+
+def classify_frame(frame):
+    categories = ["Joy", "Trust", "Fear", "Surprise", "Sadness", "Disgust", "Anger", "Anticipation"]
+    # Load ResNet-50 model
+    resnet50 = models.resnet50(pretrained=True)
+    resnet50.eval().to(device)
+
+    # Preprocess the image
+    preprocess = transforms.Compose([
+        transforms.Resize(256),
+        transforms.CenterCrop(224),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ])
+    input_tensor = preprocess(Image.fromarray(frame))
+    input_batch = input_tensor.unsqueeze(0).to(device)
+
+    # Predict with ResNet-50
+    with torch.no_grad():
+        output = resnet50(input_batch)
+    probabilities = F.softmax(output[0], dim=0)
+
+    # Assuming categories correspond to indices (this is for demo, adjust accordingly)
+    results = {categories[i]: probabilities[i].item() for i in range(len(categories))}
+    return results
+
+
 def download_video(url):
     ydl_opts = {
         'format': 'bestvideo[height<=1440]+bestaudio/best[height<=1440]',
@@ -124,8 +154,13 @@ def process_video(video_url, description):
     video_path = download_video(video_url)
     scenes = find_scenes(video_path)
     best_scene = analyze_scenes(video_path, scenes, description)
+    if best_scene:
+        frames = extract_frames(video_path, *best_scene)
+        if frames:
+            # Classify the first frame
+            frame_results = classify_frame(frames[0])
+            print("Classification of the first frame:", frame_results)
     final_clip = extract_best_scene(video_path, best_scene)
-
     if final_clip:
         output_dir = "output"
         os.makedirs(output_dir, exist_ok=True)
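
Note on the new classify_frame helper: the ImageNet-trained ResNet-50 head has no emotion classes, and the commit's own comment marks the index-to-category mapping as a demo placeholder. Since the file already loads a CLIP model and processor at module level, one possible alternative is to score the same eight labels zero-shot with CLIP. The sketch below is not part of the commit; classify_frame_clip and the prompt wording are hypothetical, and it assumes frame is an RGB array as in classify_frame.

# Sketch only (not in this commit): zero-shot emotion scores from the
# module-level CLIP model/processor instead of the index-mapped ResNet-50 output.
def classify_frame_clip(frame):
    categories = ["Joy", "Trust", "Fear", "Surprise", "Sadness", "Disgust", "Anger", "Anticipation"]
    # Hypothetical prompts; one text per candidate label
    prompts = [f"a photo conveying {c.lower()}" for c in categories]
    inputs = processor(text=prompts, images=Image.fromarray(frame),
                       return_tensors="pt", padding=True).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits_per_image  # shape: (1, len(categories))
    probs = logits.softmax(dim=1)[0]
    return {categories[i]: probs[i].item() for i in range(len(categories))}

If the ResNet-50 path is kept instead, note that classify_frame re-initializes the pretrained weights and transforms on every call; hoisting them to module level, next to the CLIP objects, would avoid that per-frame cost.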