IbrahimHasani committed
Commit 8d1f721 · Parent: a29b529

Update app.py

Files changed (1)
  1. app.py (+14 -9)
app.py CHANGED
@@ -5,13 +5,16 @@ from transformers import AutoProcessor, AutoModel
 from PIL import Image
 import cv2
 
+# Constants
 MODEL_NAME = "microsoft/xclip-base-patch16-zero-shot"
 CLIP_LEN = 32
 
-# Load model and processor once
-processor = AutoProcessor.from_pretrained(MODEL_NAME)
-model = AutoModel.from_pretrained(MODEL_NAME)
+# Check for GPU and set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+# Load model and processor
+processor = AutoProcessor.from_pretrained(MODEL_NAME)
+model = AutoModel.from_pretrained(MODEL_NAME).to(device).eval()
 
 def get_video_length(file_path):
     cap = cv2.VideoCapture(file_path)
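Note on the hunk above: .eval() switches off dropout and similar train-time behavior for inference, and the module-level device pick lets the input tensors later in the file follow the same placement. This assumes torch is already imported at the top of app.py, which the existing torch.no_grad() call further down suggests. A minimal sketch of the same pattern in isolation (illustrative only, not part of the commit):

import torch
from transformers import AutoModel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModel.from_pretrained("microsoft/xclip-base-patch16-zero-shot").to(device).eval()
print(next(model.parameters()).device)  # e.g. cuda:0 when a GPU is visible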
@@ -22,8 +25,8 @@ def get_video_length(file_path):
 def read_video_opencv(file_path, indices):
     cap = cv2.VideoCapture(file_path)
     frames = []
-    for i in indices:
-        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
+    for idx in indices:
+        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
         ret, frame = cap.read()
         if ret:
             frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
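A caveat on the loop above: per-index seeks via cv2.CAP_PROP_POS_FRAMES can be slow and, with some codecs, not frame-accurate. A sequential-read alternative, sketched here under the assumption that indices is sorted (as the uniform sampler below produces); read_video_sequential is a hypothetical name, not part of this commit:

import cv2

def read_video_sequential(file_path, indices):
    # Decode frames in order, keeping only the requested indices.
    wanted = set(int(i) for i in indices)
    cap = cv2.VideoCapture(file_path)
    frames, pos = [], 0
    while wanted:
        ret, frame = cap.read()
        if not ret:
            break
        if pos in wanted:
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            wanted.discard(pos)
        pos += 1
    cap.release()
    return frames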
@@ -40,11 +43,13 @@ def sample_uniform_frame_indices(clip_len, seg_len):
     indices = [i * spacing for i in range(clip_len)]
     return np.array(indices).astype(np.int64)
 
-
+def get_concatenation_layout(clip_len):
+    # Modify as needed for other clip lengths
+    if clip_len == 32:
+        return 4, 8
 
 def concatenate_frames(frames, clip_len):
-    layout = { 32: (4, 8) }
-    rows, cols = layout[clip_len]
+    rows, cols = get_concatenation_layout(clip_len)
     combined_image = Image.new('RGB', (frames[0].shape[1]*cols, frames[0].shape[0]*rows))
     frame_iter = iter(frames)
     y_offset = 0
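One thing to watch in the new helper: for any clip_len other than 32 it falls through and implicitly returns None, so the rows, cols unpack in concatenate_frames would raise a TypeError. A defensive variant as a sketch (the lookup table and explicit error are additions for illustration, not in the commit):

def get_concatenation_layout(clip_len):
    # Grid shape (rows, cols) per supported clip length; extend as needed.
    layouts = {32: (4, 8)}
    if clip_len not in layouts:
        raise ValueError(f"no concatenation layout defined for clip_len={clip_len}")
    return layouts[clip_len]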
@@ -69,7 +74,7 @@ def model_interface(uploaded_video, activity):
         videos=list(video),
         return_tensors="pt",
         padding=True,
-    )
+    ).to(device)  # Move inputs to GPU if available
 
     with torch.no_grad():
         outputs = model(**inputs)
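For context, a minimal sketch of how the pieces touched by this commit fit together for zero-shot activity recognition; the file path and label list are placeholders, and it assumes get_video_length returns the frame count:

video_path = "example.mp4"        # placeholder
labels = ["dancing", "cooking"]   # placeholder candidate activities

seg_len = get_video_length(video_path)
indices = sample_uniform_frame_indices(CLIP_LEN, seg_len)
video = read_video_opencv(video_path, indices)

inputs = processor(text=labels, videos=list(video), return_tensors="pt", padding=True).to(device)
with torch.no_grad():
    outputs = model(**inputs)
probs = outputs.logits_per_video.softmax(dim=1)  # one score per candidate label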
 