IbrahimHasani committed
Commit a6c8793 · 1 Parent(s): a23243f

Update app.py

Files changed (1)
  1. app.py +15 -4
app.py CHANGED
@@ -5,6 +5,10 @@ from transformers import AutoProcessor, AutoModel
 from PIL import Image
 from decord import VideoReader, cpu
 
+# Use GPU if available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+torch.backends.cudnn.benchmark = True
+
 def sample_uniform_frame_indices(clip_len, seg_len):
     if seg_len < clip_len:
         repeat_factor = np.ceil(clip_len / seg_len).astype(int)
@@ -45,21 +49,28 @@ def model_interface(uploaded_video, model_choice, activity):
         "microsoft/xclip-base-patch32-16-frames": 16,
         "microsoft/xclip-base-patch32": 8
     }.get(model_choice, 32)
+
     indices = sample_uniform_frame_indices(clip_len, seg_len=len(VideoReader(uploaded_video)))
     video = read_video_decord(uploaded_video, indices)
     concatenated_image = concatenate_frames(video, clip_len)
 
-    # Appending "other" to the list of activities
     activities_list = [activity, "other"]
-    processor = AutoProcessor.from_pretrained(model_choice)
-    model = AutoModel.from_pretrained(model_choice)
+
+    processor = AutoProcessor.from_pretrained(model_choice).to(device)
+    model = AutoModel.from_pretrained(model_choice).to(device)
+
+    # Convert the list of frames to a single numpy array for efficient conversion to a tensor
+    video_np_array = np.array(video)
+
     inputs = processor(
         text=activities_list,
-        videos=list(video),
+        videos=video_np_array,
         return_tensors="pt",
         padding=True,
     )
 
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+
     with torch.no_grad():
         outputs = model(**inputs)
 
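For context, a minimal sketch of the inference path this patch sets up, using the microsoft/xclip-base-patch32-16-frames checkpoint named in the diff. The random frame array and the example text prompts are stand-ins for the real read_video_decord output and the user-supplied activity; note that the processor returned by AutoProcessor.from_pretrained is a plain Python object rather than a torch module, so in this sketch only the model and the processor's tensor outputs are placed on the device.

import numpy as np
import torch
from transformers import AutoProcessor, AutoModel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_choice = "microsoft/xclip-base-patch32-16-frames"  # 16-frame checkpoint from the diff
processor = AutoProcessor.from_pretrained(model_choice)           # stays on the host
model = AutoModel.from_pretrained(model_choice).to(device).eval()

# Stand-in for the 16 sampled frames returned by read_video_decord, shape (T, H, W, C)
frames = np.random.randint(0, 255, (16, 224, 224, 3), dtype=np.uint8)

inputs = processor(
    text=["dancing", "other"],   # example activity plus the "other" fallback
    videos=list(frames),
    return_tensors="pt",
    padding=True,
)
inputs = {k: v.to(device) for k, v in inputs.items()}  # move only the tensors

with torch.no_grad():
    outputs = model(**inputs)
probs = outputs.logits_per_video.softmax(dim=1)  # probability per text prompt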