Spaces: Runtime error

Commit a6c8793 (parent: a23243f): Update app.py

app.py CHANGED
@@ -5,6 +5,10 @@ from transformers import AutoProcessor, AutoModel
 from PIL import Image
 from decord import VideoReader, cpu
 
+# Use GPU if available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+torch.backends.cudnn.benchmark = True
+
 def sample_uniform_frame_indices(clip_len, seg_len):
     if seg_len < clip_len:
         repeat_factor = np.ceil(clip_len / seg_len).astype(int)
@@ -45,21 +49,28 @@ def model_interface(uploaded_video, model_choice, activity):
         "microsoft/xclip-base-patch32-16-frames": 16,
         "microsoft/xclip-base-patch32": 8
     }.get(model_choice, 32)
+
     indices = sample_uniform_frame_indices(clip_len, seg_len=len(VideoReader(uploaded_video)))
     video = read_video_decord(uploaded_video, indices)
     concatenated_image = concatenate_frames(video, clip_len)
 
-    # Appending "other" to the list of activities
     activities_list = [activity, "other"]
-
-
+
+    processor = AutoProcessor.from_pretrained(model_choice).to(device)
+    model = AutoModel.from_pretrained(model_choice).to(device)
+
+    # Convert the list of frames to a single numpy array for efficient conversion to a tensor
+    video_np_array = np.array(video)
+
     inputs = processor(
         text=activities_list,
-        videos=
+        videos=video_np_array,
         return_tensors="pt",
         padding=True,
     )
 
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+
     with torch.no_grad():
         outputs = model(**inputs)
 
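A note on the two loading lines this commit introduces, which likely explains why the Space still shows Runtime error: in transformers, AutoProcessor.from_pretrained() returns a processor object (a plain Python wrapper around the tokenizer and image processor), not a torch.nn.Module, so it has no .to() method and calling .to(device) on it raises an AttributeError the first time model_interface runs. Only the model needs to move to the GPU. A minimal sketch of the loading step with that call dropped; this is a corrected fragment, not the code as committed:

import torch
from transformers import AutoModel, AutoProcessor

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The processor stays as-is; it only prepares tensors on the CPU side.
processor = AutoProcessor.from_pretrained(model_choice)
# The model is an nn.Module, so moving it to the device is valid.
model = AutoModel.from_pretrained(model_choice).to(device)
model.eval()

With that fix, the committed dict comprehension that moves inputs to the device works as written; the batch object returned by the processor also has its own .to() helper, so inputs = inputs.to(device) is an equivalent shorter form.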
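For context on what follows the forward pass (the hunk ends at outputs = model(**inputs)): X-CLIP's output exposes logits_per_video, one score per text prompt, so the usual next step is a softmax over activities_list. A sketch reusing the names from the diff, with illustrative values in the comment:

# outputs.logits_per_video has shape (num_videos, num_prompts);
# softmax turns the single clip's scores into probabilities.
probs = outputs.logits_per_video.softmax(dim=1)[0]
scores = {label: prob.item() for label, prob in zip(activities_list, probs)}
# e.g. scores might look like {"dancing": 0.93, "other": 0.07}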