IbrahimHasani committed
Commit 8d1f721 · Parent: a29b529

Update app.py

Files changed (1)
  1. app.py (+14 -9)
app.py CHANGED
@@ -5,13 +5,16 @@ from transformers import AutoProcessor, AutoModel
 from PIL import Image
 import cv2
 
+# Constants
 MODEL_NAME = "microsoft/xclip-base-patch16-zero-shot"
 CLIP_LEN = 32
 
-# Load model and processor once
-processor = AutoProcessor.from_pretrained(MODEL_NAME)
-model = AutoModel.from_pretrained(MODEL_NAME)
+# Check for GPU and set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+# Load model and processor
+processor = AutoProcessor.from_pretrained(MODEL_NAME)
+model = AutoModel.from_pretrained(MODEL_NAME).to(device).eval()
 
 def get_video_length(file_path):
     cap = cv2.VideoCapture(file_path)
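Note on the hunk above: .eval() switches off dropout and similar train-time behavior for inference, and the module-level device pick lets the input tensors later in the file follow the same placement. This assumes torch is already imported at the top of app.py, which the existing torch.no_grad() call further down suggests. A minimal sketch of the same pattern in isolation (illustrative only, not part of the commit):

import torch
from transformers import AutoModel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModel.from_pretrained("microsoft/xclip-base-patch16-zero-shot").to(device).eval()
print(next(model.parameters()).device)  # e.g. cuda:0 when a GPU is visible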
@@ -22,8 +25,8 @@ def get_video_length(file_path):
 def read_video_opencv(file_path, indices):
     cap = cv2.VideoCapture(file_path)
     frames = []
-    for i in indices:
-        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
+    for idx in indices:
+        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
         ret, frame = cap.read()
         if ret:
             frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
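A caveat on the loop above: per-index seeks via cv2.CAP_PROP_POS_FRAMES can be slow and, with some codecs, not frame-accurate. A sequential-read alternative, sketched here under the assumption that indices is sorted (as the uniform sampler below produces); read_video_sequential is a hypothetical name, not part of this commit:

import cv2

def read_video_sequential(file_path, indices):
    # Decode frames in order, keeping only the requested indices.
    wanted = set(int(i) for i in indices)
    cap = cv2.VideoCapture(file_path)
    frames, pos = [], 0
    while wanted:
        ret, frame = cap.read()
        if not ret:
            break
        if pos in wanted:
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            wanted.discard(pos)
        pos += 1
    cap.release()
    return frames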
@@ -40,11 +43,13 @@ def sample_uniform_frame_indices(clip_len, seg_len):
     indices = [i * spacing for i in range(clip_len)]
     return np.array(indices).astype(np.int64)
 
-
+def get_concatenation_layout(clip_len):
+    # Modify as needed for other clip lengths
+    if clip_len == 32:
+        return 4, 8
 
 def concatenate_frames(frames, clip_len):
-    layout = { 32: (4, 8) }
-    rows, cols = layout[clip_len]
+    rows, cols = get_concatenation_layout(clip_len)
     combined_image = Image.new('RGB', (frames[0].shape[1]*cols, frames[0].shape[0]*rows))
     frame_iter = iter(frames)
     y_offset = 0
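One thing to watch in the new helper: for any clip_len other than 32 it falls through and implicitly returns None, so the rows, cols unpack in concatenate_frames would raise a TypeError. A defensive variant as a sketch (the lookup table and explicit error are additions for illustration, not in the commit):

def get_concatenation_layout(clip_len):
    # Grid shape (rows, cols) per supported clip length; extend as needed.
    layouts = {32: (4, 8)}
    if clip_len not in layouts:
        raise ValueError(f"no concatenation layout defined for clip_len={clip_len}")
    return layouts[clip_len]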
@@ -69,7 +74,7 @@ def model_interface(uploaded_video, activity):
         videos=list(video),
         return_tensors="pt",
         padding=True,
-    )
+    ).to(device)  # Move inputs to GPU if available
 
     with torch.no_grad():
         outputs = model(**inputs)
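For context, a minimal sketch of how the pieces touched by this commit fit together for zero-shot activity recognition; the file path and label list are placeholders, and it assumes get_video_length returns the frame count:

video_path = "example.mp4"        # placeholder
labels = ["dancing", "cooking"]   # placeholder candidate activities

seg_len = get_video_length(video_path)
indices = sample_uniform_frame_indices(CLIP_LEN, seg_len)
video = read_video_opencv(video_path, indices)

inputs = processor(text=labels, videos=list(video), return_tensors="pt", padding=True).to(device)
with torch.no_grad():
    outputs = model(**inputs)
probs = outputs.logits_per_video.softmax(dim=1)  # one score per candidate label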
 