Spaces:

IbrahimHasani
/

ActionDetectionVideo

Runtime error

IbrahimHasani committed on Aug 16, 2023

Commit

de71a62

1 Parent(s): 9588460

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ from transformers import AutoProcessor, AutoModel
 from PIL import Image
 import cv2
 from concurrent.futures import ThreadPoolExecutor
 MODEL_NAME = "microsoft/xclip-base-patch16-zero-shot"
@@ -18,20 +19,28 @@ print ("device")
 processor = AutoProcessor.from_pretrained(MODEL_NAME)
 model = AutoModel.from_pretrained(MODEL_NAME).to(device)
 def get_video_length(file_path):
-    cap = cv2.VideoCapture(file_path)
-    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    cap.release()
-    return length
-def read_video_opencv(file_path, indices):
     frames = []
-    with ThreadPoolExecutor() as executor:
-        futures = [executor.submit(get_frame, file_path, i) for i in indices]
-        for future in futures:
-            frame = future.result()
-            if frame is not None:
-                frames.append(frame)
     return frames
 def get_frame(file_path, index):
@@ -71,7 +80,7 @@ def concatenate_frames(frames, clip_len):
 def model_interface(uploaded_video, activity):
     video_length = get_video_length(uploaded_video)
     indices = sample_uniform_frame_indices(CLIP_LEN, seg_len=video_length)
-    video = read_video_opencv(uploaded_video, indices)
     concatenated_image = concatenate_frames(video, CLIP_LEN)
     activities_list = [activity, "other"]

 from PIL import Image
 import cv2
 from concurrent.futures import ThreadPoolExecutor
+import PyNvCodec as nvc
 MODEL_NAME = "microsoft/xclip-base-patch16-zero-shot"
 processor = AutoProcessor.from_pretrained(MODEL_NAME)
 model = AutoModel.from_pretrained(MODEL_NAME).to(device)
 def get_video_length(file_path):
+    decoder = nvc.PyNvDecoder(file_path, 0)  # 0 indicates GPU ID
+    return decoder.FramesCount()
+def read_video_nvcodec(file_path, indices):
     frames = []
+    decoder = nvc.PyNvDecoder(file_path, 0)  # 0 indicates GPU ID
+    nv12_surf_plane = nvc.PySurface()
+    for i in range(max(indices) + 1):
+        success = decoder.DecodeSingleSurface(nv12_surf_plane)
+        if not success:
+            break
+        if i in indices:
+            rgb_surf = nv12_surf_plane.ToColor(nvc.PixelFormat.RGB)
+            h, w, c = rgb_surf.HostShape()
+            frame = np.ndarray(shape=(h, w, c), dtype=np.uint8, order='C')
+            rgb_surf.Download(frame)
+            frames.append(frame)
     return frames
 def get_frame(file_path, index):
 def model_interface(uploaded_video, activity):
     video_length = get_video_length(uploaded_video)
     indices = sample_uniform_frame_indices(CLIP_LEN, seg_len=video_length)
+    video = read_video_nvcodec(uploaded_video, indices)
     concatenated_image = concatenate_frames(video, CLIP_LEN)
     activities_list = [activity, "other"]