IbrahimHasani committed on
Commit
f2ea5a0
·
1 Parent(s): d7eab74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -14
app.py CHANGED
@@ -3,15 +3,17 @@ import torch
3
  import numpy as np
4
  from transformers import AutoProcessor, AutoModel
5
  from PIL import Image
6
- import cv2
7
 
8
  MODEL_NAME = "microsoft/xclip-base-patch16-zero-shot"
9
  CLIP_LEN = 32
10
 
11
- # Load model and processor once
12
- processor = AutoProcessor.from_pretrained(MODEL_NAME)
13
- model = AutoModel.from_pretrained(MODEL_NAME)
14
 
 
 
 
15
 
16
  def get_video_length(file_path):
17
  cap = cv2.VideoCapture(file_path)
@@ -49,13 +51,7 @@ def concatenate_frames(frames, clip_len):
49
  for i in range(rows):
50
  x_offset = 0
51
  for j in range(cols):
52
- img_array = next(frame_iter)
53
-
54
- # Handling rank-4 tensor
55
- if len(img_array.shape) == 4:
56
- img_array = img_array[0]
57
-
58
- img = Image.fromarray(img_array)
59
  combined_image.paste(img, (x_offset, y_offset))
60
  x_offset += frames[0].shape[1]
61
  y_offset += frames[0].shape[0]
@@ -75,6 +71,11 @@ def model_interface(uploaded_video, activity):
75
  padding=True,
76
  )
77
 
 
 
 
 
 
78
  with torch.no_grad():
79
  outputs = model(**inputs)
80
 
@@ -86,13 +87,13 @@ def model_interface(uploaded_video, activity):
86
  max_prob_index = torch.argmax(probs[0]).item()
87
  for i in range(len(activities_list)):
88
  current_activity = activities_list[i]
89
- prob = float(probs[0][i])
90
- logit = float(logits_per_video[0][i])
91
  results_probs.append((current_activity, f"Probability: {prob * 100:.2f}%"))
92
  results_logits.append((current_activity, f"Raw Score: {logit:.2f}"))
93
 
94
  likely_label = activities_list[max_prob_index]
95
- likely_probability = float(probs[0][max_prob_index]) * 100
96
 
97
  return concatenated_image, results_probs, results_logits, [likely_label, likely_probability]
98
 
 
3
  import numpy as np
4
  from transformers import AutoProcessor, AutoModel
5
  from PIL import Image
6
+ import cv2
7
 
8
  MODEL_NAME = "microsoft/xclip-base-patch16-zero-shot"
9
  CLIP_LEN = 32
10
 
11
+ # Check if GPU is available and set the device
12
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
13
 
14
+ # Load model and processor once and move them to the device
15
+ processor = AutoProcessor.from_pretrained(MODEL_NAME)
16
+ model = AutoModel.from_pretrained(MODEL_NAME).to(device)
17
 
18
  def get_video_length(file_path):
19
  cap = cv2.VideoCapture(file_path)
 
51
  for i in range(rows):
52
  x_offset = 0
53
  for j in range(cols):
54
+ img = Image.fromarray(next(frame_iter))
 
 
 
 
 
 
55
  combined_image.paste(img, (x_offset, y_offset))
56
  x_offset += frames[0].shape[1]
57
  y_offset += frames[0].shape[0]
 
71
  padding=True,
72
  )
73
 
74
+ # Move the tensors to the same device as the model
75
+ for key, value in inputs.items():
76
+ if isinstance(value, torch.Tensor):
77
+ inputs[key] = value.to(device)
78
+
79
  with torch.no_grad():
80
  outputs = model(**inputs)
81
 
 
87
  max_prob_index = torch.argmax(probs[0]).item()
88
  for i in range(len(activities_list)):
89
  current_activity = activities_list[i]
90
+ prob = float(probs[0][i].cpu()) # Move tensor data to CPU for further processing
91
+ logit = float(logits_per_video[0][i].cpu()) # Move tensor data to CPU for further processing
92
  results_probs.append((current_activity, f"Probability: {prob * 100:.2f}%"))
93
  results_logits.append((current_activity, f"Raw Score: {logit:.2f}"))
94
 
95
  likely_label = activities_list[max_prob_index]
96
+ likely_probability = float(probs[0][max_prob_index].cpu()) * 100 # Move tensor data to CPU
97
 
98
  return concatenated_image, results_probs, results_logits, [likely_label, likely_probability]
99