Kaushik066 committed
Commit 41a0f72 · 1 Parent(s): d0e8928

Update app_swin.py

Files changed (1)
  1. app_swin.py +7 -7
app_swin.py CHANGED
@@ -208,7 +208,7 @@ class CreateDatasetProd():
  video = self.transform_prod(video.permute(0, 3, 1, 2))
  video = self.image_processor(list(video), return_tensors='pt', input_data_format='channels_first')
  pixel_values = video['pixel_values'].squeeze(0)
- pixel_values = pixel_values.permute(1, 0, 2, 3).to(device)
+ #pixel_values = pixel_values.permute(1, 0, 2, 3).to(device)
 
  # Force garbage collection
  del video
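Note: this hunk stops permuting pixel_values to (channels, frames, H, W) inside create_dataset; the tensor now leaves the dataset in the image processor's (frames, channels, H, W) layout, and the permute is deferred to inference. A minimal sketch of the two layouts, assuming 32 frames at 3×224×224 (the app's real shapes may differ):

import torch

pixel_values = torch.randn(32, 3, 224, 224)         # (T, C, H, W), as the image processor returns it
channels_first = pixel_values.permute(1, 0, 2, 3)   # (C, T, H, W), the layout the old code produced here
print(channels_first.shape)                         # torch.Size([3, 32, 224, 224])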
@@ -259,17 +259,17 @@ def prod_function(model_pretrained, prod_ds):
  accelerated_model.eval()
 
  with torch.no_grad():
- outputs = accelerated_model(acclerated_prod_ds.unsqueeze(0))
+ outputs = accelerated_model(acclerated_prod_ds.permute(1, 0, 2, 3).unsqueeze(0))
 
  prod_softmax = torch.nn.functional.softmax(outputs, dim=-1)
  prod_pred = prod_softmax.argmax(-1)
 
- return prod_pred
+ return prod_pred, acclerated_prod_ds
 
  # Function to get landmarked video
  def save_video_to_mp4(video_tensor, fps=10):
  # Convert pytorch tensor to numpy ndarray
- video_numpy = video_tensor.permute(1, 2, 3, 0).cpu().numpy()
+ video_numpy = video_tensor.permute(0, 2, 3, 1).cpu().numpy()
  # Normalize values to [0, 255] if necessary
  if video_numpy.max() <= 1.0:
  #video_numpy = (video_numpy / 2 + 0.5).astype(np.uint8) # Unnormalize
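With the dataset-side permute gone, prod_function now applies it at the model call and also returns the dataset tensor so the caller can reuse it for rendering. A self-contained sketch of that path; the dummy model and its 10-class head are assumptions standing in for the app's accelerated Swin model:

import torch

class DummyVideoModel(torch.nn.Module):
    # Stand-in for the accelerated Swin model; assumes input shaped (B, C, T, H, W).
    def forward(self, x):
        return torch.randn(x.shape[0], 10)          # fake logits over 10 gesture classes

acclerated_prod_ds = torch.randn(32, 3, 224, 224)   # (T, C, H, W) from create_dataset
model = DummyVideoModel().eval()
with torch.no_grad():
    # Permute to (C, T, H, W), then add the batch dim -> (1, C, T, H, W).
    outputs = model(acclerated_prod_ds.permute(1, 0, 2, 3).unsqueeze(0))
prod_pred = torch.nn.functional.softmax(outputs, dim=-1).argmax(-1)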
@@ -298,7 +298,7 @@ def save_video_to_mp4(video_tensor, fps=10):
  out.release()
  ## Return the byte buffer's content (the video as bytes)
  #byte_buffer.seek(0)
- return output_path, video_numpy #byte_buffer.read()
+ return output_path #byte_buffer.read()
 
  # Function to list available videos dynamically
  def list_videos():
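Since save_video_to_mp4 now receives the tensor in (T, C, H, W) order, the numpy conversion switches from permute(1, 2, 3, 0) to permute(0, 2, 3, 1), and the function returns only the output path. A small sketch of the updated conversion, assuming float frames in [0, 1]:

import numpy as np
import torch

video_tensor = torch.rand(32, 3, 224, 224)                     # (T, C, H, W), values in [0, 1]
video_numpy = video_tensor.permute(0, 2, 3, 1).cpu().numpy()   # (T, H, W, C): per-frame HWC arrays for cv2.VideoWriter
if video_numpy.max() <= 1.0:
    video_numpy = (video_numpy * 255.0).astype(np.uint8)       # scale to [0, 255] for the writer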
@@ -315,11 +315,11 @@ def play_video(selected_video):
  def translate_sign_language(gesture):
  # Create Dataset
  prod_ds = dataset_prod_obj.create_dataset(gesture)
- prod_video_path, video_tensor = save_video_to_mp4(prod_ds)
+ prod_video_path = save_video_to_mp4(prod_ds)
  #prod_video = np.random.randint(0, 255, (32, 225, 225, 3), dtype=np.uint8)
 
  # Run ML Model
- predicted_prod_label = prod_function(recon_model, prod_ds)
+ predicted_prod_label, video_tensor = prod_function(recon_model, prod_ds)
 
  # Identify the hand gesture
  predicted_prod_label = predicted_prod_label.squeeze(0)
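Because save_video_to_mp4 no longer returns the tensor, translate_sign_language now takes video_tensor from prod_function instead. Hypothetical stubs showing only the reworked wiring (the bodies below are fakes, not the app's implementations):

import torch

def save_video_to_mp4(video_tensor, fps=10):
    return 'gesture.mp4'                                  # path only, no tensor anymore

def prod_function(model, prod_ds):
    return torch.zeros(1, dtype=torch.long), prod_ds      # (label, tensor) pair

prod_ds = torch.rand(32, 3, 224, 224)
prod_video_path = save_video_to_mp4(prod_ds)
predicted_prod_label, video_tensor = prod_function(None, prod_ds)
predicted_prod_label = predicted_prod_label.squeeze(0)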
 