Commit 41a0f72
Parent(s): d0e8928
Update app_swin.py

app_swin.py CHANGED (+7 -7)
```diff
@@ -208,7 +208,7 @@ class CreateDatasetProd():
         video = self.transform_prod(video.permute(0, 3, 1, 2))
         video = self.image_processor(list(video), return_tensors='pt', input_data_format='channels_first')
         pixel_values = video['pixel_values'].squeeze(0)
-        pixel_values = pixel_values.permute(1, 0, 2, 3).to(device)
+        #pixel_values = pixel_values.permute(1, 0, 2, 3).to(device)

         # Force garbage collection
         del video
```
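The removed permute reordered each clip from `(T, C, H, W)` to `(C, T, H, W)` inside the dataset class; the commit defers that step (and the move to `device`) to inference time, in the next hunk. A minimal sketch of the two layouts, with illustrative shapes only:

```python
import torch

# Illustrative shapes only: a 32-frame clip of 3 x 224 x 224 frames.
pixel_values = torch.rand(32, 3, 224, 224)  # (T, C, H, W) - what the dataset now returns

# The removed line produced the channels-leading layout at dataset-creation time:
clip = pixel_values.permute(1, 0, 2, 3)     # (C, T, H, W)
print(clip.shape)                           # torch.Size([3, 32, 224, 224])
```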
```diff
@@ -259,17 +259,17 @@ def prod_function(model_pretrained, prod_ds):
     accelerated_model.eval()

     with torch.no_grad():
-        outputs = accelerated_model(acclerated_prod_ds.unsqueeze(0))
+        outputs = accelerated_model(acclerated_prod_ds.permute(1, 0, 2, 3).unsqueeze(0))

     prod_softmax = torch.nn.functional.softmax(outputs, dim=-1)
     prod_pred = prod_softmax.argmax(-1)

-    return prod_pred
+    return prod_pred, acclerated_prod_ds

 # Function to get landmarked video
 def save_video_to_mp4(video_tensor, fps=10):
     # Convert pytorch tensor to numpy ndarray
-    video_numpy = video_tensor.permute(
+    video_numpy = video_tensor.permute(0, 2, 3, 1).cpu().numpy()
     # Normalize values to [0, 255] if necessary
     if video_numpy.max() <= 1.0:
         #video_numpy = (video_numpy / 2 + 0.5).astype(np.uint8) # Unnormalize
```
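`prod_function` picks up the permute that the dataset class dropped, and now returns the clip tensor alongside the prediction so the caller can reuse it for rendering. A runnable sketch of the same flow with a toy classifier standing in for `accelerated_model` (all shapes are assumptions; the `acclerated_prod_ds` spelling follows the source):

```python
import torch

# Toy stand-ins: a tiny classifier and an 8-frame clip of 3 x 16 x 16 frames.
toy_model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(3 * 8 * 16 * 16, 10))
acclerated_prod_ds = torch.rand(8, 3, 16, 16)  # (T, C, H, W)

toy_model.eval()
with torch.no_grad():
    # (T, C, H, W) -> (C, T, H, W), then add the batch dim -> (1, C, T, H, W)
    outputs = toy_model(acclerated_prod_ds.permute(1, 0, 2, 3).unsqueeze(0))

prod_softmax = torch.nn.functional.softmax(outputs, dim=-1)
prod_pred = prod_softmax.argmax(-1)            # shape (1,): one class index per batch
```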
```diff
@@ -298,7 +298,7 @@ def save_video_to_mp4(video_tensor, fps=10):
     out.release()
     ## Return the byte buffer's content (the video as bytes)
     #byte_buffer.seek(0)
-    return output_path
+    return output_path #byte_buffer.read()

 # Function to list available videos dynamically
 def list_videos():
```
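The function keeps writing to disk and returning the file path; the byte-buffer alternative survives only as comments. A self-contained sketch of that write-and-return pattern with OpenCV (the helper name, codec choice, and RGB-input assumption are mine):

```python
import cv2
import numpy as np

def save_video_sketch(video_numpy, output_path="output.mp4", fps=10):
    # video_numpy: (T, H, W, C) uint8 RGB frames, the layout produced by
    # video_tensor.permute(0, 2, 3, 1).cpu().numpy() in new line 272 above.
    t, h, w, _ = video_numpy.shape
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
    for frame in video_numpy:
        out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))  # VideoWriter expects BGR
    out.release()
    return output_path  # as in the commit: the path, not the raw bytes

save_video_sketch(np.random.randint(0, 255, (8, 64, 64, 3), dtype=np.uint8))
```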
```diff
@@ -315,11 +315,11 @@ def play_video(selected_video):
 def translate_sign_language(gesture):
     # Create Dataset
     prod_ds = dataset_prod_obj.create_dataset(gesture)
-    prod_video_path
+    prod_video_path = save_video_to_mp4(prod_ds)
     #prod_video = np.random.randint(0, 255, (32, 225, 225, 3), dtype=np.uint8)

     # Run ML Model
-    predicted_prod_label = prod_function(recon_model, prod_ds)
+    predicted_prod_label, video_tensor = prod_function(recon_model, prod_ds)

     # Identify the hand gesture
     predicted_prod_label = predicted_prod_label.squeeze(0)
```
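Taken together, `translate_sign_language` now renders the preprocessed clip to an mp4 before inference and unpacks the two-value return of `prod_function`. A hedged sketch of the resulting flow, assuming the module-level `save_video_to_mp4` and `prod_function` from the hunks above:

```python
def translate_sign_language_sketch(gesture, dataset_prod_obj, recon_model):
    # Preprocess the raw gesture clip into a (T, C, H, W) tensor.
    prod_ds = dataset_prod_obj.create_dataset(gesture)
    # New in this commit: save the clip to disk so the UI can play it back.
    prod_video_path = save_video_to_mp4(prod_ds)
    # New in this commit: prod_function also hands the clip tensor back.
    predicted_prod_label, video_tensor = prod_function(recon_model, prod_ds)
    # Drop the batch dimension before mapping the index to a gesture label.
    return predicted_prod_label.squeeze(0), prod_video_path
```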