Commit 41a0f72
Parent(s): d0e8928
Update app_swin.py

app_swin.py CHANGED (+7 -7)
```diff
@@ -208,7 +208,7 @@ class CreateDatasetProd():
         video = self.transform_prod(video.permute(0, 3, 1, 2))
         video = self.image_processor(list(video), return_tensors='pt', input_data_format='channels_first')
         pixel_values = video['pixel_values'].squeeze(0)
-        pixel_values = pixel_values.permute(1, 0, 2, 3).to(device)
+        #pixel_values = pixel_values.permute(1, 0, 2, 3).to(device)

         # Force garbage collection
         del video
```
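The removed permute reordered each clip from `(T, C, H, W)` to `(C, T, H, W)` inside the dataset class; the commit defers that step (and the move to `device`) to inference time, in the next hunk. A minimal sketch of the two layouts, with illustrative shapes only:

```python
import torch

# Illustrative shapes only: a 32-frame clip of 3 x 224 x 224 frames.
pixel_values = torch.rand(32, 3, 224, 224)  # (T, C, H, W) - what the dataset now returns

# The removed line produced the channels-leading layout at dataset-creation time:
clip = pixel_values.permute(1, 0, 2, 3)     # (C, T, H, W)
print(clip.shape)                           # torch.Size([3, 32, 224, 224])
```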
```diff
@@ -259,17 +259,17 @@ def prod_function(model_pretrained, prod_ds):
     accelerated_model.eval()

     with torch.no_grad():
-        outputs = accelerated_model(acclerated_prod_ds.unsqueeze(0))
+        outputs = accelerated_model(acclerated_prod_ds.permute(1, 0, 2, 3).unsqueeze(0))

     prod_softmax = torch.nn.functional.softmax(outputs, dim=-1)
     prod_pred = prod_softmax.argmax(-1)

-    return prod_pred
+    return prod_pred, acclerated_prod_ds

 # Function to get landmarked video
 def save_video_to_mp4(video_tensor, fps=10):
     # Convert pytorch tensor to numpy ndarray
-    video_numpy = video_tensor.permute(
+    video_numpy = video_tensor.permute(0, 2, 3, 1).cpu().numpy()
     # Normalize values to [0, 255] if necessary
     if video_numpy.max() <= 1.0:
         #video_numpy = (video_numpy / 2 + 0.5).astype(np.uint8) # Unnormalize
```
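`prod_function` picks up the permute that the dataset class dropped, and now returns the clip tensor alongside the prediction so the caller can reuse it for rendering. A runnable sketch of the same flow with a toy classifier standing in for `accelerated_model` (all shapes are assumptions; the `acclerated_prod_ds` spelling follows the source):

```python
import torch

# Toy stand-ins: a tiny classifier and an 8-frame clip of 3 x 16 x 16 frames.
toy_model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(3 * 8 * 16 * 16, 10))
acclerated_prod_ds = torch.rand(8, 3, 16, 16)  # (T, C, H, W)

toy_model.eval()
with torch.no_grad():
    # (T, C, H, W) -> (C, T, H, W), then add the batch dim -> (1, C, T, H, W)
    outputs = toy_model(acclerated_prod_ds.permute(1, 0, 2, 3).unsqueeze(0))

prod_softmax = torch.nn.functional.softmax(outputs, dim=-1)
prod_pred = prod_softmax.argmax(-1)            # shape (1,): one class index per batch
```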
```diff
@@ -298,7 +298,7 @@ def save_video_to_mp4(video_tensor, fps=10):
     out.release()
     ## Return the byte buffer's content (the video as bytes)
     #byte_buffer.seek(0)
-    return output_path
+    return output_path #byte_buffer.read()

 # Function to list available videos dynamically
 def list_videos():
```
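The function keeps writing to disk and returning the file path; the byte-buffer alternative survives only as comments. A self-contained sketch of that write-and-return pattern with OpenCV (the helper name, codec choice, and RGB-input assumption are mine):

```python
import cv2
import numpy as np

def save_video_sketch(video_numpy, output_path="output.mp4", fps=10):
    # video_numpy: (T, H, W, C) uint8 RGB frames, the layout produced by
    # video_tensor.permute(0, 2, 3, 1).cpu().numpy() in new line 272 above.
    t, h, w, _ = video_numpy.shape
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
    for frame in video_numpy:
        out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))  # VideoWriter expects BGR
    out.release()
    return output_path  # as in the commit: the path, not the raw bytes

save_video_sketch(np.random.randint(0, 255, (8, 64, 64, 3), dtype=np.uint8))
```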
```diff
@@ -315,11 +315,11 @@ def play_video(selected_video):
 def translate_sign_language(gesture):
     # Create Dataset
     prod_ds = dataset_prod_obj.create_dataset(gesture)
-    prod_video_path
+    prod_video_path = save_video_to_mp4(prod_ds)
     #prod_video = np.random.randint(0, 255, (32, 225, 225, 3), dtype=np.uint8)

     # Run ML Model
-    predicted_prod_label = prod_function(recon_model, prod_ds)
+    predicted_prod_label, video_tensor = prod_function(recon_model, prod_ds)

     # Identify the hand gesture
     predicted_prod_label = predicted_prod_label.squeeze(0)
```
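Taken together, `translate_sign_language` now renders the preprocessed clip to an mp4 before inference and unpacks the two-value return of `prod_function`. A hedged sketch of the resulting flow, assuming the module-level `save_video_to_mp4` and `prod_function` from the hunks above:

```python
def translate_sign_language_sketch(gesture, dataset_prod_obj, recon_model):
    # Preprocess the raw gesture clip into a (T, C, H, W) tensor.
    prod_ds = dataset_prod_obj.create_dataset(gesture)
    # New in this commit: save the clip to disk so the UI can play it back.
    prod_video_path = save_video_to_mp4(prod_ds)
    # New in this commit: prod_function also hands the clip tensor back.
    predicted_prod_label, video_tensor = prod_function(recon_model, prod_ds)
    # Drop the batch dimension before mapping the index to a gesture label.
    return predicted_prod_label.squeeze(0), prod_video_path
```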