reab5555 committed on
Commit
8b78d8c
·
verified ·
1 Parent(s): b568300

Update video_processing.py

Browse files
Files changed (1) hide show
  1. video_processing.py +30 -1
video_processing.py CHANGED
@@ -15,7 +15,8 @@ import pandas as pd
15
  from facenet_pytorch import MTCNN
16
  import torch
17
  import mediapipe as mp
18
-
 
19
 
20
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21
  mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_face_size=50)
@@ -160,6 +161,21 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
160
 
161
  mse_embeddings, mse_posture = anomaly_detection(X_embeddings, X_posture)
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  progress(0.95, "Generating plots")
164
  mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
165
  color=GRAPH_COLORS['facial_embeddings'],
@@ -179,6 +195,14 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
179
 
180
  mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
181
 
 
 
 
 
 
 
 
 
182
  except Exception as e:
183
  print(f"Error details: {str(e)}")
184
  import traceback
@@ -241,6 +265,11 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
241
  mse_histogram_posture,
242
  mse_heatmap_embeddings,
243
  mse_heatmap_posture,
 
 
 
 
 
244
  face_samples["most_frequent"],
245
  anomaly_faces_embeddings,
246
  anomaly_frames_posture_images,
 
15
  from facenet_pytorch import MTCNN
16
  import torch
17
  import mediapipe as mp
18
+ from voice_analysis import process_audio, cluster_voices, get_most_frequent_voice
19
+ from pydub import AudioSegment
20
 
21
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
22
  mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_face_size=50)
 
161
 
162
  mse_embeddings, mse_posture = anomaly_detection(X_embeddings, X_posture)
163
 
164
+ # Extract audio from video
165
+ video = AudioSegment.from_file(video_path, "mp4")
166
+ audio_path = os.path.join(temp_dir, "audio.wav")
167
+ video.export(audio_path, format="wav")
168
+
169
+ # Process audio
170
+ voice_embeddings = process_audio(audio_path)
171
+ voice_clusters = cluster_voices(voice_embeddings)
172
+ most_frequent_voice = get_most_frequent_voice(voice_embeddings, voice_clusters)
173
+
174
+ # Perform anomaly detection on voice
175
+ X_voice = np.array(most_frequent_voice)
176
+ mse_voice = anomaly_detection(X_voice, X_voice) # Using the same function as for facial features
177
+
178
+
179
  progress(0.95, "Generating plots")
180
  mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
181
  color=GRAPH_COLORS['facial_embeddings'],
 
195
 
196
  mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
197
 
198
+ # Generate plots for voice
199
+ mse_plot_voice, anomaly_segments_voice = plot_mse(df, mse_voice, "Voice",
200
+ color='green',
201
+ anomaly_threshold=anomaly_threshold)
202
+ mse_histogram_voice = plot_mse_histogram(mse_voice, "MSE Distribution: Voice",
203
+ anomaly_threshold, color='green')
204
+ mse_heatmap_voice = plot_mse_heatmap(mse_voice, "Voice MSE Heatmap", df)
205
+
206
  except Exception as e:
207
  print(f"Error details: {str(e)}")
208
  import traceback
 
265
  mse_histogram_posture,
266
  mse_heatmap_embeddings,
267
  mse_heatmap_posture,
268
+ mse_voice,
269
+ mse_plot_voice,
270
+ mse_histogram_voice,
271
+ mse_heatmap_voice,
272
+ anomaly_segments_voice,
273
  face_samples["most_frequent"],
274
  anomaly_faces_embeddings,
275
  anomaly_frames_posture_images,