Spaces:

reab5555
/

Multimodal-Behavioral-Anomalies-Detection

Running

App Files Files Community

reab5555 committed on Jul 30, 2024

Commit

6e79c57

verified ·

1 Parent(s): 9c87667

Update visualization.py

Browse files

Files changed (1) hide show

visualization.py +35 -11

visualization.py CHANGED Viewed

@@ -230,7 +230,7 @@ def create_heatmap(t, mse_embeddings, mse_posture, mse_voice, video_fps, total_f
     combined_mse[1] = mse_posture_norm
     combined_mse[2] = mse_voice_norm
-    fig, ax = plt.subplots(figsize=(video_width / 300, 0.5))
     ax.imshow(combined_mse, aspect='auto', cmap='Reds', vmin=0, vmax=1, extent=[0, total_frames, 0, 3])
     ax.set_yticks([0.5, 1.5, 2.5])
     ax.set_yticklabels(['Voice', 'Posture', 'Face'], fontsize=7)
@@ -264,25 +264,49 @@ def create_video_with_heatmap(video_path, df, mse_embeddings, mse_posture, mse_v
     width, height = video.w, video.h
     total_frames = int(video.duration * video.fps)
-    def fill_with_zeros_and_values(mse_array, total_frames):
         result = np.zeros(total_frames)
         indices = np.linspace(0, total_frames - 1, len(mse_array)).astype(int)
         result[indices] = mse_array
         return result
     # Ensure all MSE arrays have the same length as total_frames
-    mse_embeddings = fill_with_zeros_and_values(mse_embeddings, total_frames)
-    mse_posture = fill_with_zeros_and_values(mse_posture, total_frames)
-    mse_voice = fill_with_zeros_and_values(mse_voice, total_frames)
-    def combine_video_and_heatmap(t):
-        video_frame = video.get_frame(t)
-        heatmap_frame = create_heatmap(t, mse_embeddings, mse_posture, mse_voice, video.fps, total_frames, width)
-        heatmap_frame_resized = cv2.resize(heatmap_frame, (width, heatmap_frame.shape[0]))
-        combined_frame = np.vstack((video_frame, heatmap_frame_resized))
         return combined_frame
-    final_clip = VideoClip(combine_video_and_heatmap, duration=video.duration)
     final_clip = final_clip.set_audio(video.audio)
     # Write the final video

     combined_mse[1] = mse_posture_norm
     combined_mse[2] = mse_voice_norm
+    fig, ax = plt.subplots(figsize=(video_width / 300, 0.4))
     ax.imshow(combined_mse, aspect='auto', cmap='Reds', vmin=0, vmax=1, extent=[0, total_frames, 0, 3])
     ax.set_yticks([0.5, 1.5, 2.5])
     ax.set_yticklabels(['Voice', 'Posture', 'Face'], fontsize=7)
     width, height = video.w, video.h
     total_frames = int(video.duration * video.fps)
+    def fill_with_previous_values(mse_array, total_frames):
         result = np.zeros(total_frames)
         indices = np.linspace(0, total_frames - 1, len(mse_array)).astype(int)
         result[indices] = mse_array
+        for i in range(1, total_frames):
+            if result[i] == 0:
+                result[i] = result[i - 1]
         return result
     # Ensure all MSE arrays have the same length as total_frames
+    mse_embeddings = fill_with_previous_values(mse_embeddings, total_frames)
+    mse_posture = fill_with_previous_values(mse_posture, total_frames)
+    mse_voice = fill_with_previous_values(mse_voice, total_frames)
+    def create_heatmap(t, mse_embeddings, mse_posture, mse_voice, fps, total_frames, width):
+        frame_index = int(t * fps)
+        mse_face = mse_embeddings[frame_index]
+        mse_body = mse_posture[frame_index]
+        mse_audio = mse_voice[frame_index]
+        max_mse_face = np.max(mse_embeddings)
+        max_mse_body = np.max(mse_posture)
+        max_mse_audio = np.max(mse_voice)
+        def get_heatmap_color(mse_value, max_mse_value):
+            normalized_value = mse_value / max_mse_value if max_mse_value != 0 else 0
+            color = plt.cm.hot(normalized_value)
+            return (color[0] * 255, color[1] * 255, color[2] * 255)  # Convert to RGB
+        heatmap_face = np.full((height // 3, width, 3), get_heatmap_color(mse_face, max_mse_face), dtype=np.uint8)
+        heatmap_body = np.full((height // 3, width, 3), get_heatmap_color(mse_body, max_mse_body), dtype=np.uint8)
+        heatmap_audio = np.full((height // 3, width, 3), get_heatmap_color(mse_audio, max_mse_audio), dtype=np.uint8)
+        heatmap = np.vstack((heatmap_face, heatmap_body, heatmap_audio))
+        return heatmap
+    def combine_video_and_heatmap(get_frame, t):
+        video_frame = get_frame(t)
+        heatmap_frame = create_heatmap(t, mse_embeddings, mse_posture, mse_voice, desired_fps, total_frames, width)
+        combined_frame = np.vstack((video_frame, heatmap_frame))
         return combined_frame
+    final_clip = video.fl(combine_video_and_heatmap)
     final_clip = final_clip.set_audio(video.audio)
     # Write the final video