# Hugging Face Space: reab5555/Multimodal-Behavioral-Anomalies-Detection (status: Sleeping)
# File size: 5,656 bytes

import gradio as gr
import time
from video_processing import process_video
from PIL import Image
import matplotlib

# Render and save every matplotlib figure at 300 DPI.
matplotlib.rcParams.update({
    'figure.dpi': 300,
    'savefig.dpi': 300,
})

# Number of values every return path must produce: one per component in the
# `outputs=[...]` list passed to `process_btn.click`.
_NUM_OUTPUTS = 22


def _error_outputs(message):
    """Build a full-length output list that reports `message` to the user.

    Slot 0 is wired to `execution_time` (a gr.Number), which cannot display a
    string, so the message goes into slot 1 (`results_text`, a text area).
    Every other slot is cleared with None.
    """
    return [None, message] + [None] * (_NUM_OUTPUTS - 2)


def process_and_show_completion(video_input_path, anomaly_threshold_input, fps, progress=gr.Progress()):
    """Run `process_video` and arrange its results for the Gradio outputs.

    Args:
        video_input_path: Value of the video input component, forwarded
            unchanged to `process_video`.
        anomaly_threshold_input: Value of the anomaly threshold slider,
            forwarded unchanged to `process_video`.
        fps: Value of the FPS slider, forwarded unchanged to `process_video`.
        progress: Gradio progress tracker, forwarded to `process_video`. The
            `gr.Progress()` default is how Gradio injects a tracker into an
            event handler, so it is intentionally created in the signature.

    Returns:
        A list of `_NUM_OUTPUTS` values in the order of the `outputs` list of
        `process_btn.click`. On failure (either `process_video` reports a
        string starting with "Error" as its first element, or any exception
        is raised) the list holds the error message in the results-text slot
        and None everywhere else.
    """
    try:
        print("Starting video processing...")
        results = process_video(video_input_path, anomaly_threshold_input, fps, progress=progress)
        print("Video processing completed.")

        # `process_video` signals failure in-band: first element is an
        # "Error..." string instead of the execution time.
        if isinstance(results[0], str) and results[0].startswith("Error"):
            print(f"Error occurred: {results[0]}")
            return _error_outputs(results[0])

        # `_mse_voice` is part of the result tuple but has no output component.
        (exec_time, results_summary, df, mse_embeddings, mse_posture,
         mse_plot_embeddings, mse_histogram_embeddings,
         mse_plot_posture, mse_histogram_posture,
         mse_heatmap_embeddings, mse_heatmap_posture,
         face_samples_frequent,
         anomaly_faces_embeddings, anomaly_frames_posture_images,
         aligned_faces_folder, frames_folder,
         _mse_voice, mse_plot_voice, mse_histogram_voice, mse_heatmap_voice,
         anomaly_segments_voice) = results

        # The galleries are fed PIL images: anomaly frames arrive as arrays,
        # the frequent-face samples arrive as file paths.
        anomaly_faces_embeddings_pil = [Image.fromarray(face) for face in anomaly_faces_embeddings]
        anomaly_frames_posture_pil = [Image.fromarray(frame) for frame in anomaly_frames_posture_images]
        face_samples_frequent = [Image.open(path) for path in face_samples_frequent]

        # Order must match the `outputs` list of `process_btn.click` exactly.
        output = [
            exec_time, results_summary,
            df, mse_embeddings, mse_posture,
            mse_plot_embeddings, mse_plot_posture,
            mse_histogram_embeddings, mse_histogram_posture,
            mse_heatmap_embeddings, mse_heatmap_posture,
            anomaly_faces_embeddings_pil, anomaly_frames_posture_pil,
            face_samples_frequent,
            aligned_faces_folder, frames_folder,
            # These two slots feed the heatmap state stores; previously they
            # received a second copy of the raw MSE values instead.
            mse_heatmap_embeddings, mse_heatmap_posture,
            mse_plot_voice, mse_histogram_voice, mse_heatmap_voice, anomaly_segments_voice,
        ]

        return output

    except Exception as e:
        # Top-level UI boundary: log the full traceback to the console and
        # surface a readable message instead of letting the event crash.
        error_message = f"An error occurred: {str(e)}"
        print(error_message)
        import traceback
        traceback.print_exc()
        return _error_outputs(error_message)

with gr.Blocks() as iface:
    gr.Markdown("""
    # Multimodal Behavioral Anomalies Detection

    This tool detects anomalies in facial expressions, body language, and voice over the timeline of a video.
    It extracts faces, postures, and voice features from video frames, and analyzes them to identify anomalies using time series analysis and a variational autoencoder (VAE) approach.
    """)

    # ---- Inputs and controls -------------------------------------------
    with gr.Row():
        video_input = gr.Video()

    anomaly_threshold = gr.Slider(
        label="Anomaly Detection Threshold (Standard deviation)",
        minimum=1,
        maximum=5,
        step=0.1,
        value=3,
    )
    fps_slider = gr.Slider(
        label="Frames Per Second (FPS)",
        minimum=5,
        maximum=20,
        step=1,
        value=10,
    )
    process_btn = gr.Button("Detect Anomalies")
    progress_bar = gr.Progress()
    execution_time = gr.Number(label="Execution Time (seconds)")

    # ---- Results (hidden until a run has finished) ---------------------
    with gr.Group(visible=False) as results_group:
        results_text = gr.TextArea(label="Anomaly Detection Results", lines=4)

        with gr.Tab("Facial Features"):
            mse_features_plot = gr.Plot(label="MSE: Facial Features")
            mse_features_hist = gr.Plot(label="MSE Distribution: Facial Features")
            mse_features_heatmap = gr.Plot(label="MSE Heatmap: Facial Features")
            anomaly_frames_features = gr.Gallery(
                label="Anomaly Frames (Facial Features)",
                columns=6,
                rows=2,
                height="auto",
            )
            face_samples_most_frequent = gr.Gallery(
                label="Most Frequent Person Samples",
                columns=10,
                rows=2,
                height="auto",
            )

        with gr.Tab("Body Posture"):
            mse_posture_plot = gr.Plot(label="MSE: Body Posture")
            mse_posture_hist = gr.Plot(label="MSE Distribution: Body Posture")
            mse_posture_heatmap = gr.Plot(label="MSE Heatmap: Body Posture")
            anomaly_frames_posture = gr.Gallery(
                label="Anomaly Frames (Body Posture)",
                columns=6,
                rows=2,
                height="auto",
            )

        with gr.Tab("Voice"):
            mse_voice_plot = gr.Plot(label="MSE: Voice")
            mse_voice_hist = gr.Plot(label="MSE Distribution: Voice")
            mse_voice_heatmap = gr.Plot(label="MSE Heatmap: Voice")
            anomaly_segments_voice = gr.Audio(label="Anomaly Voice Segments", type="filepath")

    # ---- Per-session state holders (no visible widget) -----------------
    df_store = gr.State()
    mse_features_store = gr.State()
    mse_posture_store = gr.State()
    aligned_faces_folder_store = gr.State()
    frames_folder_store = gr.State()
    mse_heatmap_embeddings_store = gr.State()
    mse_heatmap_posture_store = gr.State()

    # ---- Event wiring --------------------------------------------------
    click_inputs = [video_input, anomaly_threshold, fps_slider]

    # Order here must match the list returned by process_and_show_completion.
    click_outputs = [
        execution_time,
        results_text,
        df_store,
        mse_features_store,
        mse_posture_store,
        mse_features_plot,
        mse_posture_plot,
        mse_features_hist,
        mse_posture_hist,
        mse_features_heatmap,
        mse_posture_heatmap,
        anomaly_frames_features,
        anomaly_frames_posture,
        face_samples_most_frequent,
        aligned_faces_folder_store,
        frames_folder_store,
        mse_heatmap_embeddings_store,
        mse_heatmap_posture_store,
        mse_voice_plot,
        mse_voice_hist,
        mse_voice_heatmap,
        anomaly_segments_voice,
    ]

    processing_event = process_btn.click(
        process_and_show_completion,
        inputs=click_inputs,
        outputs=click_outputs,
    )

    # Once processing has run, un-hide the results group.
    processing_event.then(
        lambda: gr.Group(visible=True),
        inputs=None,
        outputs=[results_group],
    )

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()