File size: 5,481 Bytes
5f42812
378b5d5
5f42812
d40303a
 
e927231
5f42812
 
 
 
 
bd727fa
 
 
 
 
 
 
d40303a
 
e927231
 
d40303a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f42812
d40303a
e927231
d40303a
 
 
 
 
 
 
e927231
 
d40303a
 
 
e927231
d40303a
 
 
 
 
630bec9
 
 
9a80e6e
d40303a
9a80e6e
630bec9
e927231
630bec9
 
 
 
9a80e6e
 
b841197
 
e927231
9a80e6e
 
b841197
 
 
 
e927231
0820857
b841197
 
e927231
b841197
0820857
 
 
9a80e6e
 
0820857
 
e927231
b841197
 
 
e927231
0820857
 
 
d40303a
e927231
d40303a
e927231
 
b841197
 
 
 
d40303a
 
 
 
5f42812
d40303a
 
 
 
 
 
 
 
 
5f42812
 
d40303a
 
0820857
d40303a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f42812
 
d40303a
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import gradio as gr
from video_processor.processor import VideoAnalyzer, get_video_duration_seconds
import logging
import torch
import spaces
import time

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Emit environment/version info once at import time so missing-CUDA or
# version-mismatch problems are visible in the startup logs.
cuda_ok = torch.cuda.is_available()
logger.info(f"PyTorch version: {torch.__version__}")
logger.info(f"CUDA available: {cuda_ok}")
if cuda_ok:
    logger.info(f"CUDA version: {torch.version.cuda}")
    logger.info(f"GPU device: {torch.cuda.get_device_name(0)}")

@spaces.GPU
def on_process(video):
    """Analyze an uploaded video segment by segment, streaming progress to the UI.

    Generator used as a Gradio event handler. Every ``yield`` is a 3-item
    list matching the ``outputs`` of the click binding:
    ``[status_markdown, segment_description_markdown, accordion_visibility_update]``.

    Args:
        video: Filesystem path of the uploaded video, or a falsy value when
            nothing was uploaded.
    """
    start_time = time.time()

    # Clear all components when starting new processing
    yield [
        "",  # Clear status
        "",  # Clear description
        gr.update(visible=False)  # Hide accordion
    ]

    if not video:
        yield [
            "Please upload a video",
            "",
            gr.update(visible=False)
        ]
        return

    try:
        # Initialize analyzer (model load can be slow, so time it separately).
        init_start = time.time()
        yield [
            "Initializing video analyzer...",
            "",
            gr.update(visible=False)
        ]

        analyzer = VideoAnalyzer()
        init_time = time.time() - init_start
        logger.info(f"Initialization took {init_time:.2f} seconds")

        # Process video
        yield [
            f"Model initialized in {init_time:.2f}s. Starting analysis...",
            "",
            gr.update(visible=True)
        ]

        logger.info(f"Processing video: {video}")

        # Get duration and calculate total segments
        duration = get_video_duration_seconds(video)
        total_segments = (int(duration) + 9) // 10  # Ceiling division for 10-second segments

        # Process video segments
        yield [
            f"Processing video... (Will analyze {total_segments} segments)",
            "",
            gr.update(visible=True)
        ]

        # Process segments and show progress
        segments = []
        total_ffmpeg_time = 0
        total_inference_time = 0
        # BUGFIX: pre-initialize so the final yield below cannot raise
        # NameError when the analyzer produces no segments at all.
        formatted_desc = "### Video Analysis by Segments:\n\n"

        for i, segment in enumerate(analyzer.process_video(video)):
            segments.append(segment)

            # Update timing totals
            total_ffmpeg_time += segment['processing_times']['ffmpeg']
            total_inference_time += segment['processing_times']['inference']

            # BUGFIX: guard total_segments == 0 (sub-second duration) so the
            # progress computation cannot raise ZeroDivisionError; progress
            # is then best-effort.
            progress = int((i + 1) / max(total_segments, 1) * 100)
            avg_ffmpeg_time = total_ffmpeg_time / (i + 1)
            avg_inference_time = total_inference_time / (i + 1)
            # Clamp at 0 in case the analyzer yields more segments than the
            # duration-based estimate predicted.
            remaining_segments = max(total_segments - (i + 1), 0)
            estimated_remaining = remaining_segments * (avg_ffmpeg_time + avg_inference_time)

            # Format current segments
            formatted_desc = "### Video Analysis by Segments:\n\n"
            for seg in segments:
                formatted_desc += f"**[{seg['timestamp']}]** {seg['description']}\n\n"

            yield [
                f"Processing segments... {progress}% complete\n" +
                f"Segment {i+1}/{total_segments}\n" +
                f"FFmpeg processing: {segment['processing_times']['ffmpeg']:.2f}s (avg: {avg_ffmpeg_time:.2f}s)\n" +
                f"Model inference: {segment['processing_times']['inference']:.2f}s (avg: {avg_inference_time:.2f}s)\n" +
                f"Estimated time remaining: {estimated_remaining:.2f}s",
                formatted_desc,
                gr.update(visible=True)
            ]

        total_time = time.time() - start_time
        # BUGFIX: average over segments actually processed (not the estimate,
        # which can differ and can be zero -> ZeroDivisionError).
        n_done = max(len(segments), 1)
        yield [
            f"Processing complete!\n" +
            f"Total processing time: {total_time:.2f}s\n" +
            f"Average per segment:\n" +
            f"  - FFmpeg: {total_ffmpeg_time/n_done:.2f}s\n" +
            f"  - Inference: {total_inference_time/n_done:.2f}s\n" +
            f"  - Total: {(total_ffmpeg_time + total_inference_time)/n_done:.2f}s",
            formatted_desc,
            gr.update(visible=True)
        ]

    except Exception as e:
        logger.exception("Error processing video")
        yield [
            f"Error processing video: {str(e)}",
            "",
            gr.update(visible=False)
        ]
    finally:
        # Clean up cached GPU memory between requests (skip when no CUDA).
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

# Build the Gradio UI: video input + trigger button on the left, streaming
# status and per-segment analysis on the right.
with gr.Blocks() as demo:
    gr.Markdown("# SmolVLM Video Analyzer")
    gr.Markdown("Upload a video to get a detailed analysis of its content, split into segments with timestamps.")

    with gr.Row():
        with gr.Column(scale=1):
            video_input = gr.Video(label="Upload your video", interactive=True)
            run_button = gr.Button("Process Video", variant="primary")

        with gr.Column(scale=1):
            status_box = gr.Markdown()
            details_accordion = gr.Accordion("Analysis Details", open=True, visible=False)
            with details_accordion:
                description_box = gr.Markdown("")

    # Stream on_process's yielded updates into the three output components.
    run_button.click(
        on_process,
        inputs=[video_input],
        outputs=[status_box, description_box, details_accordion],
        queue=True,
    )

if __name__ == "__main__":
    # Listen on all interfaces on the standard HF Spaces port; no public
    # share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)