"""SmolVLM Video Analyzer.

Gradio app that accepts an uploaded video, splits it into 10-second
segments, runs a vision-language model over each segment, and streams
progress plus per-segment descriptions back to the UI.
"""

import logging
import time

import gradio as gr
import spaces
import torch

from video_processor.processor import VideoAnalyzer, get_video_duration_seconds

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Log environment information at startup so deployment issues are visible.
logger.info(f"PyTorch version: {torch.__version__}")
logger.info(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    logger.info(f"CUDA version: {torch.version.cuda}")
    logger.info(f"GPU device: {torch.cuda.get_device_name(0)}")


@spaces.GPU
def on_process(video):
    """Process an uploaded video and stream progress to the UI.

    Generator callback for the "Process Video" button. Each yield is a
    3-item list matching the registered outputs:
    [status markdown, description markdown, accordion visibility update].

    Args:
        video: Path to the uploaded video file (or falsy if none uploaded).
    """
    start_time = time.time()

    # Clear all components when starting new processing.
    yield [
        "",                        # Clear status
        "",                        # Clear description
        gr.update(visible=False),  # Hide accordion
    ]

    if not video:
        yield [
            "Please upload a video",
            "",
            gr.update(visible=False),
        ]
        return

    try:
        # Initialize analyzer (model load can be slow, so report timing).
        init_start = time.time()
        yield [
            "Initializing video analyzer...",
            "",
            gr.update(visible=False),
        ]
        analyzer = VideoAnalyzer()
        init_time = time.time() - init_start
        logger.info(f"Initialization took {init_time:.2f} seconds")

        yield [
            f"Model initialized in {init_time:.2f}s. Starting analysis...",
            "",
            gr.update(visible=True),
        ]
        logger.info(f"Processing video: {video}")

        # Get duration and calculate total segments (ceiling division for
        # 10-second segments). Clamp to >= 1 so the progress/average
        # divisions below can never divide by zero on a sub-second clip.
        duration = get_video_duration_seconds(video)
        total_segments = max(1, (int(duration) + 9) // 10)

        yield [
            f"Processing video... (Will analyze {total_segments} segments)",
            "",
            gr.update(visible=True),
        ]

        # Process segments and show progress.
        segments = []
        total_ffmpeg_time = 0
        total_inference_time = 0
        # BUG FIX: initialize before the loop — the final yield below reads
        # formatted_desc, which previously raised NameError whenever
        # process_video() yielded no segments.
        formatted_desc = ""

        for i, segment in enumerate(analyzer.process_video(video)):
            segments.append(segment)

            # Update timing totals and derive running averages / ETA.
            total_ffmpeg_time += segment['processing_times']['ffmpeg']
            total_inference_time += segment['processing_times']['inference']
            progress = int((i + 1) / total_segments * 100)
            avg_ffmpeg_time = total_ffmpeg_time / (i + 1)
            avg_inference_time = total_inference_time / (i + 1)
            remaining_segments = total_segments - (i + 1)
            estimated_remaining = remaining_segments * (avg_ffmpeg_time + avg_inference_time)

            # Re-render the full description list on every segment so the
            # UI always shows everything analyzed so far.
            formatted_desc = "### Video Analysis by Segments:\n\n"
            for seg in segments:
                formatted_desc += f"**[{seg['timestamp']}]** {seg['description']}\n\n"

            yield [
                f"Processing segments... {progress}% complete\n" +
                f"Segment {i+1}/{total_segments}\n" +
                f"FFmpeg processing: {segment['processing_times']['ffmpeg']:.2f}s (avg: {avg_ffmpeg_time:.2f}s)\n" +
                f"Model inference: {segment['processing_times']['inference']:.2f}s (avg: {avg_inference_time:.2f}s)\n" +
                f"Estimated time remaining: {estimated_remaining:.2f}s",
                formatted_desc,
                gr.update(visible=True),
            ]

        total_time = time.time() - start_time
        yield [
            f"Processing complete!\n" +
            f"Total processing time: {total_time:.2f}s\n" +
            f"Average per segment:\n" +
            f"  - FFmpeg: {total_ffmpeg_time/total_segments:.2f}s\n" +
            f"  - Inference: {total_inference_time/total_segments:.2f}s\n" +
            f"  - Total: {(total_ffmpeg_time + total_inference_time)/total_segments:.2f}s",
            formatted_desc,
            gr.update(visible=True),
        ]

    except Exception as e:
        logger.exception("Error processing video")
        yield [
            f"Error processing video: {str(e)}",
            "",
            gr.update(visible=False),
        ]
    finally:
        # Release cached GPU memory between runs; skip on CPU-only hosts.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()


# Create Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("# SmolVLM Video Analyzer")
    gr.Markdown(
        "Upload a video to get a detailed analysis of its content, "
        "split into segments with timestamps."
    )

    with gr.Row():
        with gr.Column(scale=1):
            input_video = gr.Video(
                label="Upload your video",
                interactive=True
            )
            process_btn = gr.Button("Process Video", variant="primary")

        with gr.Column(scale=1):
            status = gr.Markdown()
            analysis_accordion = gr.Accordion(
                "Analysis Details",
                open=True,
                visible=False
            )
            with analysis_accordion:
                video_description = gr.Markdown("")

    # Generator callback: queue=True lets Gradio stream each yield.
    process_btn.click(
        on_process,
        inputs=[input_video],
        outputs=[
            status,
            video_description,
            analysis_accordion,
        ],
        queue=True,
    )

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )