File size: 5,481 Bytes
5f42812
378b5d5
5f42812
d40303a
 
e927231
5f42812
 
 
 
 
bd727fa
 
 
 
 
 
 
d40303a
 
e927231
 
d40303a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f42812
d40303a
e927231
d40303a
 
 
 
 
 
 
e927231
 
d40303a
 
 
e927231
d40303a
 
 
 
 
630bec9
 
 
9a80e6e
d40303a
9a80e6e
630bec9
e927231
630bec9
 
 
 
9a80e6e
 
b841197
 
e927231
9a80e6e
 
b841197
 
 
 
e927231
0820857
b841197
 
e927231
b841197
0820857
 
 
9a80e6e
 
0820857
 
e927231
b841197
 
 
e927231
0820857
 
 
d40303a
e927231
d40303a
e927231
 
b841197
 
 
 
d40303a
 
 
 
5f42812
d40303a
 
 
 
 
 
 
 
 
5f42812
 
d40303a
 
0820857
d40303a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f42812
 
d40303a
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import gradio as gr
from video_processor.processor import VideoAnalyzer, get_video_duration_seconds
import logging
import torch
import spaces
import time

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Emit environment/version info once at import time so missing-CUDA or
# version-mismatch problems are visible in the startup logs.
cuda_ok = torch.cuda.is_available()
logger.info(f"PyTorch version: {torch.__version__}")
logger.info(f"CUDA available: {cuda_ok}")
if cuda_ok:
    logger.info(f"CUDA version: {torch.version.cuda}")
    logger.info(f"GPU device: {torch.cuda.get_device_name(0)}")

@spaces.GPU
def on_process(video):
    """Analyze an uploaded video segment by segment, streaming progress to the UI.

    Generator used as a Gradio event handler. Every ``yield`` is a 3-item
    list matching the ``outputs`` of the click binding:
    ``[status_markdown, segment_description_markdown, accordion_visibility_update]``.

    Args:
        video: Filesystem path of the uploaded video, or a falsy value when
            nothing was uploaded.
    """
    start_time = time.time()

    # Clear all components when starting new processing
    yield [
        "",  # Clear status
        "",  # Clear description
        gr.update(visible=False)  # Hide accordion
    ]

    if not video:
        yield [
            "Please upload a video",
            "",
            gr.update(visible=False)
        ]
        return

    try:
        # Initialize analyzer (model load can be slow, so time it separately).
        init_start = time.time()
        yield [
            "Initializing video analyzer...",
            "",
            gr.update(visible=False)
        ]

        analyzer = VideoAnalyzer()
        init_time = time.time() - init_start
        logger.info(f"Initialization took {init_time:.2f} seconds")

        # Process video
        yield [
            f"Model initialized in {init_time:.2f}s. Starting analysis...",
            "",
            gr.update(visible=True)
        ]

        logger.info(f"Processing video: {video}")

        # Get duration and calculate total segments
        duration = get_video_duration_seconds(video)
        total_segments = (int(duration) + 9) // 10  # Ceiling division for 10-second segments

        # Process video segments
        yield [
            f"Processing video... (Will analyze {total_segments} segments)",
            "",
            gr.update(visible=True)
        ]

        # Process segments and show progress
        segments = []
        total_ffmpeg_time = 0
        total_inference_time = 0
        # BUGFIX: pre-initialize so the final yield below cannot raise
        # NameError when the analyzer produces no segments at all.
        formatted_desc = "### Video Analysis by Segments:\n\n"

        for i, segment in enumerate(analyzer.process_video(video)):
            segments.append(segment)

            # Update timing totals
            total_ffmpeg_time += segment['processing_times']['ffmpeg']
            total_inference_time += segment['processing_times']['inference']

            # BUGFIX: guard total_segments == 0 (sub-second duration) so the
            # progress computation cannot raise ZeroDivisionError; progress
            # is then best-effort.
            progress = int((i + 1) / max(total_segments, 1) * 100)
            avg_ffmpeg_time = total_ffmpeg_time / (i + 1)
            avg_inference_time = total_inference_time / (i + 1)
            # Clamp at 0 in case the analyzer yields more segments than the
            # duration-based estimate predicted.
            remaining_segments = max(total_segments - (i + 1), 0)
            estimated_remaining = remaining_segments * (avg_ffmpeg_time + avg_inference_time)

            # Format current segments
            formatted_desc = "### Video Analysis by Segments:\n\n"
            for seg in segments:
                formatted_desc += f"**[{seg['timestamp']}]** {seg['description']}\n\n"

            yield [
                f"Processing segments... {progress}% complete\n" +
                f"Segment {i+1}/{total_segments}\n" +
                f"FFmpeg processing: {segment['processing_times']['ffmpeg']:.2f}s (avg: {avg_ffmpeg_time:.2f}s)\n" +
                f"Model inference: {segment['processing_times']['inference']:.2f}s (avg: {avg_inference_time:.2f}s)\n" +
                f"Estimated time remaining: {estimated_remaining:.2f}s",
                formatted_desc,
                gr.update(visible=True)
            ]

        total_time = time.time() - start_time
        # BUGFIX: average over segments actually processed (not the estimate,
        # which can differ and can be zero -> ZeroDivisionError).
        n_done = max(len(segments), 1)
        yield [
            f"Processing complete!\n" +
            f"Total processing time: {total_time:.2f}s\n" +
            f"Average per segment:\n" +
            f"  - FFmpeg: {total_ffmpeg_time/n_done:.2f}s\n" +
            f"  - Inference: {total_inference_time/n_done:.2f}s\n" +
            f"  - Total: {(total_ffmpeg_time + total_inference_time)/n_done:.2f}s",
            formatted_desc,
            gr.update(visible=True)
        ]

    except Exception as e:
        logger.exception("Error processing video")
        yield [
            f"Error processing video: {str(e)}",
            "",
            gr.update(visible=False)
        ]
    finally:
        # Clean up cached GPU memory between requests (skip when no CUDA).
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

# Build the Gradio UI: video input + trigger button on the left, streaming
# status and per-segment analysis on the right.
with gr.Blocks() as demo:
    gr.Markdown("# SmolVLM Video Analyzer")
    gr.Markdown("Upload a video to get a detailed analysis of its content, split into segments with timestamps.")

    with gr.Row():
        with gr.Column(scale=1):
            video_input = gr.Video(label="Upload your video", interactive=True)
            run_button = gr.Button("Process Video", variant="primary")

        with gr.Column(scale=1):
            status_box = gr.Markdown()
            details_accordion = gr.Accordion("Analysis Details", open=True, visible=False)
            with details_accordion:
                description_box = gr.Markdown("")

    # Stream on_process's yielded updates into the three output components.
    run_button.click(
        on_process,
        inputs=[video_input],
        outputs=[status_box, description_box, details_accordion],
        queue=True,
    )

if __name__ == "__main__":
    # Listen on all interfaces on the standard HF Spaces port; no public
    # share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)