File size: 3,947 Bytes
5f42812
378b5d5
5f42812
d40303a
 
5f42812
 
 
 
 
bd727fa
 
 
 
 
 
 
d40303a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f42812
d40303a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
630bec9
 
 
9a80e6e
d40303a
9a80e6e
630bec9
9a80e6e
630bec9
 
 
 
9a80e6e
 
 
 
0820857
 
 
 
9a80e6e
 
0820857
 
9a80e6e
0820857
 
 
d40303a
 
 
 
 
 
 
5f42812
d40303a
 
 
 
 
 
 
 
 
5f42812
 
d40303a
 
0820857
d40303a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f42812
 
d40303a
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import gradio as gr
from video_processor.processor import VideoAnalyzer, get_video_duration_seconds
import logging
import torch
import spaces

# Root logging at INFO, plus a logger named after this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _log_runtime_environment():
    """Log the PyTorch version and, when CUDA is usable, the CUDA/GPU details."""
    logger.info(f"PyTorch version: {torch.__version__}")
    logger.info(f"CUDA available: {torch.cuda.is_available()}")
    if not torch.cuda.is_available():
        return
    logger.info(f"CUDA version: {torch.version.cuda}")
    logger.info(f"GPU device: {torch.cuda.get_device_name(0)}")


# Emit the version information once, at import time
_log_runtime_environment()

def _ui_update(status_text, description="", details_visible=False):
    """Build one output triple: status text, description markdown, accordion visibility."""
    return [status_text, description, gr.update(visible=details_visible)]


@spaces.GPU
def on_process(video):
    """Analyze an uploaded video and stream progress updates to the UI.

    This is a generator. Every yielded value is a three-item list matching the
    click handler's outputs: the status text, the per-segment description
    markdown, and a visibility update for the details accordion.

    Args:
        video: Value supplied by the video input component; a falsy value
            means nothing was uploaded.

    Yields:
        list: ``[status, description, accordion_update]`` for each processing
        stage and after every analyzed segment. On failure a single error
        update is yielded instead of raising.
    """
    # Clear all components when starting new processing
    yield _ui_update("")

    if not video:
        yield _ui_update("Please upload a video")
        return

    try:
        yield _ui_update("Initializing video analyzer...")
        analyzer = VideoAnalyzer()

        yield _ui_update("Analyzing video content...", details_visible=True)
        logger.info("Processing video: %s", video)

        # Estimate the number of 10-second segments (ceiling division on whole seconds)
        duration = get_video_duration_seconds(video)
        total_segments = (int(duration) + 9) // 10

        yield _ui_update(
            f"Processing video... (This will process {total_segments} segments)",
            details_visible=True,
        )

        # Seed the description before the loop so the final update is defined
        # even if the analyzer yields no segments at all.
        formatted_desc = "### Video Analysis by Segments:\n\n"
        for done, segment in enumerate(analyzer.process_video(video), start=1):
            # Append only the new segment instead of re-rendering all previous ones
            formatted_desc += f"**[{segment['timestamp']}]** {segment['description']}\n\n"

            # total_segments is only an estimate: it is 0 for sub-second clips
            # and the analyzer may yield more segments than estimated. Using
            # max() as the denominator avoids ZeroDivisionError and keeps the
            # reported progress at or below 100%.
            progress = int(done / max(total_segments, done) * 100)

            yield _ui_update(
                f"Processing segments... {progress}% complete",
                formatted_desc,
                details_visible=True,
            )

        yield _ui_update("Processing complete!", formatted_desc, details_visible=True)

    except Exception as e:
        # UI boundary: log the traceback and show the message rather than crash the handler
        logger.exception("Error processing video")
        yield _ui_update(f"Error processing video: {e}")
    finally:
        # Clean up cached GPU memory whether processing succeeded or failed
        torch.cuda.empty_cache()

# Assemble the Gradio UI: upload controls on the left, results on the right
with gr.Blocks() as demo:
    gr.Markdown("# SmolVLM Video Analyzer")
    gr.Markdown("Upload a video to get a detailed analysis of its content, split into segments with timestamps.")

    with gr.Row():
        # Left column: video upload and the button that starts processing
        with gr.Column(scale=1):
            input_video = gr.Video(label="Upload your video", interactive=True)
            process_btn = gr.Button("Process Video", variant="primary")

        # Right column: status line and the initially hidden details accordion
        with gr.Column(scale=1):
            status = gr.Markdown()
            with gr.Accordion("Analysis Details", open=True, visible=False) as analysis_accordion:
                video_description = gr.Markdown("")

    # Each value yielded by on_process maps, in order, onto these three outputs
    process_btn.click(
        fn=on_process,
        inputs=[input_video],
        outputs=[status, video_description, analysis_accordion],
        queue=True,
    )

if __name__ == "__main__":
    # Serve on all interfaces at port 7860 with sharing disabled
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)