import gradio as gr
import pixeltable as pxt
from pixeltable.functions.huggingface import clip
from pixeltable.iterators import FrameIterator
import PIL.Image
import os
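
# App: text and image search over video frames, built on Pixeltable with CLIP
# embeddings and served through a Gradio UI.
# Assumed dependencies: pixeltable, gradio, torch, transformers (for the CLIP model).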

# Process video and create index
def process_video(video_file, progress=gr.Progress()):
    progress(0, desc="Initializing...")

    # Pixeltable setup: start from a clean directory on every run
    pxt.drop_dir('video_search', force=True)
    pxt.create_dir('video_search')

    # Create the video table
    video_table = pxt.create_table('video_search.videos', {'video': pxt.Video})

    # Create a view that extracts frames at 1 frame per second;
    # each row exposes the extracted image in its 'frame' column
    frames_view = pxt.create_view(
        'video_search.frames',
        video_table,
        iterator=FrameIterator.create(video=video_table.video, fps=1)
    )

    progress(0.2, desc="Inserting video...")
    video_table.insert([{'video': video_file.name}])
    
    progress(0.4, desc="Creating embedding index...")
    # Index the frames with CLIP; supplying both image_embed and string_embed
    # lets the same index answer image queries and text queries
    frames_view.add_embedding_index(
        'frame',
        image_embed=clip.using(model_id='openai/clip-vit-base-patch32'),
        string_embed=clip.using(model_id='openai/clip-vit-base-patch32')
    )

    progress(1.0, desc="Processing complete")
    return "Good news! Your video has been processed. Easily find the moments you need by searching with text or images."

# Perform similarity search
def similarity_search(query, search_type, num_results, progress=gr.Progress()):
    frames_view = pxt.get_table('video_search.frames')
    
    progress(0.5, desc="Performing search...")
    # similarity() accepts either a text string or a PIL image as the query,
    # so no branching on search_type is needed here
    sim = frames_view.frame.similarity(query)
    
    # Rank frames by similarity (most similar first) and keep the top matches
    results = (
        frames_view.order_by(sim, asc=False)
        .limit(num_results)
        .select(frames_view.frame, sim=sim)
        .collect()
    )
    
    progress(1.0, desc="Search complete")
    # Each result's 'frame' value is a PIL image, which gr.Gallery can display directly
    return [row['frame'] for row in results]
    
# Gradio UI: upload and process a video on the left, browse search results on the right
with gr.Blocks() as demo:
    gr.Markdown("# Text and Image Search on Video Frames with Pixeltable")
    
    with gr.Row():
        with gr.Column(scale=1):
            video_file = gr.File(label="Upload Video")
            process_button = gr.Button("Process Video")
            process_output = gr.Textbox(label="Status", lines=2)
            
            search_type = gr.Radio(["Text", "Image"], label="Search Type", value="Text")
            text_input = gr.Textbox(label="Text Query")
            image_input = gr.Image(label="Image Query", type="pil", visible=False)
            num_results = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of Results")
            search_button = gr.Button("Search")
        
        with gr.Column(scale=2):
            results_gallery = gr.Gallery(label="Search Results", columns=3)
    
    # Show only the query input that matches the selected search type
    def update_search_input(choice):
        return gr.update(visible=(choice == "Text")), gr.update(visible=(choice == "Image"))

    search_type.change(update_search_input, search_type, [text_input, image_input])
    
    process_button.click(
        process_video,
        inputs=[video_file],
        outputs=[process_output]
    )
    
    # Pass along whichever query matches the selected search type
    def perform_search(search_type, text_query, image_query, num_results):
        query = text_query if search_type == "Text" else image_query
        return similarity_search(query, search_type, num_results)

    search_button.click(
        perform_search,
        inputs=[search_type, text_input, image_input, num_results],
        outputs=[results_gallery]
    )

# share=True serves the app via a temporary public Gradio link in addition to localhost
if __name__ == "__main__":
    demo.launch(share=True)
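
# To run locally (assuming this file is saved as app.py):
#   python app.py
# Gradio prints a local URL and, with share=True, a temporary public URL.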