Spaces:

Pixeltable
/

Text-image-similarity-search-on-video-frames-embedding-indexes

Running

App Files Files Community

PierreBrunelle committed 4 days ago

Commit

2be8e0f

verified ·

1 Parent(s): f895f15

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -159

app.py CHANGED Viewed

@@ -2,185 +2,116 @@ import gradio as gr
 import pixeltable as pxt
 from pixeltable.functions.huggingface import clip
 from pixeltable.iterators import FrameIterator
 import os
-import logging
-# Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-# Initialize Pixeltable directory constants
-PROJECT_DIR = 'video_search'
-VIDEOS_TABLE = f'{PROJECT_DIR}.videos'
-FRAMES_VIEW = f'{PROJECT_DIR}.frames'
 # Process video and create index
 def process_video(video_file, progress=gr.Progress()):
-    if video_file is None:
-        return "Please upload a video file first."
-    try:
-        progress(0, desc="Initializing...")
-        logger.info(f"Processing video: {video_file.name}")
-        # Pixeltable setup
-        pxt.drop_dir(PROJECT_DIR, force=True)
-        pxt.create_dir(PROJECT_DIR)
-        # Create video table
-        video_table = pxt.create_table(VIDEOS_TABLE, {'video': pxt.Video})
-        # Create frames view
-        frames_view = pxt.create_view(
-            FRAMES_VIEW,
-            video_table,
-            iterator=FrameIterator.create(video=video_table.video, fps=1)
-        )
-        progress(0.2, desc="Inserting video...")
-        video_table.insert([{'video': video_file.name}])
-        progress(0.4, desc="Creating embedding index...")
-        # Use the CLIP model for both text and image embedding
-        frames_view.add_embedding_index(
-            'frame',
-            embedding=clip.using(model_id='openai/clip-vit-base-patch32')
-        )
-        progress(1.0, desc="Processing complete")
-        return "✅ Video processed successfully! You can now search for specific moments using text or images."
-    except Exception as e:
-        logger.error(f"Error processing video: {str(e)}")
-        return f"Error processing video: {str(e)}"
 # Perform similarity search
 def similarity_search(query, search_type, num_results, progress=gr.Progress()):
-    try:
-        if not query:
-            return []
-        frames_view = pxt.get_table(FRAMES_VIEW)
-        if frames_view is None:
-            return []
-        progress(0.5, desc="Performing search...")
         sim = frames_view.frame.similarity(query)
-        results = frames_view.order_by(sim, asc=False).limit(num_results).select(
-            frames_view.frame,
-            similarity=sim
-        ).collect()
-        progress(1.0, desc="Search complete")
-        return [row['frame'] for row in results]
-    except Exception as e:
-        logger.error(f"Error during search: {str(e)}")
-        return []
-# Create CSS for better styling
-css = """
-.container {
-    max-width: 1200px;
-    margin: 0 auto;
-}
-.header {
-    display: flex;
-    align-items: center;
-    margin-bottom: 20px;
-}
-.header img {
-    max-width: 120px;
-    margin-right: 20px;
-}
-.step-header {
-    background-color: #f5f5f5;
-    padding: 10px;
-    border-radius: 5px;
-    margin-bottom: 15px;
-}
-.examples-section {
-    margin-top: 30px;
-}
-"""
 # Gradio interface
-with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
-    gr.HTML(
         """
-        <div class="header">
-            <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/pixeltable-logo-large.png" alt="Pixeltable" />
-            <div>
-                <h1>Video Frame Search with AI</h1>
-                <p>Search through video content using natural language or images powered by <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a>.</p>
-            </div>
         </div>
         """
     )
     with gr.Row():
         with gr.Column(scale=1):
-            gr.HTML('<div class="step-header"><h3>1. Insert video</h3></div>')
-            video_file = gr.File(label="Upload Video", file_types=["video"])
-            process_button = gr.Button("Process Video", variant="primary")
             process_output = gr.Textbox(label="Status", lines=2)
-            gr.HTML('<div class="step-header"><h3>2. Search video frames</h3></div>')
-            search_type = gr.Radio(
-                ["Text", "Image"],
-                label="Search Type",
-                value="Text",
-                info="Choose whether to search using text or an image"
-            )
-            text_input = gr.Textbox(
-                label="Text Query",
-                placeholder="Describe what you're looking for...",
-                info="Example: 'person walking' or 'red car'"
-            )
-            image_input = gr.Image(
-                label="Image Query",
-                type="pil",
-                visible=False,
-                info="Upload an image to find similar frames"
-            )
-            num_results = gr.Slider(
-                minimum=1,
-                maximum=20,
-                value=5,
-                step=1,
-                label="Number of Results",
-                info="How many matching frames to display"
-            )
-            search_button = gr.Button("Search", variant="primary")
         with gr.Column(scale=2):
-            gr.HTML('<div class="step-header"><h3>3. Visualize results</h3></div>')
-            results_gallery = gr.Gallery(
-                label="Search Results",
-                columns=3,
-                allow_preview=True,
-                object_fit="contain"
-            )
-            with gr.Accordion("Example Videos", open=False):
-                gr.Markdown("Click one of the examples below to get started")
-                gr.Examples(
-                    examples=[
-                        ["bangkok.mp4"],
-                        ["lotr.mp4"],
-                        ["mi.mp4"],
-                    ],
-                    inputs=[video_file],
-                    outputs=[process_output],
-                    fn=process_video,
-                    cache_examples=True
-                )
-    # Handle UI interactions
     def update_search_input(choice):
         return gr.update(visible=choice=="Text"), gr.update(visible=choice=="Image")
@@ -194,9 +125,6 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
     def perform_search(search_type, text_query, image_query, num_results):
         query = text_query if search_type == "Text" else image_query
-        if query is None or (isinstance(query, str) and query.strip() == ""):
-            return gr.Gallery(label="Please enter a valid search query")
         return similarity_search(query, search_type, num_results)
     search_button.click(
@@ -205,13 +133,5 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
         outputs=[results_gallery]
     )
-    # Add keyboard shortcuts
-    search_type.change(lambda: None, None, None, _js="() => {document.activeElement.blur();}")
-    text_input.submit(
-        perform_search,
-        inputs=[search_type, text_input, image_input, num_results],
-        outputs=[results_gallery]
-    )
 if __name__ == "__main__":
     demo.launch()

 import pixeltable as pxt
 from pixeltable.functions.huggingface import clip
 from pixeltable.iterators import FrameIterator
+import PIL.Image
 import os
 # Process video and create index
 def process_video(video_file, progress=gr.Progress()):
+    progress(0, desc="Initializing...")
+    # Pixeltable setup
+    pxt.drop_dir('video_search', force=True)
+    pxt.create_dir('video_search')
+    # Update type declaration to use simpler syntax
+    video_table = pxt.create_table('video_search.videos', {'video': pxt.Video})
+    frames_view = pxt.create_view(
+        'video_search.frames',
+        video_table,
+        iterator=FrameIterator.create(video=video_table.video, fps=1)
+    )
+    progress(0.2, desc="Inserting video...")
+    video_table.insert([{'video': video_file.name}])
+    progress(0.4, desc="Creating embedding index...")
+    # Updated embedding pattern using .using()
+    frames_view.add_embedding_index(
+        'frame',
+        embedding=clip.using(model_id='openai/clip-vit-base-patch32')
+    )
+    progress(1.0, desc="Processing complete")
+    return "Good news! Your video has been processed. Easily find the moments you need by searching with text or images."
 # Perform similarity search
 def similarity_search(query, search_type, num_results, progress=gr.Progress()):
+    frames_view = pxt.get_table('video_search.frames')
+    progress(0.5, desc="Performing search...")
+    if search_type == "Text":
+        sim = frames_view.frame.similarity(query)
+    else:  # Image search
         sim = frames_view.frame.similarity(query)
+    results = frames_view.order_by(sim, asc=False).limit(num_results).select(frames_view.frame, sim=sim).collect()
+    progress(1.0, desc="Search complete")
+    return [row['frame'] for row in results]
 # Gradio interface
+with gr.Blocks(theme=gr.themes.Base()) as demo:
+    gr.Markdown(
         """
+        <div style="margin-bottom: 20px;">
+            <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/pixeltable-logo-large.png" alt="Pixeltable" style="max-width: 150px;" />
+            <h2>Text and Image similarity search on video frames with embedding indexes</h2>
         </div>
         """
     )
+    gr.HTML(
+    """
+    <p>
+        <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a> is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data.
+    </p>
+    """
+    )
     with gr.Row():
         with gr.Column(scale=1):
+            gr.Markdown(
+            """
+            <h3>1. Insert video</h3>
+            """)
+            video_file = gr.File(label="Upload Video")
+            process_button = gr.Button("Process Video")
             process_output = gr.Textbox(label="Status", lines=2)
+            gr.Markdown(
+            """
+            <h3>2. Search video frames</h3>
+            """)
+            search_type = gr.Radio(["Text", "Image"], label="Search Type", value="Text")
+            text_input = gr.Textbox(label="Text Query")
+            image_input = gr.Image(label="Image Query", type="pil", visible=False)
+            num_results = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of Results")
+            search_button = gr.Button("Search")
         with gr.Column(scale=2):
+            gr.Markdown(
+            """
+            <h3>3. Visualize results</h3>
+            """)
+            results_gallery = gr.Gallery(label="Search Results", columns=3)
+            gr.Examples(
+            examples=[
+                ["bangkok.mp4"],
+                ["lotr.mp4"],
+                ["mi.mp4"],
+            ],
+            label="Click one of the examples below to get started",
+            inputs=[video_file],
+            fn=process_video
+            )
     def update_search_input(choice):
         return gr.update(visible=choice=="Text"), gr.update(visible=choice=="Image")
     def perform_search(search_type, text_query, image_query, num_results):
         query = text_query if search_type == "Text" else image_query
         return similarity_search(query, search_type, num_results)
     search_button.click(
         outputs=[results_gallery]
     )
 if __name__ == "__main__":
     demo.launch()