File size: 7,233 Bytes
152a369 b659fec 152a369 214ca3f 152a369 214ca3f 152a369 214ca3f 152a369 214ca3f 152a369 214ca3f 152a369 214ca3f f895f15 214ca3f 152a369 214ca3f 152a369 b41cec1 214ca3f b41cec1 214ca3f b41cec1 214ca3f 152a369 214ca3f 152a369 214ca3f 152a369 b41cec1 214ca3f 645d14b 214ca3f b41cec1 214ca3f b41cec1 214ca3f 152a369 b41cec1 214ca3f 3a73782 214ca3f 645d14b 214ca3f b41cec1 214ca3f b41cec1 152a369 b41cec1 214ca3f b41cec1 152a369 214ca3f 152a369 b41cec1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 |
import gradio as gr
import pixeltable as pxt
from pixeltable.functions.huggingface import clip
from pixeltable.iterators import FrameIterator
import os
import logging
# Configure root logging at import time: INFO and above, with each record
# showing timestamp, logger name, level, and message.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Pixeltable names used by this app: one directory that holds the video table
# and the frame view derived from it.
PROJECT_DIR = 'video_search'
VIDEOS_TABLE = PROJECT_DIR + '.videos'
FRAMES_VIEW = PROJECT_DIR + '.frames'
# Process video and create index
def process_video(video_file, progress=gr.Progress()):
    """Rebuild the Pixeltable project from one video and index its frames.

    The project directory is dropped and recreated on every call, so only the
    most recently processed video is searchable afterwards.

    Args:
        video_file: The uploaded video. May be an object exposing the file
            path as ``.name``, a plain path string, or an ``os.PathLike``.
            ``None`` means nothing was uploaded.
        progress: Gradio progress tracker; called with a fraction and a
            description at each stage. The default instance is kept in the
            signature because that is how Gradio injects the tracker.

    Returns:
        A human-readable status string. Errors are logged and reported in the
        returned string rather than raised.
    """
    if video_file is None:
        return "Please upload a video file first."

    # Resolve the on-disk path without assuming a particular wrapper type:
    # for a path object ``.name`` would be only the basename, and a bare
    # string has no ``.name`` attribute at all.
    if isinstance(video_file, os.PathLike):
        video_path = os.fspath(video_file)
    else:
        video_path = getattr(video_file, 'name', video_file)

    try:
        progress(0, desc="Initializing...")
        logger.info(f"Processing video: {video_path}")

        # Pixeltable setup: start from a clean directory on every run.
        pxt.drop_dir(PROJECT_DIR, force=True)
        pxt.create_dir(PROJECT_DIR)

        # Create video table
        video_table = pxt.create_table(VIDEOS_TABLE, {'video': pxt.Video})

        # Create frames view: one row per extracted frame, sampled at 1 fps.
        frames_view = pxt.create_view(
            FRAMES_VIEW,
            video_table,
            iterator=FrameIterator.create(video=video_table.video, fps=1),
        )

        progress(0.2, desc="Inserting video...")
        video_table.insert([{'video': video_path}])

        progress(0.4, desc="Creating embedding index...")
        # Use the CLIP model for both text and image embedding
        frames_view.add_embedding_index(
            'frame',
            embedding=clip.using(model_id='openai/clip-vit-base-patch32'),
        )

        progress(1.0, desc="Processing complete")
        return "✅ Video processed successfully! You can now search for specific moments using text or images."
    except Exception as e:
        # Top-level UI boundary: keep the app alive, but record the traceback.
        logger.exception("Error processing video: %s", e)
        return f"Error processing video: {str(e)}"
# Perform similarity search
def similarity_search(query, search_type, num_results, progress=gr.Progress()):
    """Return the frames most similar to ``query`` from the indexed video.

    Args:
        query: Text or image to compare against the frame embedding index.
            A falsy value (``None`` or an empty string) yields no results.
        search_type: Accepted for interface compatibility; not used here.
        num_results: Maximum number of frames to return. Coerced to ``int``
            because a UI slider may hand the value over as a float.
        progress: Gradio progress tracker; called with a fraction and a
            description. The default instance is kept in the signature
            because that is how Gradio injects the tracker.

    Returns:
        A list of the ``frame`` values of the best matches, most similar
        first. An empty list is returned when there is no query or when any
        error occurs (the error is logged, not raised).
    """
    try:
        if not query:
            return []

        frames_view = pxt.get_table(FRAMES_VIEW)
        if frames_view is None:
            return []

        progress(0.5, desc="Performing search...")
        sim = frames_view.frame.similarity(query)
        results = (
            frames_view.order_by(sim, asc=False)
            .limit(int(num_results))
            .select(frames_view.frame, similarity=sim)
            .collect()
        )

        progress(1.0, desc="Search complete")
        return [row['frame'] for row in results]
    except Exception as e:
        # Top-level UI boundary: show an empty gallery, but record the traceback.
        logger.exception("Error during search: %s", e)
        return []
# Custom stylesheet passed to gr.Blocks(css=...) when the interface is built.
# .header and .step-header are applied by the gr.HTML snippets in the layout;
# .container and .examples-section are defined here but are not referenced by
# any markup visible in this file.
css = """
.container {
max-width: 1200px;
margin: 0 auto;
}
.header {
display: flex;
align-items: center;
margin-bottom: 20px;
}
.header img {
max-width: 120px;
margin-right: 20px;
}
.step-header {
background-color: #f5f5f5;
padding: 10px;
border-radius: 5px;
margin-bottom: 15px;
}
.examples-section {
margin-top: 30px;
}
"""
# Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    # NOTE(review): the source this was restored from had lost its indentation.
    # The nesting below follows the context managers; the placement of the
    # "Example Videos" accordion inside the results column is assumed - confirm.

    # Page banner: logo, title, and tagline.
    gr.HTML(
        """
<div class="header">
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/pixeltable-logo-large.png" alt="Pixeltable" />
<div>
<h1>Video Frame Search with AI</h1>
<p>Search through video content using natural language or images powered by <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a>.</p>
</div>
</div>
"""
    )

    with gr.Row():
        # Left column: upload/processing controls, then the query controls.
        with gr.Column(scale=1):
            gr.HTML('<div class="step-header"><h3>1. Insert video</h3></div>')
            video_file = gr.File(label="Upload Video", file_types=["video"])
            process_button = gr.Button("Process Video", variant="primary")
            process_output = gr.Textbox(label="Status", lines=2)

            gr.HTML('<div class="step-header"><h3>2. Search video frames</h3></div>')
            search_type = gr.Radio(
                ["Text", "Image"],
                label="Search Type",
                value="Text",
                info="Choose whether to search using text or an image",
            )
            text_input = gr.Textbox(
                label="Text Query",
                placeholder="Describe what you're looking for...",
                info="Example: 'person walking' or 'red car'",
            )
            # Hidden until the "Image" search type is selected.
            image_input = gr.Image(
                label="Image Query",
                type="pil",
                visible=False,
                info="Upload an image to find similar frames",
            )
            num_results = gr.Slider(
                minimum=1,
                maximum=20,
                value=5,
                step=1,
                label="Number of Results",
                info="How many matching frames to display",
            )
            search_button = gr.Button("Search", variant="primary")

        # Right column: gallery of matching frames and the example videos.
        with gr.Column(scale=2):
            gr.HTML('<div class="step-header"><h3>3. Visualize results</h3></div>')
            results_gallery = gr.Gallery(
                label="Search Results",
                columns=3,
                allow_preview=True,
                object_fit="contain",
            )

            with gr.Accordion("Example Videos", open=False):
                gr.Markdown("Click one of the examples below to get started")
                gr.Examples(
                    examples=[
                        ["bangkok.mp4"],
                        ["lotr.mp4"],
                        ["mi.mp4"],
                    ],
                    inputs=[video_file],
                    outputs=[process_output],
                    fn=process_video,
                    cache_examples=True,
                )

    # Handle UI interactions
    def update_search_input(choice):
        """Return visibility updates for the text box and the image picker."""
        show_text = choice == "Text"
        show_image = choice == "Image"
        return gr.update(visible=show_text), gr.update(visible=show_image)

    search_type.change(
        fn=update_search_input,
        inputs=search_type,
        outputs=[text_input, image_input],
    )

    process_button.click(
        fn=process_video,
        inputs=[video_file],
        outputs=[process_output],
    )

    def perform_search(search_type, text_query, image_query, num_results):
        """Pick the active query, reject blank input, then run the search."""
        if search_type == "Text":
            query = text_query
        else:
            query = image_query

        blank_text = isinstance(query, str) and query.strip() == ""
        if query is None or blank_text:
            return gr.Gallery(label="Please enter a valid search query")

        return similarity_search(query, search_type, num_results)

    search_button.click(
        fn=perform_search,
        inputs=[search_type, text_input, image_input, num_results],
        outputs=[results_gallery],
    )

    # Add keyboard shortcuts
    # Changing the search type runs a client-side snippet that blurs the
    # focused element; the Python callback itself does nothing.
    search_type.change(
        fn=lambda: None,
        inputs=None,
        outputs=None,
        _js="() => {document.activeElement.blur();}",
    )
    # Submitting the text box runs the same search as the Search button.
    text_input.submit(
        fn=perform_search,
        inputs=[search_type, text_input, image_input, num_results],
        outputs=[results_gallery],
    )
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()