PierreBrunelle committed on
Commit
214ca3f
·
verified ·
1 Parent(s): 36f43f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +161 -80
app.py CHANGED
@@ -2,117 +2,187 @@ import gradio as gr
2
  import pixeltable as pxt
3
  from pixeltable.functions.huggingface import clip_image, clip_text
4
  from pixeltable.iterators import FrameIterator
5
- import PIL.Image
6
  import os
 
 
 
 
 
 
 
 
 
 
7
 
8
  # Process video and create index
9
  def process_video(video_file, progress=gr.Progress()):
10
- progress(0, desc="Initializing...")
 
 
 
 
 
11
 
12
- # Pixeltable setup
13
- pxt.drop_dir('video_search', force=True)
14
- pxt.create_dir('video_search')
15
 
16
- # Update type declaration to use simpler syntax
17
- video_table = pxt.create_table('video_search.videos', {'video': pxt.Video})
18
 
19
- frames_view = pxt.create_view(
20
- 'video_search.frames',
21
- video_table,
22
- iterator=FrameIterator.create(video=video_table.video, fps=1)
23
- )
 
24
 
25
- progress(0.2, desc="Inserting video...")
26
- video_table.insert([{'video': video_file.name}])
27
-
28
- progress(0.4, desc="Creating embedding index...")
29
- # Updated embedding pattern using .using()
30
- frames_view.add_embedding_index(
31
- 'frame',
32
- string_embed=clip_text.using(model_id='openai/clip-vit-base-patch32'),
33
- image_embed=clip_image.using(model_id='openai/clip-vit-base-patch32')
34
- )
 
35
 
36
- progress(1.0, desc="Processing complete")
37
- return "Good news! Your video has been processed. Easily find the moments you need by searching with text or images."
 
 
 
 
38
 
39
  # Perform similarity search
40
  def similarity_search(query, search_type, num_results, progress=gr.Progress()):
41
- frames_view = pxt.get_table('video_search.frames')
42
-
43
- progress(0.5, desc="Performing search...")
44
- if search_type == "Text":
45
- sim = frames_view.frame.similarity(query)
46
- else: # Image search
 
 
 
47
  sim = frames_view.frame.similarity(query)
 
 
 
 
 
 
 
 
48
 
49
- results = frames_view.order_by(sim, asc=False).limit(num_results).select(frames_view.frame, sim=sim).collect()
50
-
51
- progress(1.0, desc="Search complete")
52
- return [row['frame'] for row in results]
53
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  # Gradio interface
55
- with gr.Blocks(theme=gr.themes.Base()) as demo:
56
- gr.Markdown(
57
  """
58
- <div style= margin-bottom: 20px;">
59
- <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/pixeltable-logo-large.png" alt="Pixeltable" style="max-width: 150px;" />
60
- <h2>Text and Image similarity search on video frames with embedding indexes</h2>
 
 
 
61
  </div>
62
  """
63
  )
64
- gr.HTML(
65
- """
66
- <p>
67
- <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a> is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data.
68
- </p>
69
- """
70
- )
71
-
72
 
73
  with gr.Row():
74
  with gr.Column(scale=1):
75
-
76
- gr.Markdown(
77
- """
78
- <h3>1. Insert video</h3>
79
- """)
80
 
81
- video_file = gr.File(label="Upload Video")
82
- process_button = gr.Button("Process Video")
83
  process_output = gr.Textbox(label="Status", lines=2)
84
 
85
- gr.Markdown(
86
- """
87
- <h3>2. Search video frames</h3>
88
- """)
89
 
90
- search_type = gr.Radio(["Text", "Image"], label="Search Type", value="Text")
91
- text_input = gr.Textbox(label="Text Query")
92
- image_input = gr.Image(label="Image Query", type="pil", visible=False)
93
- num_results = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of Results")
94
- search_button = gr.Button("Search")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  with gr.Column(scale=2):
97
-
98
- gr.Markdown(
99
- """
100
- <h3>3. Visualize results</h3>
101
- """)
102
 
103
- results_gallery = gr.Gallery(label="Search Results", columns=3)
104
-
105
- gr.Examples(
106
- examples=[
107
- ["bangkok.mp4"],
108
- ["lotr.mp4"],
109
- ["mi.mp4"],
110
- ],
111
- label="Click one of the examples below to get started",
112
- inputs=[video_file],
113
- fn=process_video
114
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
 
116
  def update_search_input(choice):
117
  return gr.update(visible=choice=="Text"), gr.update(visible=choice=="Image")
118
 
@@ -126,6 +196,9 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
126
 
127
  def perform_search(search_type, text_query, image_query, num_results):
128
  query = text_query if search_type == "Text" else image_query
 
 
 
129
  return similarity_search(query, search_type, num_results)
130
 
131
  search_button.click(
@@ -134,5 +207,13 @@ with gr.Blocks(theme=gr.themes.Base()) as demo:
134
  outputs=[results_gallery]
135
  )
136
 
 
 
 
 
 
 
 
 
137
  if __name__ == "__main__":
138
  demo.launch()
 
2
  import pixeltable as pxt
3
  from pixeltable.functions.huggingface import clip_image, clip_text
4
  from pixeltable.iterators import FrameIterator
 
5
  import os
6
+ import logging
7
+
8
+ # Set up logging
9
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
10
+ logger = logging.getLogger(__name__)
11
+
12
+ # Initialize Pixeltable directory constants
13
+ PROJECT_DIR = 'video_search'
14
+ VIDEOS_TABLE = f'{PROJECT_DIR}.videos'
15
+ FRAMES_VIEW = f'{PROJECT_DIR}.frames'
16
 
17
  # Process video and create index
18
  def process_video(video_file, progress=gr.Progress()):
19
+ if video_file is None:
20
+ return "Please upload a video file first."
21
+
22
+ try:
23
+ progress(0, desc="Initializing...")
24
+ logger.info(f"Processing video: {video_file.name}")
25
 
26
+ # Pixeltable setup
27
+ pxt.drop_dir(PROJECT_DIR, force=True)
28
+ pxt.create_dir(PROJECT_DIR)
29
 
30
+ # Create video table
31
+ video_table = pxt.create_table(VIDEOS_TABLE, {'video': pxt.Video})
32
 
33
+ # Create frames view
34
+ frames_view = pxt.create_view(
35
+ FRAMES_VIEW,
36
+ video_table,
37
+ iterator=FrameIterator.create(video=video_table.video, fps=1)
38
+ )
39
 
40
+ progress(0.2, desc="Inserting video...")
41
+ video_table.insert([{'video': video_file.name}])
42
+
43
+ progress(0.4, desc="Creating embedding index...")
44
+ # Use the CLIP model for both text and image embedding
45
+ clip_model = 'openai/clip-vit-base-patch32'
46
+ frames_view.add_embedding_index(
47
+ 'frame',
48
+ string_embed=clip_text.using(model_id=clip_model),
49
+ image_embed=clip_image.using(model_id=clip_model)
50
+ )
51
 
52
+ progress(1.0, desc="Processing complete")
53
+ return " Video processed successfully! You can now search for specific moments using text or images."
54
+
55
+ except Exception as e:
56
+ logger.error(f"Error processing video: {str(e)}")
57
+ return f"Error processing video: {str(e)}"
58
 
59
  # Perform similarity search
60
  def similarity_search(query, search_type, num_results, progress=gr.Progress()):
61
+ try:
62
+ if not query:
63
+ return []
64
+
65
+ frames_view = pxt.get_table(FRAMES_VIEW)
66
+ if frames_view is None:
67
+ return []
68
+
69
+ progress(0.5, desc="Performing search...")
70
  sim = frames_view.frame.similarity(query)
71
+
72
+ results = frames_view.order_by(sim, asc=False).limit(num_results).select(
73
+ frames_view.frame,
74
+ similarity=sim
75
+ ).collect()
76
+
77
+ progress(1.0, desc="Search complete")
78
+ return [row['frame'] for row in results]
79
 
80
+ except Exception as e:
81
+ logger.error(f"Error during search: {str(e)}")
82
+ return []
83
+
84
+ # Create CSS for better styling
85
+ css = """
86
+ .container {
87
+ max-width: 1200px;
88
+ margin: 0 auto;
89
+ }
90
+ .header {
91
+ display: flex;
92
+ align-items: center;
93
+ margin-bottom: 20px;
94
+ }
95
+ .header img {
96
+ max-width: 120px;
97
+ margin-right: 20px;
98
+ }
99
+ .step-header {
100
+ background-color: #f5f5f5;
101
+ padding: 10px;
102
+ border-radius: 5px;
103
+ margin-bottom: 15px;
104
+ }
105
+ .examples-section {
106
+ margin-top: 30px;
107
+ }
108
+ """
109
+
110
  # Gradio interface
111
+ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
112
+ gr.HTML(
113
  """
114
+ <div class="header">
115
+ <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/pixeltable-logo-large.png" alt="Pixeltable" />
116
+ <div>
117
+ <h1>Video Frame Search with AI</h1>
118
+ <p>Search through video content using natural language or images powered by <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a>.</p>
119
+ </div>
120
  </div>
121
  """
122
  )
 
 
 
 
 
 
 
 
123
 
124
  with gr.Row():
125
  with gr.Column(scale=1):
126
+ gr.HTML('<div class="step-header"><h3>1. Insert video</h3></div>')
 
 
 
 
127
 
128
+ video_file = gr.File(label="Upload Video", file_types=["video"])
129
+ process_button = gr.Button("Process Video", variant="primary")
130
  process_output = gr.Textbox(label="Status", lines=2)
131
 
132
+ gr.HTML('<div class="step-header"><h3>2. Search video frames</h3></div>')
 
 
 
133
 
134
+ search_type = gr.Radio(
135
+ ["Text", "Image"],
136
+ label="Search Type",
137
+ value="Text",
138
+ info="Choose whether to search using text or an image"
139
+ )
140
+ text_input = gr.Textbox(
141
+ label="Text Query",
142
+ placeholder="Describe what you're looking for...",
143
+ info="Example: 'person walking' or 'red car'"
144
+ )
145
+ image_input = gr.Image(
146
+ label="Image Query",
147
+ type="pil",
148
+ visible=False,
149
+ info="Upload an image to find similar frames"
150
+ )
151
+ num_results = gr.Slider(
152
+ minimum=1,
153
+ maximum=20,
154
+ value=5,
155
+ step=1,
156
+ label="Number of Results",
157
+ info="How many matching frames to display"
158
+ )
159
+ search_button = gr.Button("Search", variant="primary")
160
 
161
  with gr.Column(scale=2):
162
+ gr.HTML('<div class="step-header"><h3>3. Visualize results</h3></div>')
 
 
 
 
163
 
164
+ results_gallery = gr.Gallery(
165
+ label="Search Results",
166
+ columns=3,
167
+ allow_preview=True,
168
+ object_fit="contain"
 
 
 
 
 
 
169
  )
170
+
171
+ with gr.Accordion("Example Videos", open=False):
172
+ gr.Markdown("Click one of the examples below to get started")
173
+ gr.Examples(
174
+ examples=[
175
+ ["bangkok.mp4"],
176
+ ["lotr.mp4"],
177
+ ["mi.mp4"],
178
+ ],
179
+ inputs=[video_file],
180
+ outputs=[process_output],
181
+ fn=process_video,
182
+ cache_examples=True
183
+ )
184
 
185
+ # Handle UI interactions
186
  def update_search_input(choice):
187
  return gr.update(visible=choice=="Text"), gr.update(visible=choice=="Image")
188
 
 
196
 
197
  def perform_search(search_type, text_query, image_query, num_results):
198
  query = text_query if search_type == "Text" else image_query
199
+ if query is None or (isinstance(query, str) and query.strip() == ""):
200
+ return gr.Gallery(label="Please enter a valid search query")
201
+
202
  return similarity_search(query, search_type, num_results)
203
 
204
  search_button.click(
 
207
  outputs=[results_gallery]
208
  )
209
 
210
+ # Add keyboard shortcuts
211
+ search_type.change(lambda: None, None, None, _js="() => {document.activeElement.blur();}")
212
+ text_input.submit(
213
+ perform_search,
214
+ inputs=[search_type, text_input, image_input, num_results],
215
+ outputs=[results_gallery]
216
+ )
217
+
218
  if __name__ == "__main__":
219
  demo.launch()